In [1]:
import pandas as pd
import numpy as np

np.random.seed(594)

In [2]:
df = pd.read_csv('nottm_postcodes.csv')

In [3]:
df.head()

Unnamed: 0,Postcode
0,NG9 3WF
1,NG9 4WP
2,NG9 3EL
3,NG1 9FH
4,NG5 6QZ


In [4]:
num_donations = 1+np.round(5**np.random.randn(100))
df['NumberDonations'] = pd.Series(num_donations).astype(int)

In [5]:
total_donated = np.round(np.abs(15*num_donations + 20**np.random.randn(100)))
df['TotalDonated'] = pd.Series(total_donated).astype(int)

In [6]:
df['AverageDonated'] = np.round(df['TotalDonated']/df['NumberDonations'], decimals=2)
df

Unnamed: 0,Postcode,NumberDonations,TotalDonated,AverageDonated
0,NG9 3WF,4,61,15.25
1,NG9 4WP,1,21,21.00
2,NG9 3EL,1,27,27.00
3,NG1 9FH,5,75,15.00
4,NG5 6QZ,1,15,15.00
...,...,...,...,...
95,NG2 1WY,1,15,15.00
96,NG8 1ND,10,164,16.40
97,NG9 2QA,1,15,15.00
98,NG3 1FF,22,333,15.14


In [7]:
df['NumberDonations'].describe()

count    100.000000
mean       4.320000
std        5.454828
min        1.000000
25%        1.000000
50%        2.000000
75%        5.000000
max       37.000000
Name: NumberDonations, dtype: float64

In [8]:
newsletter = (np.random.rand(100) > 0.5).astype(int)
newsletter

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0])

In [9]:
def generate_newsletter(total_donated):
    # Define the probability of being 1 based on TotalDonated
    probability = total_donated / np.max(total_donated)  # Normalize to [0, 1] range
    # Scale the effect of TotalDonated and add random noise
    probability_with_noise = probability + 2*np.random.randn()  # Adjust scale as needed
    # Ensure probability is within valid range [0, 1]
    probability_with_noise = np.clip(probability_with_noise, 0, 1)
    # Generate random boolean value with higher probability for 1 if TotalDonated is large with added noise
    return np.random.rand() < probability_with_noise

In [10]:
df['Newsletter'] = df['TotalDonated'].apply(generate_newsletter).astype(int)

In [11]:
df['Newsletter'].describe()

count    100.000000
mean       0.620000
std        0.487832
min        0.000000
25%        0.000000
50%        1.000000
75%        1.000000
max        1.000000
Name: Newsletter, dtype: float64

In [12]:
df

Unnamed: 0,Postcode,NumberDonations,TotalDonated,AverageDonated,Newsletter
0,NG9 3WF,4,61,15.25,0
1,NG9 4WP,1,21,21.00,0
2,NG9 3EL,1,27,27.00,0
3,NG1 9FH,5,75,15.00,0
4,NG5 6QZ,1,15,15.00,1
...,...,...,...,...,...
95,NG2 1WY,1,15,15.00,0
96,NG8 1ND,10,164,16.40,1
97,NG9 2QA,1,15,15.00,1
98,NG3 1FF,22,333,15.14,1


In [13]:
df.to_csv('FakeIndividualConstituents.csv', index=False)