---
draft: true
---

In [6]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so the random draws below repeat on a fresh run.
np.random.seed(594)

In [7]:
# Load the postcode list; the generated donor columns below are attached to this frame.
df = pd.read_csv('nottm_postcodes.csv')

In [8]:
# Preview the first five rows of the loaded frame.
df.head()

Unnamed: 0,Postcode
0,NG9 3WF
1,NG9 4WP
2,NG9 3EL
3,NG1 9FH
4,NG5 6QZ


In [9]:
# Draw one donation count per row of `df` (instead of a hard-coded 100) so the
# column always covers every loaded postcode. 5**N(0, 1) is a positive,
# right-skewed value with median 1; rounding it and adding 1 guarantees that
# every constituent has at least one donation.
num_donations = 1 + np.round(5 ** np.random.randn(len(df)))
df['NumberDonations'] = pd.Series(num_donations).astype(int)

In [10]:
# Total donated = 15 per donation plus a positive, right-skewed bump
# (20**N(0, 1)), rounded to whole units. The noise vector is sized from
# `num_donations` rather than a hard-coded 100 so the two arrays always
# have matching lengths.
total_donated = np.round(np.abs(15 * num_donations + 20 ** np.random.randn(num_donations.size)))
df['TotalDonated'] = pd.Series(total_donated).astype(int)

In [11]:
# Average gift per constituent: total divided by number of donations, kept to two decimals.
df['AverageDonated'] = df['TotalDonated'].div(df['NumberDonations']).round(2)
df

Unnamed: 0,Postcode,NumberDonations,TotalDonated,AverageDonated
0,NG9 3WF,4,61,15.25
1,NG9 4WP,1,23,23.00
2,NG9 3EL,1,30,30.00
3,NG1 9FH,5,75,15.00
4,NG5 6QZ,1,15,15.00
...,...,...,...,...
95,NG2 1WY,1,15,15.00
96,NG8 1ND,10,169,16.90
97,NG9 2QA,1,15,15.00
98,NG3 1FF,22,333,15.14


In [12]:
# Summary statistics for the generated donation counts.
df['NumberDonations'].describe()

count    100.000000
mean       4.320000
std        5.454828
min        1.000000
25%        1.000000
50%        2.000000
75%        5.000000
max       37.000000
Name: NumberDonations, dtype: float64

In [13]:
# Independent coin-flip flag: 1 where a uniform draw exceeds 0.5, for 100 values.
# NOTE(review): `newsletter` is not referenced again in the cells visible here;
# the 'Newsletter' column is produced by add_newsletter() instead. This call
# still draws from the global RNG, so deleting the cell would change the
# random draws made by later cells.
newsletter = (np.random.rand(100) > 0.5).astype(int)
newsletter

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0])

In [14]:
def add_newsletter(df, noise_scale=0.1, percentile=50):
    """Add a binary 'Newsletter' column driven by donation activity.

    Each row gets a score ``n + t + n * t`` (``n`` = 'NumberDonations',
    ``t`` = 'TotalDonated') plus Gaussian noise, and is flagged 1 when its
    noisy score is strictly above the requested percentile of all scores.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'NumberDonations' and 'TotalDonated'. Modified in place:
        a 'Newsletter' column is added (or overwritten).
    noise_scale : float, default 0.1
        Standard deviation of the zero-mean noise drawn from NumPy's global
        RNG. With 0 the score is used as-is.
    percentile : float, default 50
        Percentile (0-100) of the noisy score used as the cut-off. The
        default (the median) flags about half of the rows.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    n_rows = df.shape[0]
    donations = df['NumberDonations']
    total = df['TotalDonated']

    # Combined score: grows with both the count and the amount donated.
    score = donations + total + donations * total

    # Jitter the score so that rows with identical scores can land on
    # different sides of the cut-off.
    score = score + np.random.normal(0, noise_scale, n_rows)

    # An empty frame has no percentile to compute; a NaN threshold makes the
    # comparison below yield an empty column.
    threshold = np.percentile(score, percentile) if n_rows else np.nan
    df['Newsletter'] = (score > threshold).astype(int)

    return df

In [15]:
# Attach the 'Newsletter' flag, then summarise it (a mean near 0.5 means about half are flagged).
df = add_newsletter(df)
df['Newsletter'].describe()

count    100.000000
mean       0.500000
std        0.502519
min        0.000000
25%        0.000000
50%        0.500000
75%        1.000000
max        1.000000
Name: Newsletter, dtype: float64

In [16]:
# Display the frame with all generated columns.
df

Unnamed: 0,Postcode,NumberDonations,TotalDonated,AverageDonated,Newsletter
0,NG9 3WF,4,61,15.25,1
1,NG9 4WP,1,23,23.00,0
2,NG9 3EL,1,30,30.00,0
3,NG1 9FH,5,75,15.00,1
4,NG5 6QZ,1,15,15.00,0
...,...,...,...,...,...
95,NG2 1WY,1,15,15.00,0
96,NG8 1ND,10,169,16.90,1
97,NG9 2QA,1,15,15.00,0
98,NG3 1FF,22,333,15.14,1


In [17]:
# Write the synthetic constituents to CSV; index=False leaves the row index out of the file.
df.to_csv('FakeIndividualConstituents.csv', index=False)