# data_to_analysis_matched.py

- From Data to Matched Analysis

This code chunk contains two processes. First, we go from the datasets to a match with a certain election, and then to a match with the nearest winner. That dataset is then used to find the wealth of those politicians, after which we can compute an estimate of the average winner and average loser wealth.

- From Data to Unconditional Analysis

Secondly, we start again from the datasets, and we go to margins. This dataset is merged with a dataset of politicians with the margins (conditional on the margin being <$x$%), but not necessarily from the same elections.

In [2]:
import pandas as pd
import numpy as np
import re
from pandas_ods_reader import read_ods

from scipy import stats
from statsmodels.formula.api import ols
import statsmodels.api as sm
from statsmodels.compat import lzip
import numpy as np

from matplotlib import pyplot as plt

from tqdm import tqdm

import fuzzywuzzy as fw
from fuzzy_match import match
from fuzzy_match import algorithims
from fuzzywuzzy import process

## From Data to Matched Analysis

First step: 

- Import the data

- The data comes straight from the Google Drive sheets, so this code can be rerun every time new observations are obtained.

In [3]:
# Load both hand-entered sheets; the first column of the first file is dropped.
data1 = pd.read_csv("../Administration/new_data_entry_file.csv").iloc[:,1:]
data2 = pd.read_csv("../Administration/new_data_entry_file_lessclose.csv")

# Stack the two sheets and keep only the rows marked as found ('Gevonden' == 'ja').
nonpoliticians = pd.concat([data1, data2])
nonpoliticians = nonpoliticians[nonpoliticians['Gevonden'] == 'ja']
# 'jaar' is the third '/'-separated component of 'Verkiezingdatum', kept as a string.
nonpoliticians['jaar'] = nonpoliticians['Verkiezingdatum'].astype(str).str.split('/').apply(lambda x : x[2])

nonpoliticians.head(2)

Unnamed: 0,Naam,District,Verkiezingdatum,InTKEK,Gevonden,Gebdatum,Sterfdatum,Sterfplaats,Vermogen,Link1,Link2,Polpartij,Opm,Unnamed: 13,jaar
0,mr. J. van Riemsdijk,Almelo,02/01/1849,nee,ja,,01/11/1879,Almelo,29618.27,https://www.genealogieonline.nl/en/genealogie-...,,,,,1849
7,D. Mulder Dzn.,Winschoten,12/03/1878,nee,ja,27/11/1825,17/09/1889,Amsterdam,1018.44,https://genealogiewinschoten.nl/diversen/oud-n...,https://noord-hollandsarchief.nl/bronnen/archi...,,,,1878


Second step:

- Import the `allelected.csv` dataset
- Then, left merge the nonpoliticians data with the `allelected.csv` data 
    - This choice: we leave 'double' matches in the dataset: one nonpolitician can have two or more politician matches
    - Detailed approach: first, match on exact election date
    - If that doesn't work, match on election year

In [4]:
def decode_accents(name):
    """Repair a name whose accented characters were stored mis-encoded.

    The text is round-tripped latin-1 -> raw_unicode_escape -> latin-1 ->
    utf-8.  This undoes UTF-8 byte sequences that were read as latin-1
    (including ones written as literal backslash-u escapes).

    Parameters
    ----------
    name : str
        Candidate name as read from the CSV.  Non-string values (for
        example NaN for an empty cell) are accepted and passed through.

    Returns
    -------
    The repaired string, or ``name`` itself when it is not a string or
    cannot be round-tripped (which is the case for text that is already
    clean but contains accents, or that is not representable in latin-1).
    """
    try:
        return (
            name.encode('latin-1')
            .decode('raw_unicode_escape')
            .encode('latin-1')
            .decode('utf-8')
        )
    except (AttributeError, UnicodeError):
        # AttributeError: value has no .encode (e.g. NaN / None).
        # UnicodeError: covers both UnicodeEncodeError and UnicodeDecodeError.
        # Anything else (KeyboardInterrupt included) is allowed to propagate.
        return name

In [5]:
#allelected = pd.read_csv("../Data/elections/allelected.csv", encoding='latin1')#
# Load the detailed election results and pass every candidate name through
# decode_accents to repair mis-encoded characters.
all_candidates_elections = pd.read_csv("../Data/elections/election_results_details.csv", encoding='utf-8-sig')
all_candidates_elections["Naam"] = all_candidates_elections["Naam"].apply(lambda x : decode_accents(x))
#print(all_candidates_elections.shape)
all_candidates_elections.head(2)

Unnamed: 0.1,Unnamed: 0,Naam,Aanbevolen door,Aantal stemmen,Procentueel,District,Verkiezingdatum,Type,Omvang electoraat,Opkomst,Aantal stembriefjes,Aantal stemmen geldig,Aantal stemmen blanco,Aantal zetels,Kiesdrempel
0,0,mr. B. Wichers,,700,83.73%,Groningen,30/11/1848,algemeen,1191,838,838,836,0,1,418
1,1,W.L. de Sturler,,34,4.07%,Groningen,30/11/1848,algemeen,1191,838,838,836,0,1,418


In [6]:
# Load allelected.csv (decoded as latin-1) and show its dimensions.
allelected = pd.read_csv("../Data/elections/allelected.csv", encoding='latin1')
print(allelected.shape)
allelected.head(2)

(2508, 12)


Unnamed: 0,achternaam,voornaam,tussenvoegsel,jaar,maand,dag,type verkiezing,districtsnaam,aantal stemmen,omvang_electoraat,zetels,drempel
0,Aalberse,P.J.M.,,1903,2,18,tussentijds,Almelo,3821,7865,1.0,2953.0
1,Aalberse,P.J.M.,,1905,6,16,algemeen,Almelo,5217,9324,1.0,3922.0


In [7]:
# Clean the data a little bit

# Zero-pad single-digit months and days, then join day/month/year with '/'
# into a 'Verkiezingdatum' string.
allelected['maand'] = np.where(allelected['maand'] < 10, '0' + allelected['maand'].astype(str), allelected['maand'])
allelected['dag'] = np.where(allelected['dag'] < 10, '0' + allelected['dag'].astype(str), allelected['dag'])
allelected["Verkiezingdatum"] = allelected.dag.astype(str).str.cat(others=[allelected.maand.astype(str), allelected.jaar.astype(str)], sep='/')
# Rename the district column to 'District', the name used as a join/group key below.
allelected.rename(columns={'districtsnaam':'District'}, inplace = True)

allelected.head(2)

Unnamed: 0,achternaam,voornaam,tussenvoegsel,jaar,maand,dag,type verkiezing,District,aantal stemmen,omvang_electoraat,zetels,drempel,Verkiezingdatum
0,Aalberse,P.J.M.,,1903,2,18,tussentijds,Almelo,3821,7865,1.0,2953.0,18/02/1903
1,Aalberse,P.J.M.,,1905,6,16,algemeen,Almelo,5217,9324,1.0,3922.0,16/06/1905


In [8]:
# Split the '/'-separated election date into integer year, month and day
# columns (parts 2, 1 and 0 of the string respectively).
all_candidates_elections["jaar"] = all_candidates_elections["Verkiezingdatum"].apply(lambda x : int(x.split("/")[2]))
all_candidates_elections["maand"] = all_candidates_elections["Verkiezingdatum"].apply(lambda x : int(x.split("/")[1]))
all_candidates_elections["dag"] = all_candidates_elections["Verkiezingdatum"].apply(lambda x : int(x.split("/")[0]))
all_candidates_elections.head(2)

Unnamed: 0.1,Unnamed: 0,Naam,Aanbevolen door,Aantal stemmen,Procentueel,District,Verkiezingdatum,Type,Omvang electoraat,Opkomst,Aantal stembriefjes,Aantal stemmen geldig,Aantal stemmen blanco,Aantal zetels,Kiesdrempel,jaar,maand,dag
0,0,mr. B. Wichers,,700,83.73%,Groningen,30/11/1848,algemeen,1191,838,838,836,0,1,418,1848,11,30
1,1,W.L. de Sturler,,34,4.07%,Groningen,30/11/1848,algemeen,1191,838,838,836,0,1,418,1848,11,30


In [9]:
# Extract the number of seats for each election from the allelected df:
# the mean of 'zetels' per (District, Verkiezingdatum) group.
hoeveelzetels = allelected.groupby(['District','Verkiezingdatum'])['zetels'].mean()
hoeveelzetels.head(2)

# Spot check: look up a single election by district and date.
hoeveelzetels["Alkmaar"]["01/03/1892"]

1.0

In [10]:
def to_int(x):
    """Convert ``x`` to ``int``, or return ``None`` when it cannot be converted.

    Parameters
    ----------
    x : object
        Value to convert, e.g. a vote count read from the CSV.

    Returns
    -------
    int or None
        ``int(x)``; ``None`` for values ``int`` rejects, such as
        non-numeric text, ``None``, NaN or infinity.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. None); ValueError: non-numeric
        # text or NaN; OverflowError: infinity.
        return None
    
# Convert vote counts with to_int; values it cannot convert come back as None.
all_candidates_elections['Aantal stemmen'] = all_candidates_elections['Aantal stemmen'].apply(to_int)

In [11]:
# Within each (District, Verkiezingdatum) election, sort the candidates by
# vote count, highest first.
grouped_allelec = all_candidates_elections.groupby(['District', 'Verkiezingdatum']).apply(lambda x : x.sort_values(['Aantal stemmen'], ascending = False))
# Drop the last index level so that rows are addressed by the remaining group keys.
grouped_allelec = grouped_allelec.reset_index(level=-1, drop=True)
grouped_allelec.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,Naam,Aanbevolen door,Aantal stemmen,Procentueel,District,Verkiezingdatum,Type,Omvang electoraat,Opkomst,Aantal stembriefjes,Aantal stemmen geldig,Aantal stemmen blanco,Aantal zetels,Kiesdrempel,jaar,maand,dag
District,Verkiezingdatum,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Alkmaar,01/03/1892,4934,mr. W. van der Kaay,,1283.0,97.05%,Alkmaar,01/03/1892,tussentijds,4783,1399,1398,1322,74,1,661,1892,3,1
Alkmaar,01/09/1887,2126,mr. W. van der Kaay,,1356.0,75.42%,Alkmaar,01/09/1887,algemeen,4165,1835,1837,3596,72,2,899,1887,9,1


In [12]:
# Inspect the rows of a single election as an example.
a = grouped_allelec.loc[("Alkmaar", "15/06/1917")]
a

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,Naam,Aanbevolen door,Aantal stemmen,Procentueel,District,Verkiezingdatum,Type,Omvang electoraat,Opkomst,Aantal stembriefjes,Aantal stemmen geldig,Aantal stemmen blanco,Aantal zetels,Kiesdrempel,jaar,maand,dag
District,Verkiezingdatum,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Alkmaar,15/06/1917,8114,jhr.mr. P. van Foreest,VL,,enige kandidaat,Alkmaar,15/06/1917,algemeen/enkelvoudig,12557,-,-,-,-,-,-,1917,6,15


In [13]:
# Brief outline of what I want: for every election, keep the first rows of the
# vote-sorted candidate table, as many as there were seats.

allwinners = []

for i in all_candidates_elections.groupby(['District', 'Verkiezingdatum']).groups:
    
    cur_election = grouped_allelec.loc[(i[0], i[1])]
    
    # Look up the number of seats in the hoeveelzetels series and store it in a
    # variable; if the lookup fails, fall back to this election's own
    # 'Aantal zetels' value.
    # NOTE(review): the bare except clauses in this loop hide every kind of
    # error, not just a missing key / unparsable seat count.
    try:
        skolkosidenij = hoeveelzetels[i[0]][i[1]]
    except:
        skolkosidenij = cur_election['Aantal zetels'].iloc[0]
    # Select as many observations as the variable just created indicates;
    # elections whose seat count cannot be converted to int are skipped.
    try:
        int(skolkosidenij)
    except:
        continue
    cur_winners = cur_election.head(int(skolkosidenij))
    allwinners.append(cur_winners)

# Stack the winners of all elections into one frame with a fresh 0..n-1 index.
allwinners_df = pd.concat(allwinners).reset_index(drop=True)
allwinners_df.head(5)

Unnamed: 0.1,Unnamed: 0,Naam,Aanbevolen door,Aantal stemmen,Procentueel,District,Verkiezingdatum,Type,Omvang electoraat,Opkomst,Aantal stembriefjes,Aantal stemmen geldig,Aantal stemmen blanco,Aantal zetels,Kiesdrempel,jaar,maand,dag
0,4934,mr. W. van der Kaay,,1283.0,97.05%,Alkmaar,01/03/1892,tussentijds,4783,1399,1398,1322,74,1,661,1892,3,1
1,2126,mr. W. van der Kaay,,1356.0,75.42%,Alkmaar,01/09/1887,algemeen,4165,1835,1837,3596,72,2,899,1887,9,1
2,2127,mr. J.L. de Bruyn Kops,,1348.0,74.97%,Alkmaar,01/09/1887,algemeen,4165,1835,1837,3596,72,2,899,1887,9,1
3,2079,jhr.mr. C. van Foreest,,1444.0,57.85%,Alkmaar,04/02/1868,herstemming,3232,2507,2509,2496,8,1,1248,1868,2,4
4,4940,A.P. de Lange,,1961.0,63.28%,Alkmaar,05/06/1894,tussentijds,4779,3124,3124,3099,21,1,1550,1894,6,5


In [13]:
#allwinners_df[allwinners_df['Naam'].str.contains("Treub")]

In [14]:
# What to do?
## Step 1: Take all nonpoliticians
## Merge it with allwinners_df on the basis of District, Verkiezingdatum
close = nonpoliticians.merge(allwinners_df, how="left", left_on=['District', 'Verkiezingdatum'], right_on = ['District', 'Verkiezingdatum'])
## Join with how='left', left nonpoliticians, 
## Check what to do in case of double matches (I want to keep all)
## Columns that exist in both frames get the suffixes _x (nonpoliticians) and
## _y (winners), e.g. Naam_x / Naam_y.

In [15]:
# Drop the intermediate frames; only `close` is used from here on.
del(allwinners_df); del(grouped_allelec); del(all_candidates_elections)

# Check

We check if there are some elections that have not been matched with politicians:

In [16]:
# Rows for which the left merge found no winner carry a missing value in Naam_y.
# Comparing with `== None` is evaluated element-wise and is False for every
# row, so it could never flag anything; isna() is the test for missing values.
close[close['Naam_y'].isna()]

Unnamed: 0,Naam_x,District,Verkiezingdatum,InTKEK,Gevonden,Gebdatum,Sterfdatum,Sterfplaats,Vermogen,Link1,...,Omvang electoraat,Opkomst,Aantal stembriefjes,Aantal stemmen geldig,Aantal stemmen blanco,Aantal zetels,Kiesdrempel,jaar_y,maand,dag


# Write function 

To find the polid from the tk_1815tot1950uu.xlsx dataset, and move it back to the `close` dataset. 

In [17]:
# Titles that precede a candidate's name, with or without the trailing dot.
# Anchored on word boundaries and with the dot escaped, so that letters inside
# a surname (e.g. the "dri" in "Hendriks") are no longer stripped.
_TITLE_PATTERN = re.compile(r"\b(?:baron|ridder|jhr|mr|dr)\b\.?")

# Surname prefixes removed for the looser matching attempts.
_PREFIX_PATTERN = re.compile("Van De |Van Der |van de |van der |van den |van |Van der |Van |de ")


def _best_fuzzy_match(extract_one, query, choices, **kwargs):
    """Return ``(choice, score)`` of the best match, or ``(None, 0)`` if there is none."""
    if not choices:
        return None, 0
    result = extract_one(query, choices, **kwargs)
    if result is None:
        return None, 0
    return result[0], result[1]


def _b1_nummer_of(candidates, column, value):
    """Return the b1-nummer of the first row of ``candidates`` whose ``column`` equals ``value``."""
    return candidates.loc[candidates[column] == value, 'b1-nummer'].iloc[0]


def _match_b1_nummer(name, election_date, polidlist, all_full_names, career_start, career_end_less_50d):
    """Return the b1-nummer matched to ``name`` for one election, or ``None`` if every attempt fails."""
    # First attempt: near-exact match on initials + surname, no date filter.
    best, score = _best_fuzzy_match(match.extractOne, name, all_full_names, match_type='jaro_winkler')
    if score > 0.95:
        return _b1_nummer_of(polidlist, 'voorlachternaam', best)

    # Candidates for the remaining attempts: the period must start less than a
    # year after the election, and (end of period - 50 days) must lie after the
    # election (they cannot have ended their career before being elected).
    in_period = polidlist[(career_start < election_date + pd.DateOffset(years=1))
                          & (career_end_less_50d > election_date)]
    period_full_names = in_period['voorlachternaam'].tolist()

    # Second attempt: same comparison, restricted to the date-filtered candidates.
    # The id is looked up in the filtered frame, so that a namesake from another
    # period cannot be returned instead of the candidate that was matched.
    best, score = _best_fuzzy_match(match.extractOne, name, period_full_names, match_type='jaro_winkler')
    if score > 0.90:
        return _b1_nummer_of(in_period, 'voorlachternaam', best)

    # Third attempt: the same, with surname prefixes removed from the query.
    simplified_name = " ".join(_PREFIX_PATTERN.sub("", name).split())
    best, score = _best_fuzzy_match(match.extractOne, simplified_name, period_full_names, match_type='jaro_winkler')
    if score > 0.80:
        return _b1_nummer_of(in_period, 'voorlachternaam', best)

    # Fourth attempt: surname only, scored by fuzzywuzzy.  fuzzywuzzy reports
    # scores on a 0-100 scale, so the cut-off is 80 (a cut-off of 0.80 accepts
    # practically any candidate and makes "No match found" unreachable).
    best, score = _best_fuzzy_match(process.extractOne, simplified_name, in_period['achternaam'].tolist())
    if score > 80:
        return _b1_nummer_of(in_period, 'achternaam', best)

    return None


# df (supposed to be the dataframe close) has to contain Naam_y = name of the politician
def find_polid(df):
    """Match every row of ``df`` to a politician id (b1-nummer) and merge in that politician's record.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Naam_y' (name of the politician, as strings) and
        'Verkiezingdatum' (election date as a day-first, '/'-separated string).
        NOTE: ``df`` is modified in place: 'Naam_y' is overwritten with the
        title-stripped name and a 'matched b1-nummer' column is added.

    Returns
    -------
    pandas.DataFrame
        ``df`` left-merged with the rows of tk_1815tot1950uu.xlsx on
        'matched b1-nummer' == 'b1-nummer'.  Rows without a match carry the
        string "No match found" in 'matched b1-nummer'.

    Notes
    -----
    Four matching attempts are made per row, from strict to loose; see
    ``_match_b1_nummer``.
    """
    # Remove titles from Naam_y and collapse the remaining whitespace.
    df['Naam_y'] = df['Naam_y'].apply(lambda name: " ".join(_TITLE_PATTERN.sub("", name).split()))

    polidlist = pd.read_excel("../Data/politician_data/tk_1815tot1950uu.xlsx", dtype={'b1-nummer':str})
    polidlist['voorlachternaam'] = polidlist['voorletters'] + ' ' + polidlist['achternaam']

    # Loop invariants: computed once instead of once per row.
    all_full_names = polidlist['voorlachternaam'].tolist()
    career_start = polidlist['begin periode'].apply(lambda x: pd.Timestamp(x))
    career_end_less_50d = polidlist['einde periode'].apply(lambda x: pd.Timestamp(x) + pd.DateOffset(days=-50))

    matched_b1_nummer = []

    rows = zip(df['Naam_y'], df['Verkiezingdatum'])
    for i, (politician_name, raw_election_date) in enumerate(tqdm(rows, total=len(df))):
        # The dates are day-first strings; without dayfirst=True a date such as
        # 02/01/1849 would be read as 1 February instead of 2 January.
        election_date = pd.to_datetime(raw_election_date, dayfirst=True)

        b1_nummer = _match_b1_nummer(politician_name, election_date, polidlist,
                                     all_full_names, career_start, career_end_less_50d)
        if b1_nummer is None:
            print(i, "No match found")
            b1_nummer = "No match found"
        matched_b1_nummer.append(b1_nummer)

    # Assign the list positionally (one entry per row); a pd.Series would be
    # aligned on the index and misplace values when df has no 0..n-1 index.
    df['matched b1-nummer'] = matched_b1_nummer
    print(len(matched_b1_nummer))
    df = df.merge(polidlist, how="left", left_on = ["matched b1-nummer"], right_on="b1-nummer")

    return df

In [18]:
# Attach a politician id (b1-nummer) and the politician's record to every row of `close`.
matches_with_politicians = find_polid(close)

100%|██████████| 324/324 [00:31<00:00, 10.13it/s]

324





In [19]:
# These fail to be matched so far: rows that find_polid marked with the
# "No match found" placeholder instead of a b1-nummer.
matches_with_politicians[['Naam_y', 'Verkiezingdatum', 'matched b1-nummer','achternaam', 'voorletters']][matches_with_politicians['matched b1-nummer'] == "No match found"]

Unnamed: 0,Naam_y,Verkiezingdatum,matched b1-nummer,achternaam,voorletters


In [20]:
# Here I check the performance
# pd.set_option('display.max_rows', 237)
# matches_with_politicians[['Naam_y', 'Verkiezingdatum', 'matched b1-nummer', 'achternaam', 'voorletters']].head(237)

# Merging with politician wealth, and comparing wealth

First, we extract only the variables we need. Then, we merge with `wealth_politicians.csv`, and generate a graph:


In [21]:
# Display the matched frame for a visual check.
matches_with_politicians

Unnamed: 0,Naam_x,District,Verkiezingdatum,InTKEK,Gevonden,Gebdatum,Sterfdatum,Sterfplaats,Vermogen,Link1,...,achternaam,voorletters,voorna(a)m(en),roepnaam,prepositie,geslacht,partij(en)/fractie(s),begin periode,einde periode,voorlachternaam
0,mr. J. van Riemsdijk,Almelo,02/01/1849,nee,ja,,01/11/1879,Almelo,29618.27,https://www.genealogieonline.nl/en/genealogie-...,...,Donker Curtius,D.,Dirk,Dirk,,m,'pragmatisch' liberaal,1849-02-13,1850-08-19,D. Donker Curtius
1,D. Mulder Dzn.,Winschoten,12/03/1878,nee,ja,27/11/1825,17/09/1889,Amsterdam,1018.44,https://genealogiewinschoten.nl/diversen/oud-n...,...,Vos van Steenwijk,J.W.J.,Jan Willem Jacobus,,baron de,m,liberaal;Liberale Unie,1878-03-18,1888-03-26,J.W.J. Vos van Steenwijk
2,D. Mulder Dzn.,Winschoten,10/06/1879,nee,ja,27/11/1825,17/09/1889,Amsterdam,1018.44,https://genealogiewinschoten.nl/diversen/oud-n...,...,Vos van Steenwijk,J.W.J.,Jan Willem Jacobus,,baron de,m,liberaal;Liberale Unie,1878-03-18,1888-03-26,J.W.J. Vos van Steenwijk
3,mr. S.J. baron van Pallandt,Almelo,08/06/1875,nee,ja,,20/08/1890,,1076713.3,,...,Corver Hooft,J.R.,Jan Reijnaud,,,m,conservatief,1875-09-20,1887-08-16,J.R. Corver Hooft
4,mr. S.J. baron van Pallandt,Almelo,22/06/1875,nee,ja,,20/08/1890,,1076713.3,,...,Corver Hooft,J.R.,Jan Reijnaud,,,m,conservatief,1875-09-20,1887-08-16,J.R. Corver Hooft
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319,mr. E.P.F.A. van den Bogaert,Amsterdam,10/04/1894,nee,ja,,23/06/1927,Arnhem,,https://www.delpher.nl/nl/kranten/view?coll=dd...,...,Vrolik,W.K.M.,Willem Karel Marie,,,m,Liberale Unie,1891-09-15,1894-03-19,W.K.M. Vrolik
320,mr. E.P.F.A. van den Bogaert,Amsterdam,10/04/1894,nee,ja,,23/06/1927,Arnhem,,https://www.delpher.nl/nl/kranten/view?coll=dd...,...,Treub,M.W.F.,Marie Willem Frederik,Willem,,m,VDB;Economische Bond;Neutrale fractie;Vrijheid...,1904-11-08,1921-10-14,M.W.F. Treub
321,mr. E.P.F.A. van den Bogaert,Amsterdam,10/04/1894,nee,ja,,23/06/1927,Arnhem,,https://www.delpher.nl/nl/kranten/view?coll=dd...,...,Gleichman,J.G.,Johan George,,,m,liberaal;oud- of vrije liberalen,1880-09-21,1901-06-08,J.G. Gleichman
322,mr. E.P.F.A. van den Bogaert,Amsterdam,10/04/1894,nee,ja,,23/06/1927,Arnhem,,https://www.delpher.nl/nl/kranten/view?coll=dd...,...,Rutgers van Rozenburg,J.W.H.,Jan Willem Hendrik,,,m,liberaal;oud- of vrije liberalen,1869-09-21,1897-09-20,J.W.H. Rutgers van Rozenburg


In [22]:
# Show at most 15 rows when a frame is displayed.
pd.set_option('display.max_rows', 15)

# Keep only the columns used from here on: the non-politician (Naam_x) with
# death date and wealth, the politician (Naam_y) and the matched politician id.
matches_with_politicians = matches_with_politicians[['Naam_x', 'District', 'Verkiezingdatum', 'Sterfdatum',
                          'Vermogen', 'Naam_y',
                          'matched b1-nummer']]

In [23]:
# Load the politicians' wealth table; 'indexnummer' and 'nw0' are used below.
wealth_politicians = pd.read_csv("../Data/politician_data/wealth_politicians.csv")

In [24]:
# Attach each matched politician's 'nw0' by joining the matched b1-nummer to
# 'indexnummer'; the left join keeps rows without a wealth record.
# NOTE(review): 'matched b1-nummer' holds strings - confirm 'indexnummer' is
# read with a compatible dtype.
matched_sample = pd.merge(matches_with_politicians, wealth_politicians[['indexnummer', 'nw0']], 
         how = "left",
         left_on = "matched b1-nummer", 
         right_on = "indexnummer")


## Deflating Wealth

Finally, the two wealth variables should be deflated to 1900 guilders. We can do this for each column separately. `Vermogen` is for the non-politician, and `nw0` is for the politician. We create a function that takes this dataframe as input and outputs the same dataframe with two new columns, `Vermogen_deflated` and `nw0_deflated`, which we can use in the analysis. 

In [25]:
# Helper function
def tryextract(x):
    """Return the third element of ``x`` as a number, or 999 when that fails.

    Parameters
    ----------
    x : sequence or object
        Normally the list produced by splitting a date string on '-' or '/'.
        Anything else (e.g. NaN for a missing date) is accepted.

    Returns
    -------
    number
        ``pd.to_numeric(x[2])``; the sentinel 999 when ``x`` cannot be
        indexed, has fewer than three elements, or the element is not numeric.
    """
    try:
        return pd.to_numeric(x[2])
    except (TypeError, IndexError, KeyError, ValueError):
        # TypeError: x is not subscriptable (NaN / None) or element has an
        # unsupported type; IndexError/KeyError: no third element;
        # ValueError: third element is not numeric text.
        return 999

# Deflate function
def _deflate_amounts(years, amounts, factor_by_year):
    """Multiply each amount by the factor of its year; ``None`` where that is not possible."""
    deflated = []
    for year, amount in zip(years, amounts):
        factor = factor_by_year.get(year)
        if factor is None:
            # Year not present in the deflator table (e.g. the 999 sentinel).
            deflated.append(None)
            continue
        try:
            deflated.append(pd.to_numeric(amount) * factor)
        except (TypeError, ValueError):
            # Amount (or factor) is not numeric.
            deflated.append(None)
    return deflated


def deflate(df):
    """Add deflated wealth columns for non-politicians and politicians.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Sterfdatum' and 'Vermogen' (non-politician),
        'matched b1-nummer', 'nw0' and 'indexnummer' (politician).
        NOTE: the input frame itself receives the columns 'Year' and
        'Vermogen_deflated' as a side effect.
        NOTE(review): the year is taken as the third '-'/'/'-separated part of
        the date strings, i.e. day-first dates are assumed - confirm for the
        politicians' 'datum' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with 'Vermogen_deflated' and 'nw0_deflated' added and the
        helper columns ('Year', 'Year2', 'b1-nummer', 'Sterfdatum_pols',
        'indexnummer') removed.  Values are missing where the year has no
        entry in the deflator table or the amount is not numeric.
    """
    # Load deflator data; the multiplier is the third of the selected columns.
    deflator = read_ods("../Data/politician_data/memories_invoer.ods", sheet = 3).iloc[:,[0,2,3]]

    # Year -> multiplier lookup; the first row of a year wins, matching a
    # "first matching row" selection on the table.
    factor_by_year = {}
    for year, factor in zip(deflator['Year'], deflator.iloc[:, 2]):
        factor_by_year.setdefault(year, factor)

    ## create a year variable (for non-pols) from Sterfdatum
    df['Year'] = df['Sterfdatum'].str.split('-|/').apply(lambda x: tryextract(x))

    ## Deflate wealth for the non-politicians (Sterfdatum already there).
    ## The values are paired with the rows by position and carry df's own index.
    df['Vermogen_deflated'] = pd.Series(
        _deflate_amounts(df['Year'], df['Vermogen'], factor_by_year), index=df.index)

    # pols
    ## Find DoD for the politicians
    deathdates_pol = pd.read_excel("../Data/politician_data/tk_1815tot1950uu.xlsx", sheet_name = 1, dtype={'b1-nummer': str})
    deathdates_pol = deathdates_pol[deathdates_pol['rubriek'] == 3020][['b1-nummer','datum']]

    df = pd.merge(df, deathdates_pol, how="left", left_on = 'matched b1-nummer', right_on = 'b1-nummer').rename(columns={'datum':'Sterfdatum_pols'})

    ## Generate a year for the pols
    df['Year2'] = df['Sterfdatum_pols'].str.split('-|/').apply(lambda x: tryextract(x))

    ## Deflate wealth for the politicians
    df['nw0_deflated'] = pd.Series(
        _deflate_amounts(df['Year2'], df['nw0'], factor_by_year), index=df.index)

    # delete year (for nonpols) again; delete year (for pols) again
    return df.drop(columns=['Year', 'b1-nummer', 'Sterfdatum_pols', 'Year2', 'indexnummer'])
    
    

In [26]:
# Add the deflated wealth columns (Vermogen_deflated, nw0_deflated).
matched_sample = deflate(matched_sample)

## Conclusion

This is everything we want to do in this notebook. In another notebook, we shall try to analyze this dataset. 

In [27]:
# Write the matched sample to disk; the row index is written as the first,
# unnamed column (pandas' default).
matched_sample.to_csv("../Data/analysis/matched_sample_analysis.csv")