<h3> Data Cleaning </h3>

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from rapidfuzz import process
from os.path import exists
import itertools


In [3]:
def stringConvert(x):
    return x if type(x) == str else ""

In [4]:
#function that makes dictionary that combines 3 full name columns into 1
def genFullNameDict(df):    
    namemerge_pre = dict()
    for x in df.index:
        marker = False
        row = df.loc[x][['full name 1', 'full name 2', 'full name 3']]
        #identify number of unique names
        if len(row[row.notna()].unique()) > 1:
            row_names = row[row.notna()].unique()
            for name in row_names:
                if name in namemerge_pre.keys() and not marker:
                    #add to dictionary if entry already exists
                    namemerge_pre[name].extend([n for n in row_names if 
                                                n != name and n not in namemerge_pre[name]])
                    marker = True
            #add to dictionary using shortest name as key
            if not marker:
                minname = row_names[0]
                for name in row_names:
                    if len(name) < len(minname):
                        minname = name
                namemerge_pre[minname] = [n for n in row_names if n != minname]
    
    #invert the dictionary - swap keys and values
    namemerge = dict()
    for key in namemerge_pre.keys():
        vals = namemerge_pre[key]
        #iterate through list of values
        for val in vals:
            namemerge[val] = key
    
    return namemerge

In [5]:
#use the fullname 1/2/3 columns to generate full name column
def setFullName(df):
    for x in df.index:
        marker = False
        row = df.loc[x][['full name 1', 'full name 2', 'full name 3']]
        if len(row[row.notna()].unique()) == 0:
            df.loc[x, 'full name'] = np.nan
        else:
            df.loc[x, 'full name'] = row[row.notna()].unique()[0]
    return df

In [6]:
def genFuzzyDict(df):
    namelst = df['full name'].drop_duplicates()
    fn_fuzzy_pre = dict()
    for name in namelst:
        marker = False
        if not pd.isnull(name):
            #find matches for name
            match = process.extract(name, [x for x in namelst if x != name and not 
                                           pd.isnull(x)], limit = 1, score_cutoff = 95)
            if len(match)> 0:
                match = match[0]
                if match[1]>95:
                    #add suitable matches to dictionary
                    for nm in [match[0], name]:
                        if nm in fn_fuzzy_pre.keys() and not marker:
                            fn_fuzzy_pre[nm].extend([n for n in [match[0], name] if 
                                                     n != nm and n not in fn_fuzzy_pre[nm]])
                            marker = True
                    if not marker:
                        if len(name) < len(match[0]):
                            fn_fuzzy_pre[name] = [match[0]]
                        else:
                            fn_fuzzy_pre[match[0]] = [name]
    #invert dictionary
    fn_fuzzy = dict()
    for key in fn_fuzzy_pre.keys():
        vals = fn_fuzzy_pre[key]
        for val in vals:
            fn_fuzzy[val] = key
    
    return fn_fuzzy

In [7]:
#separate a string that contains two names into a list of two names
def parseNames(x):
    #replace words that don't have meaning
    x = x.replace("and Co", "").replace("and co", "").replace("and Others" ,"")
    x = x.replace("and others", "").replace("and Son", "").replace("and Sons", "")
    x = x.replace("and Brothers", "").strip()
    #string preprocessing
    namelst = x.split(" and ")
    namelst = [x.strip() for x in namelst if x.strip() != ""]
    if len(namelst) > 1:
        wd1len = len(namelst[0].split(" "))
        wd2len = len(namelst[1].split(" "))
        #add last name
        if wd1len == 1 and wd2len != 1:
            namelst[0] = namelst[0] + " " + namelst[1].split(" ")[-1]
    return namelst

In [8]:
def transformdf(df, state):
    #add full name columns
    df['full name 1'] = (df['First Name'].apply(lambda x: stringConvert(x))
                         + " " + 
                         df['Last Name'].apply(lambda x: stringConvert(x)))
    df['full name 1'] = df['full name 1'].apply(lambda x: x if len(x.strip().split(" ")) > 1 else np.nan)
    df['full name 2'] = (df['First Name.1'].apply(lambda x: stringConvert(x))
                         + " " + 
                         df['Last Name.1'].apply(lambda x: stringConvert(x)))
    df['full name 2'] = df['full name 2'].apply(lambda x: x if len(x.strip().split(" ")) > 1 else np.nan)
    df['full name 3'] = (df['First Name.2'].apply(lambda x: stringConvert(x))
                         + " " + 
                         df['Last Name.2'].apply(lambda x: stringConvert(x)))
    df['full name 3'] = df['full name 3'].apply(lambda x: x if len(x.strip().split(" ")) > 1 else np.nan)
    df['state'] = state
    #add dicionary to merge different full name columns into one
    namemerge = genFullNameDict(df)
    for col in ['full name 1', 'full name 2', 'full name 3']:    
        df[col] = df[col].apply(lambda x: namemerge[x] 
                                if x in namemerge.keys() else x)

    df['full name'] = np.nan
    df = setFullName(df)
    
    #fuzzy matching for different names in the full name column
    fn_fuzzy = genFuzzyDict(df)
    df['full name'] = df['full name'].apply(lambda x: x if x not in fn_fuzzy.keys() else fn_fuzzy[x])
    
    df_agg = df[['full name', 'state', '6p_Dollar', '6p_Cents', 
                 '6p_def_Dollar', '6p_def_Cents', '3p_Dollar', '3p_Cents']]
    
    #do some final preprocessing
    df_agg = df_agg[df_agg['full name'].apply(lambda x: not pd.isnull(x))]
    df_ind = df_agg[df_agg['full name'].apply(lambda x: len(x.strip().split(" ")) > 2 
                                              and " and " in x)].index
    df_agg.loc[df_ind, 'full name'] = df_agg.loc[df_ind]['full name'].apply(lambda x: parseNames(x))
    df_agg['full name'] = df_agg['full name'].apply(lambda x: [x.strip()] if type(x) != list else x)
    
    return df_agg

In [9]:
def transformonecoldf(df, state):
    #do transformdf but for when you only have one full name oclumn
    df['full name'] =  df['First Name'] + " " + df['Last Name']
    df['state'] = state
    fn_fuzzy = genFuzzyDict(df)
    df['full name'] = df['full name'].apply(lambda x: 
                                            x if x not in fn_fuzzy.keys() else fn_fuzzy[x])    
    df_agg = df[['full name', 'state', '6p_Dollar', '6p_Cents', 
                 '6p_def_Dollar', '6p_def_Cents', '3p_Dollar', '3p_Cents']]
    
    df_agg = df_agg[df_agg['full name'].apply(lambda x: not pd.isnull(x))]
    df_ind = df_agg[df_agg['full name'].apply(lambda x: len(x.strip().split(" ")) > 2 
                                              and " and " in x)].index
    df_agg.loc[df_ind, 'full name'] = df_agg.loc[df_ind]['full name'].apply(lambda x: parseNames(x))
    df_agg['full name'] = df_agg['full name'].apply(lambda x: [x.strip()] if type(x) != list else x)
    return df_agg

# Connecticut Continental Debt Dataset Matching

In [416]:
#prepare loan dataset
CT_CD = pd.read_excel("Data/Post1790/CT/CT_post1790_CD_ledger.xlsx", 
                      header = 13, usecols = 'H, I, N, O, X, Y, AD, AE, AN, AO, AT, AU')
CT_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 'First Name.1', 'Last Name.1', '6p_def_Dollar', '6p_def_Cents',
                  'First Name.2', 'Last Name.2', '3p_Dollar', '3p_Cents']
CT_CD_agg = transformdf(CT_CD, 'CT')

# Maryland Continental Debt Dataset Matching

In [417]:
#prepare loan dataset
MD_CD = pd.read_excel("Data/Post1790/MD/MD_post1790_CD.xlsx", 
                      header = 11, usecols = 'G, H, L, M, U, V, Z, AA, AI, AJ, AN, AO')
MD_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 'First Name.1', 'Last Name.1', '6p_def_Dollar', '6p_def_Cents',
                 'First Name.2', 'Last Name.2', '3p_Dollar', '3p_Cents']
MD_CD_agg = transformdf(MD_CD, 'MD')

In [418]:
cumulative_CD = pd.concat([CT_CD_agg, MD_CD_agg])

# North Carolina Continental Debt Dataset Matching

In [419]:
#prepare loan dataset
NC_CD = pd.read_excel("Data/Post1790/NC/T695_R4_NC_CD.xlsx", 
                      header = 11, usecols = 'J, K, W, X, Z, AA, AC, AD ')
NC_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 '6p_def_Dollar', '6p_def_Cents', '3p_Dollar', '3p_Cents']
NC_CD_agg = transformonecoldf(NC_CD, 'NC')

In [420]:
cumulative_CD = pd.concat([NC_CD_agg, cumulative_CD])

# New Hampshire Continental Debt Dataset Matching

In [421]:
#prepare loan dataset
NH_CD = pd.read_excel("Data/Post1790/NH/T652_R6_New_Hampshire_CD.xlsx", 
                      header = 10, usecols = 'I, J, N, O, P, Q, R, S')
NH_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents',  
                 '6p_def_Dollar', '6p_def_Cents', '3p_Dollar', '3p_Cents']
NH_CD_agg = transformonecoldf(NH_CD, 'NH')

In [422]:
cumulative_CD = pd.concat([NH_CD_agg, cumulative_CD])

# New York Continental Debt Dataset Matching

In [423]:
#prepare loan dataset
NY_CD = pd.read_excel("Data/Post1790/NY/NY_1790_CD.xlsx", 
                      header = 11, usecols = 'H, I, M, N, X, Y, AC, AD, AM, AN, AR, AS')
NY_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 'First Name.1', 'Last Name.1', '6p_def_Dollar', '6p_def_Cents',
                 'First Name.2', 'Last Name.2', '3p_Dollar', '3p_Cents']
NY_CD_agg = transformdf(NY_CD, 'NY')

In [424]:
cumulative_CD = pd.concat([NY_CD_agg, cumulative_CD])

# South Carolina Continental Debt Dataset Matching

In [425]:
#prepare loan dataset
SC_CD = pd.read_excel("Data/Post1790/SC/Post_1790_South_Carolina_CD.xlsx", 
                      header = 11, usecols = 'D, E, M, N, S, T, AB, AC, AH, AI, AQ, AR')
SC_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 'First Name.1', 'Last Name.1', '6p_def_Dollar', '6p_def_Cents',
                 'First Name.2', 'Last Name.2', '3p_Dollar', '3p_Cents']
SC_CD_agg = transformdf(SC_CD, 'SC')

In [426]:
cumulative_CD = pd.concat([SC_CD_agg, cumulative_CD])

# Pennsylvania Continental Debt Dataset Matching

In [427]:
#prepare loan dataset
PA_CD = pd.read_excel("Data/Post1790/PA/PA_post1790_CD.xlsx", 
                      header = 11, usecols = 'G, H, L, M, U, V, Z, AA, AI, AJ, AO, AP')
PA_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 'First Name.1', 'Last Name.1', '6p_def_Dollar', '6p_def_Cents',
                 'First Name.2', 'Last Name.2', '3p_Dollar', '3p_Cents']
PA_CD_agg = transformdf(PA_CD, 'PA')

In [428]:
cumulative_CD = pd.concat([PA_CD_agg, cumulative_CD])

# Rhode Island Continental Debt Dataset Matching

In [429]:
#prepare loan dataset
RI_CD = pd.read_excel("Data/Post1790/RI/T653_Rhode_Island_CD.xlsx", 
                      header = 11, usecols = 'G, H, L, M, U, V, Z, AA, AI, AJ, AN, AO')
RI_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 'First Name.1', 'Last Name.1', '6p_def_Dollar', '6p_def_Cents',
                'First Name.2', 'Last Name.2', '3p_Dollar', '3p_Cents']
RI_CD_agg = transformdf(RI_CD, 'RI')

In [430]:
cumulative_CD = pd.concat([RI_CD_agg, cumulative_CD])

# Virginia Continental Debt Dataset Matching

In [431]:
#prepare loan dataset
VA_CD = pd.read_excel("Data/Post1790/VA/VA_CD.xlsx", 
                      header = 11, usecols = 'H, I, K, L, U, V, X, Y, AH, AI, AK, AL')
VA_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents', 
                 'First Name.1', 'Last Name.1', '6p_def_Dollar', '6p_def_Cents',
                'First Name.2', 'Last Name.2', '3p_Dollar', '3p_Cents']
VA_CD_agg = transformdf(VA_CD, 'VA')

In [432]:
cumulative_CD = pd.concat([VA_CD_agg, cumulative_CD])

# Georgia Continental Debt Dataset Matching

In [433]:
#prepare loan dataset
GA_CD = pd.read_excel("Data/Post1790/GA/T694_GA_Loan_Office_CD.xlsx", 
                      header = 10, usecols = 'Q, R, Z, AA, AB, AC, AD, AE')
GA_CD.columns = ['First Name', 'Last Name', '6p_Dollar', '6p_Cents',  
                 '6p_def_Dollar', '6p_def_Cents', '3p_Dollar', '3p_Cents']
GA_CD_agg = transformonecoldf(GA_CD, 'GA')

In [434]:
cumulative_CD = pd.concat([GA_CD_agg, cumulative_CD])

# New Jersey Continental Debt Dataset Matching

In [435]:
#prepare loan dataset
#new jersey is handled manually because it only has 3 percent stock
NJ_CD = pd.read_excel("Data/Post1790/NJ/NJ_3_percent_stock_T698_R1_R2.xlsx", 
                      header = 11, usecols = 'D, E, L, M')
NJ_CD.columns = ['First Name', 'Last Name', '3p_Dollar', '3p_Cents']
NJ_CD['full name'] =  NJ_CD['First Name'] + " " + NJ_CD['Last Name']
NJ_CD['state'] = 'NJ'
fn_fuzzy = genFuzzyDict(NJ_CD)
NJ_CD['full name'] = NJ_CD['full name'].apply(lambda x: x if x not in fn_fuzzy.keys() else fn_fuzzy[x])    
NJ_CD_agg = NJ_CD[['full name', 'state', '3p_Dollar', '3p_Cents']]

NJ_CD_agg = NJ_CD_agg[NJ_CD_agg['full name'].apply(lambda x: not pd.isnull(x))]
NJ_ind = NJ_CD_agg[NJ_CD_agg['full name'].apply(lambda x: len(x.strip().split(" ")) > 2 
                                                and " and " in x)].index
NJ_CD_agg.loc[NJ_ind, 'full name'] = NJ_CD_agg.loc[NJ_ind]['full name'].apply(lambda x: parseNames(x))
NJ_CD_agg['full name'] = NJ_CD_agg['full name'].apply(lambda x: [x.strip()] if type(x) != list else x)

# Summary Analysis

In [436]:
#create dataset in final form
cumulative_CD = pd.concat([NJ_CD_agg, cumulative_CD])

In [437]:
cumulative_CD.reset_index(drop = True, inplace = True)

In [438]:
#turn full name column from list into strings
fname = cumulative_CD['full name'].apply(pd.Series)
fname.columns = ['full name 1', 'full name 2', 'full name 3']
cumulative_CD = pd.concat([cumulative_CD, fname], axis = 1)

In [439]:
#manual data change
cumulative_CD.loc[3881, 'full name 2'] = 'Horace Johnson'
cumulative_CD.loc[5190, 'full name 2'] = 'Horace Johnson'

## How many unique individuals were issued 6 percent stocks or deferred 6 percent stocks in 1790 and after?

In [297]:
stocks_6 = cumulative_CD[['6p_Dollar', '6p_Cents', 
                          '6p_def_Dollar', '6p_def_Cents']].dropna(thresh = 1).index
print('table of number of unique individuals issued 6% stocks (normal or deferred) by state')
cumulative_CD.groupby('state')['full name'].agg(sum).apply(lambda x: len(set(x)))

table of number of unique individuals issued 6% stocks (normal or deferred) by state


state
CT    754
GA     55
MD    361
NC     54
NH    169
NJ    568
NY    915
PA    858
RI    513
SC    281
VA    523
Name: full name, dtype: int64

# How many of these individuals
- were original purchasers of loan office certicates of the same state as the 6 percent stock?
- were original purchasers of loan office certicates issued from another state?
- were original recipients of liquidated debtcerti cates issued by the same-state loan office? other state loan offices?
- were original recipients of the Pierce Certicates?

In [298]:
def fuzzy_merge(lst1, lst2, threshold=85, limit = 100):
    delegates = pd.Series([x for x in lst1.unique() if not pd.isnull(x)])
    possible =  [x for x in lst2.unique().tolist() if type(x) == str]
 
    #get matches
    #process.extract uses a combination of all four fuzzywuzzy scores
    matches = delegates.apply(lambda x: 
                              process.extract(x, possible, limit=limit, score_cutoff = threshold))
    
    match_df = pd.DataFrame(columns = ['Delegates', 'Loan Matches'])
    #make each match a row in the dataframe
    for delegate, matchset in zip(delegates, matches):
        matchset_thres = [name for name in matchset if name[1] >= threshold]
        if len(matchset_thres) == 0:
            add_df = pd.DataFrame(data = {'Delegates': [delegate], 'Loan Matches': [""], 'Scores': [0]})
            match_df = pd.concat([match_df, add_df])
        else:
            delegate_lst = [delegate] * len(matchset_thres)
            add_df = pd.DataFrame(data = {'Delegates': delegate_lst, 
                                          'Loan Matches': [x[0] for x in matchset_thres],
                                          'Scores': [x[1] for x in matchset_thres]})
            match_df = pd.concat([match_df, add_df])

    return match_df

In [299]:
#function for performing the second step of the match
def matchFunction(lst1, lst2, score = 90):
    #check if our matches are actually min 2 words each
    #make sure our match is because the individual are similar, not because the phrase or one word in the phrase is similar
    #lst1 = list(pd.Series(lst1).unique())
    #lst2 = list(pd.Series(lst2).unique())
    threshold = min(len(lst1), len(lst2))
    matches = 0
    nomatch = []
    i = 0
    for wd1 in lst1:
        #modifying which words we compare - dont want to compare first in lst1 with last in lst2
        for wd2 in lst2:
            if wd1 not in nomatch and process.extract(wd1, [wd2])[0][1] > score:
                matches+=1
                nomatch.append(wd1)
        i+=1
    return matches >= threshold

In [300]:
def produceMatches(delegates, debt, delegate_names, debt_names, threshold = 85):
    initial = True
    join_df = pd.DataFrame()
    #run firs step of matching function
    for del_name in delegate_names:
        for debt_name in debt_names:
            if initial:
                join_df = fuzzy_merge(delegates[del_name], debt[debt_name], threshold)
                initial = False
            else:
                add_df = fuzzy_merge(delegates[del_name], debt[debt_name], threshold)
                join_df = pd.concat([join_df, add_df])
    join_df = join_df.drop_duplicates().reset_index(drop = True)
    join_df = join_df[join_df['Scores'].apply(lambda x: x != 0)]
    join_df = join_df[join_df['Loan Matches'].apply(lambda x: not pd.isnull(x))]    
    #run second step of matching function
    join_df_p2 = join_df[join_df['Loan Matches'].apply(lambda x: len(list(set(x.replace("??", "").strip().split(" "))))>=2)]
    join_df_p2_final = join_df_p2[[matchFunction(x.split(" "), y.split(" ")) for x, y in zip(join_df_p2['Delegates'], join_df_p2['Loan Matches'])]]
    return join_df_p2_final

In [301]:
#import and preprocess loan office data
loan_office = pd.read_csv('Data/Pre1790/cleaned/loan_office_certificates_9_states_cleaned.csv', index_col = 0)
states = ['NH', 'MA', 'CT', 'NY', 'NJ', 'PA', 'DE', 'MD', 'VA']
num_names = [1, 2, 2, 3, 2, None, 2, None, None]
state_names = dict(zip(np.arange(1, 10, 1), states))
loan_office['State Name'] = loan_office['State'].apply(lambda x: state_names[x])
loan_office['Full Name 1'] = (loan_office['First Name 1 '].apply(lambda x: stringConvert(x)) + " " + loan_office['Last Name 1 '].apply(lambda x: stringConvert(x)))
loan_office['Full Name 2'] = (loan_office['First Name 2'].apply(lambda x: stringConvert(x)) + " " + loan_office['Last Name 2'].apply(lambda x: stringConvert(x)))
loan_office['Full Name 3'] = (loan_office['First Name 3'].apply(lambda x: stringConvert(x)) + " " + loan_office['Last Name 3'].apply(lambda x: stringConvert(x)))

### How many individuals were original purchasers of loan office certicates of the same state as the 6 percent stock?

In [302]:
#match cd debt data with loan office data from the same state
def loanOfficeSameState(state):
    #filter for 6% stock
    state_ind = cumulative_CD[cumulative_CD['state'] == state][['6p_Dollar', '6p_Cents', '6p_def_Dollar', '6p_def_Cents']].dropna(thresh = 1).index
    #skip empty dataframes
    if len(state_ind) != 0:
        #prepare state and loan office data
        state_cd = cumulative_CD.loc[state_ind][['full name 1','full name 2','full name 3', 'state']].drop_duplicates()
        state_cd.columns = ['cd name 1','cd name 2','cd name 3', 'cd state']
        loan_office_state = loan_office[loan_office['State Name'] == state][['Full Name 1', 'Full Name 2', 'Full Name 3', 'State Name']].drop_duplicates()
        loan_office_state.columns = ['loan office name 1', 'loan office name 2', 'loan office name 3', 'loan office state']
        #match data
        matches = produceMatches(state_cd, loan_office_state, delegate_names = ['cd name 1', 'cd name 2', 'cd name 3'], debt_names = ['loan office name 1', 'loan office name 2', 'loan office name 3'], threshold = 95)
        matches['state'] = state
        return matches

In [303]:
df_loanoffice_samestate = pd.DataFrame({}, columns = ['Delegates', 'Loan Matches', 'Scores', 'state'])

In [304]:
#combine matches from all the states
for state in states:
    df_loanoffice_samestate = pd.concat([df_loanoffice_samestate, loanOfficeSameState(state)])
df_loanoffice_samestate.columns = ['CD name', 'Loan Office name', 'Scores', 'state']

In [305]:
df_loanoffice_samestate = df_loanoffice_samestate[df_loanoffice_samestate['CD name'].apply(lambda x: len(x.split(" ")) > 1)]

In [306]:
#summarize results
print("Number of individuals who were original purchasers of loan office certicates of the same state as the 6 percent stock")
df_loanoffice_samestate.groupby('state')['CD name'].apply(lambda x: len(x.unique()))

Number of individuals who were original purchasers of loan office certicates of the same state as the 6 percent stock


state
CT    316
MD    103
NH     62
NY     28
PA    312
VA     61
Name: CD name, dtype: int64

In [307]:
df_loanoffice_samestate.reset_index(drop = True, inplace = True)

In [308]:
#manual edits
rem_ind = [233, 432, 491, 522, 542, 543, 609, 627, 656, 662, 740, 744, 774]
df_loanoffice_samestate = df_loanoffice_samestate.loc[[ind for ind in df_loanoffice_samestate.index if ind not in rem_ind]]

### How many individuals were original purchasers of loan office certicates issued from another state?

In [309]:
#higher match threshold for non-same state loan office certificates

In [310]:
#match cd debt data with loan office data from a different state
def loanOfficeDifState(state):
    #filter for 6% stock
    state_ind = cumulative_CD[cumulative_CD['state'] == state][['6p_Dollar', '6p_Cents', '6p_def_Dollar', '6p_def_Cents']].dropna(thresh = 1).index
    if len(state_ind) != 0:
        state_cd = cumulative_CD.loc[state_ind][['full name 1','full name 2','full name 3', 'state']].drop_duplicates()
        state_cd.columns = ['cd name 1','cd name 2','cd name 3', 'cd state']
        loan_office_nostate = loan_office[loan_office['State Name'] != state][['Full Name 1', 'Full Name 2', 'Full Name 3', 'State Name']].drop_duplicates()
        loan_office_nostate.columns = ['loan office name 1', 'loan office name 2', 'loan office name 3', 'loan office state']
        #match data
        matches = produceMatches(state_cd, loan_office_nostate, delegate_names = ['cd name 1'], debt_names = ['loan office name 1', 'loan office name 2', 'loan office name 3'], threshold = 95)
        matches['state'] = state
        return matches

In [311]:
df_loanoffice_difstate = pd.DataFrame({}, columns = ['Delegates', 'Loan Matches', 'Scores', 'state'])

In [312]:
#combine matches from all the states
for state in states:
    df_loanoffice_difstate = pd.concat([df_loanoffice_difstate, loanOfficeDifState(state)])
df_loanoffice_difstate.columns = ['CD name', 'Loan Office name', 'Scores', 'state']

In [313]:
df_loanoffice_difstate = df_loanoffice_difstate[df_loanoffice_difstate['CD name'].apply(lambda x: len(x.split(" ")) > 1)]

In [314]:
df_loanoffice_difstate.groupby('state')['CD name'].apply(lambda x: len(x.unique()))

state
CT    111
MD     59
NH     41
NY     25
PA    117
VA     98
Name: CD name, dtype: int64

In [315]:
df_loanoffice_difstate.reset_index(drop = True, inplace = True)

In [316]:
#manual edits
rem_ind = [87, 88, 89, 90, 91, 143, 144, 145, 160, 161, 204, 205, 206, 361, 362, 368, 369, 370, 371, 372, 378, 379, 380, 404, 459, 460, 461]
df_loanoffice_difstate = df_loanoffice_difstate.loc[[ind for ind in df_loanoffice_difstate.index if ind not in rem_ind]]

### How many individuals were original recipients of liquidated debt certificates issued by the same-state loan office? other state loan offices?

#### Same State

In [440]:
#match CD state data with liquidated debt from the same state
def liquidatedSameStateDebt(state, file, num_names):
    #filter for 6% stock   
    state_ind = cumulative_CD[cumulative_CD['state'] == state][['6p_Dollar', '6p_Cents']].dropna(thresh = 1).index
    if len(state_ind) != 0:
        state_cd = cumulative_CD.loc[state_ind][['full name 1','full name 2','full name 3', 'state']].drop_duplicates()
        state_cd.columns = ['cd name 1','cd name 2','cd name 3', 'cd state']
        #import liquidated state debt files
        datafile = 'Data/Pre1790/cleaned/'+file
        if exists(datafile):
            state_cert = pd.read_csv(datafile, index_col = 0)
            namelst = []
            #figure out how many full name columns there are in the state liquidated debt file
            state_cert['Full Name'] = state_cert['First name'] + " " + state_cert['Last name'] 
            namelst.append('Full Name')
            if num_names > 1:
                for i in np.arange(2, num_names+1, 1):
                    fullname_str = 'Full Name ' + str(i)
                    state_cert[fullname_str] = state_cert['First name ' + str(i)] + " " + state_cert['Last name ' + str(i)] 
                    namelst.append(fullname_str)
            state_cert_names = state_cert[namelst].drop_duplicates()
            #produce matches
            matches = produceMatches(state_cd, state_cert_names, delegate_names = ['cd name 1'], debt_names = namelst)
            matches['state'] = state
            return matches

In [441]:
df_samestateliquid = pd.DataFrame({}, columns = ['Delegates', 'Loan Matches', 'Scores', 'state'])

In [442]:
state_name = dict(zip(states, num_names))

In [443]:
#combine matches from all the states
for state, num_name in state_name.items():
    if state != "PA":
        file = 'liquidated_debt_certificates_'+state+'_cleaned.csv'
        df_samestateliquid = pd.concat([df_samestateliquid, liquidatedSameStateDebt(state, file, num_name)])
    else:
        file1 = 'liquidated_debt_certificates_PA_story_cleaned.csv'
        df1 = liquidatedSameStateDebt('PA', file1, 1)
        file2 = 'liquidated_debt_certificates_PA_stelle_cleaned.csv'
        df2 = liquidatedSameStateDebt('PA', file2, 2)
        df = pd.concat([df1, df2]).drop_duplicates()
        df_samestateliquid = pd.concat([df_samestateliquid, df])
df_samestateliquid.columns = ['CD name', 'Loan Office name', 'Scores', 'state']

In [445]:
df_samestateliquid = df_samestateliquid[df_samestateliquid['CD name'].apply(lambda x: len(x.split(" ")) > 1)]

In [446]:
#summarize results
df_samestateliquid.groupby('state')['CD name'].apply(lambda x: len(x.unique()))

state
CT    100
NH     28
NY     38
PA    213
Name: CD name, dtype: int64

In [447]:
df_samestateliquid.reset_index(drop = True, inplace = True)

In [448]:
#manual edits
rem_ind = [149, 150, 151, 176, 187, 259, 275, 291, 320, 323, 324, 364]
df_samestateliquid = df_samestateliquid.loc[[ind for ind in df_samestateliquid.index if ind not in rem_ind]]

#### Different State

In [450]:
#match CD state data with liquidated debt from a dif state
def liquidatedDifStateDebt(state):
    #filter for 6% stock   
    state_ind = cumulative_CD[cumulative_CD['state'] == state][['6p_Dollar', '6p_Cents']].dropna(thresh = 1).index
    if len(state_ind) != 0:
        state_cd = cumulative_CD.loc[state_ind][['full name 1','full name 2','full name 3', 'state']].drop_duplicates()
        state_cd.columns = ['cd name 1','cd name 2','cd name 3', 'cd state']
        #import liquidated state debt files
        match_df = pd.DataFrame({}, columns = ['Delegates', 'Loan Matches', 'Scores'])
        for statename, num_names in state_name.items():
            if not pd.isnull(num_names):
                datafile = 'Data/Pre1790/cleaned/liquidated_debt_certificates_'+statename+'_cleaned.csv'
                matches = produceLiquidatedMatches(datafile, num_names, state_cd)
                match_df = pd.concat([match_df, matches])
            elif state == 'PA':    
                datafile1 = 'Data/Pre1790/cleaned/liquidated_debt_certificates_PA_story_cleaned.csv'
                matches1 = produceLiquidatedMatches(datafile, 1, state_cd)
                datafile2 = 'Data/Pre1790/cleaned/liquidated_debt_certificates_PA_stelle_cleaned.csv'
                matches2 = produceLiquidatedMatches(datafile, 2, state_cd)
                match_df = pd.concat([match_df, matches2])
        match_df['state'] = state
        return match_df

In [452]:
df_difstateliquid = pd.DataFrame({}, columns = ['Delegates', 'Loan Matches', 'Scores', 'state'])

In [453]:
#combine matches from all the states
for state in states:
    df_difstateliquid = pd.concat([df_difstateliquid, liquidatedDifStateDebt(state)])
df_difstateliquid.columns = ['CD name', 'Loan Office name', 'Scores', 'state']

In [455]:
df_difstateliquid.reset_index(drop = True, inplace = True)

In [456]:
#manual edits
rem_ind = [760, 778, 783, 806, 878]
df_difstateliquid = df_difstateliquid.loc[[ind for ind in df_difstateliquid.index if ind not in rem_ind]]

In [457]:
df_difstateliquid.groupby('state')['CD name'].apply(lambda x: len(x.unique()))

state
CT    180
MD     41
NH     59
NY     49
PA    131
VA     57
Name: CD name, dtype: int64

### How many individuals were original recipients of the Pierce Certicates?

In [458]:
pierce = pd.read_csv('Data/Pre1790/cleaned/'+"Pierce_Certs_cleaned_2021.csv", index_col = 0)
pierce['Full Name'] = pierce['First'].apply(lambda x: stringConvert(x)) + " " + pierce['Last'].apply(lambda x: stringConvert(x))
pierce['Full Name 2'] = pierce['First 2'].apply(lambda x: stringConvert(x)) + " " + pierce['Last 2'].apply(lambda x: stringConvert(x))

In [459]:
#match cd debt data with loan office data from the Pierce certificate data
def pierceCertificates(state):
    #filter for 6% stock
    state_ind = cumulative_CD[cumulative_CD['state'] == state][['6p_Dollar', '6p_Cents', '6p_def_Dollar', '6p_def_Cents']].dropna(thresh = 1).index
    if len(state_ind) != 0:
        state_cd = cumulative_CD.loc[state_ind][['full name 1','full name 2','full name 3', 'state']].drop_duplicates()
        state_cd.columns = ['cd name 1','cd name 2','cd name 3', 'cd state']
        #match data
        pierce_names = pierce[pierce['State'].apply(lambda x: pd.isnull(x) or x == state)][['Full Name', 'Full Name 2']].drop_duplicates()
        matches = produceMatches(state_cd, pierce_names, delegate_names = ['cd name 1'], debt_names = ['Full Name', 'Full Name 2'])
        matches['state'] = state
        return matches

In [460]:
df_pierce = pd.DataFrame({}, columns = ['Delegates', 'Loan Matches', 'Scores', 'state'])

In [461]:
for state in states:
    df_pierce = pd.concat([df_pierce, pierceCertificates(state)])
df_pierce.columns = ['CD name', 'Loan Office name', 'Scores', 'state']

In [462]:
df_pierce = df_pierce[df_pierce['CD name'].apply(lambda x: len(x.split(" ")) > 1)]

In [463]:
df_pierce.groupby('state')['CD name'].apply(lambda x: len(x.unique()))

state
CT    177
MD     84
NH     38
NY     29
PA    171
VA    143
Name: CD name, dtype: int64

In [464]:
df_pierce.reset_index(drop = True, inplace = True)

In [465]:
#manual edits
rem_ind = [3, 5, 35, 39, 40, 104, 138, 139, 145, 146, 206, 207, 208, 209, 210, 276, 278, 279, 280, 281, 282, 283, 286, 287, 288, 289, 
           293, 303, 304, 305, 306, 307, 318, 321, 322, 339, 340, 341, 342, 361, 362, 376, 377, 381, 389, 390, 397, 398, 399, 414, 415, 
           416, 417, 418, 419, 423, 424, 425, 426, 427, 428, 429, 436, 436, 445, 446, 543, 584, 585, 586, 587, 600, 634, 635, 636, 637, 
           641, 652, 653, 654, 655, 656, 657, 658, 670, 671]
df_pierce = df_pierce.loc[[ind for ind in df_pierce.index if ind not in rem_ind]]

## Organizing all our results into one table

In [466]:
#column to add the matching names into a total table containing cd loan names
#and corresopnding match names for each pre1790 loan times
def mergeNames(colname, df):
    loss_dict = dict(df.groupby('CD name')['Loan Office name'].apply(lambda x: list(x)))
    cumulative_CD[colname + ' 1'] = cumulative_CD['full name 1'].apply(lambda name: loss_dict.get(name, np.nan))
    cumulative_CD[colname + ' 2'] = cumulative_CD['full name 2'].apply(lambda name: loss_dict.get(name, np.nan))
    cumulative_CD[colname + ' 3'] = cumulative_CD['full name 3'].apply(lambda name: loss_dict.get(name, np.nan))
    cumulative_CD[colname] = pd.Series(cumulative_CD[[colname + ' 1', colname + ' 2', colname + ' 3']].values.tolist()).apply(lambda lst: [x for x in lst if type(x) != float])
    cumulative_CD[colname] = cumulative_CD[colname].apply(lambda x: list(set(list(itertools.chain.from_iterable(x)))) if x != [] else np.nan)
    cumulative_CD.drop([colname + ' 1', colname + ' 2', colname + ' 3'], inplace = True, axis = 1)

In [467]:
#run process on all four pre1790 loan types
mergeNames('Same State Loan Office', df_loanoffice_samestate)
mergeNames('Different State Loan Office', df_loanoffice_difstate)
mergeNames('Same State Liquidated Debt', df_samestateliquid)
mergeNames('Different State Liquidated Debt', df_difstateliquid)
mergeNames('Pierce Certificates', df_pierce)

In [472]:
#number of loan types one person had
cumulative_CD['tot_count'] = 5 - cumulative_CD[['Same State Loan Office',
                                                'Different State Loan Office',
                                                'Same State Liquidated Debt',
                                                'Different State Liquidated Debt',
                                                'Pierce Certificates']].isna().sum(axis = 1)
cumulative_CD.drop(['3p_Cents','3p_Dollar', 
                    'full name 1','full name 2','full name 3'], axis = 1, inplace = True)

In [473]:
#some preprocesing - creating dictionaries to add back in data, after I turned the list of names into a string to remove duplicates
#the original data we imported above was lost so we readd it by creating dictionaries
cumulative_CD['str name'] = cumulative_CD['full name'].apply(lambda x: str(x)) + "____" + cumulative_CD['state']
fullnamedict = dict(zip(cumulative_CD['str name'], cumulative_CD['full name']))
statenamedict = dict(zip(cumulative_CD['str name'], cumulative_CD['state']))
sslodict = dict(zip(cumulative_CD['str name'], cumulative_CD['Same State Loan Office']))
dslodict = dict(zip(cumulative_CD['str name'], cumulative_CD['Different State Loan Office']))
sslddict = dict(zip(cumulative_CD['str name'], cumulative_CD['Same State Liquidated Debt']))
dslddict = dict(zip(cumulative_CD['str name'], cumulative_CD['Different State Liquidated Debt']))
pcdict = dict(zip(cumulative_CD['str name'], cumulative_CD['Pierce Certificates']))

In [477]:
#use dictionaries to link data to matched values for each loan type
cumulative_CD_assets = cumulative_CD.groupby('str name')['6p_Cents','6p_Dollar',
                                                         '6p_def_Cents','6p_def_Dollar'].sum()
cumulative_CD_assets.reset_index(inplace = True)
cumulative_CD_assets['full name'] = cumulative_CD_assets['str name'].apply(lambda x: 
                                                                           fullnamedict[x])
cumulative_CD_assets['state'] = cumulative_CD_assets['str name'].apply(lambda x: 
                                                                       statenamedict[x])
cumulative_CD_assets['Same State Loan Office'] = cumulative_CD_assets['str name'].apply(lambda x: 
                                                                                        sslodict[x])
cumulative_CD_assets['Different State Loan Office'] = cumulative_CD_assets['str name'].apply(lambda x: 
                                                                                             dslodict[x])
cumulative_CD_assets['Same State Liquidated Debt'] = cumulative_CD_assets['str name'].apply(lambda x: 
                                                                                            sslddict[x])
cumulative_CD_assets['Different State Liquidated Debt'] = cumulative_CD_assets['str name'].apply(lambda x: 
                                                                                                 dslddict[x])
cumulative_CD_assets['Pierce Certificates'] = cumulative_CD_assets['str name'].apply(lambda x: 
                                                                                     pcdict[x])
cumulative_CD_assets['Total'] = (cumulative_CD_assets['6p_def_Cents'] + 
                                 cumulative_CD_assets['6p_Cents'])/100 + (cumulative_CD_assets['6p_Dollar'] + 
                                                                          cumulative_CD_assets['6p_def_Dollar'])
cumulative_CD_assets.drop(['6p_def_Cents', '6p_Cents', 
                           '6p_Dollar', '6p_def_Dollar', 'str name'], axis = 1, inplace = True)

### Same State Loan Office

In [479]:
#add asset counts for each individual on each row to the state loan office table for the same state merging method
def ssloTotal(ind):
    name_options = cumulative_CD_assets.loc[ind, 'Same State Loan Office']
    state = cumulative_CD_assets.loc[ind, 'state']
    state_office = loan_office[loan_office['State Name'] == state]
    ind1 = state_office[state_office['Full Name 1'].apply(lambda x: x in name_options)].index.tolist()
    ind2 = state_office[state_office['Full Name 2'].apply(lambda x: x in name_options)].index.tolist()
    ind3 = state_office[state_office['Full Name 3'].apply(lambda x: x in name_options)].index.tolist()
    ind1.extend(ind2)
    ind1.extend(ind3)
    total_val = state_office.loc[ind1]['Specie Value '].sum()
    return total_val

In [480]:
cumulative_CD_assets['SSLO Total'] = np.nan
ssloIndex = cumulative_CD_assets[cumulative_CD_assets['Same State Loan Office'].apply(lambda x: type(x) == list)].index
cumulative_CD_assets.loc[ssloIndex, 'SSLO Total'] = [ssloTotal(x) for x in ssloIndex]
cumulative_CD_assets['SSLO Total'] = cumulative_CD_assets['SSLO Total'].fillna(0)

### Different State Loan Office

In [481]:
#add asset counts for each individual on each row to the state loan office table for the dif state merging method
def dsloTotal(ind):
    name_options = cumulative_CD_assets.loc[ind, 'Different State Loan Office']
    state = cumulative_CD_assets.loc[ind, 'state']    
    state_office = loan_office[loan_office['State Name'] != state]
    ind1 = state_office[state_office['Full Name 1'].apply(lambda x: x in name_options)].index.tolist()
    ind2 = state_office[state_office['Full Name 2'].apply(lambda x: x in name_options)].index.tolist()
    ind3 = state_office[state_office['Full Name 3'].apply(lambda x: x in name_options)].index.tolist()
    ind1.extend(ind2)
    ind1.extend(ind3)
    total_val = state_office.loc[ind1]['Specie Value '].sum()
    return total_val

In [482]:
cumulative_CD_assets['DSLO Total'] = np.nan
dsloIndex = cumulative_CD_assets[cumulative_CD_assets['Different State Loan Office'].apply(lambda x: 
                                                                                           type(x) == list)].index
cumulative_CD_assets.loc[dsloIndex, 'DSLO Total'] = [dsloTotal(x) for x in dsloIndex]
cumulative_CD_assets['DSLO Total'] = cumulative_CD_assets['DSLO Total'].fillna(0)

### Same State Liquidated Debt Certificates

In [486]:
#function to add values of liquidated debt certificates
def ssldTotal(ind):
    name_options = cumulative_CD_assets.loc[ind, 'Same State Liquidated Debt']
    state = cumulative_CD_assets.loc[ind, 'state']
    
    if state != 'PA' and state in state_name.keys():
        num_names = state_name[state]
        state_certs_file = 'Data/Pre1790/cleaned/liquidated_debt_certificates_'+state+'_cleaned.csv'
        if exists(state_certs_file):
            total_val = calculateTotalValue(state_certs_file, num_names, name_options)
            return total_val
    elif state == 'PA':
        state_certs_file1 = 'Data/Pre1790/cleaned/liquidated_debt_certificates_PA_story_cleaned.csv'
        total_val1 = calculateTotalValue(state_certs_file1, 1, name_options)
        state_certs_file2 = 'Data/Pre1790/cleaned/liquidated_debt_certificates_PA_stelle_cleaned.csv'
        total_val2 = calculateTotalValue(state_certs_file2, 2, name_options)
        return total_val1 + total_val2
    return 0

In [484]:
#calculate total value held by one person in debt certificates from a particular state
def calculateTotalValue(file, num_names, name_options):
    #this part is pretty similar to the merging part for liquidated debt certificates
    state_cert = pd.read_csv(file, index_col = 0)
    state_cert['Full Name'] = state_cert['First name'] + " " + state_cert['Last name'] 
    namelst = []
    namelst.append('Full Name')
    if num_names > 1:
        for i in np.arange(2, num_names+1, 1):
            fullname_str = 'Full Name ' + str(i)
            state_cert[fullname_str] = state_cert['First name ' + str(i)] + " " + state_cert['Last name ' + str(i)] 
            namelst.append(fullname_str)
    ind = []
    for name in namelst:
        ind.extend(state_cert[state_cert[name].apply(lambda x: x in name_options)].index.tolist())
    #create subtable for the data we want, make it into a numeric value and sum it
    subtbl = state_cert.loc[ind]
    subtbl['Dollars'] = subtbl['Dollars'].apply(lambda x: float(x))
    subtbl['90th'] = subtbl['90th'].apply(lambda x: float(x) if x != '22/8' else 22/8)
    total_val = subtbl['Dollars'].sum() + subtbl['90th'].sum()/90
    return total_val

In [487]:
cumulative_CD_assets['SSLD Total'] = np.nan
ssldIndex = cumulative_CD_assets[cumulative_CD_assets['Same State Liquidated Debt'].apply(lambda x: 
                                                                                          type(x) == list)].index
cumulative_CD_assets.loc[ssldIndex, 'SSLD Total'] = [ssldTotal(x) for x in ssldIndex]
cumulative_CD_assets['SSLD Total'] = cumulative_CD_assets['SSLD Total'].fillna(0)

### Different State Liquidated Debt Certificates

In [510]:
#function to add values of liquidated debt certificates
def dsldTotal(ind):
    name_options = cumulative_CD_assets.loc[ind, 'Different State Liquidated Debt']
    state = cumulative_CD_assets.loc[ind, 'state']
    sumval = 0
    for statename in states:
        if statename != 'PA' and not pd.isnull(state_name[statename]):
            num_names = state_name[statename]
            state_certs_file = 'Data/Pre1790/cleaned/liquidated_debt_certificates_'+statename+'_cleaned.csv'
            if exists(state_certs_file):
                total_val = calculateTotalValue(state_certs_file, 
                                                num_names, name_options)
            sumval = sumval + total_val
        elif statename == 'PA':
            state_certs_file1 = 'Data/Pre1790/cleaned/liquidated_debt_certificates_PA_story_cleaned.csv'
            total_val1 = calculateTotalValue(state_certs_file1, 
                                             1, name_options)
            state_certs_file2 = 'Data/Pre1790/cleaned/liquidated_debt_certificates_PA_stelle_cleaned.csv'
            total_val2 = calculateTotalValue(state_certs_file2, 
                                             2, name_options)
            sumval = sumval + total_val1 + total_val2
    return sumval

In [511]:
#calculate total value held by one person in debt certificates from a particular state
def calculateTotalValue(file, num_names, name_options):
    #this part is pretty similar to the merging part for liquidated debt certificates
    state_cert = pd.read_csv(file, index_col = 0)
    state_cert['Full Name'] = state_cert['First name'] + " " + state_cert['Last name'] 
    namelst = []
    namelst.append('Full Name')
    if num_names > 1:
        for i in np.arange(2, num_names+1, 1):
            fullname_str = 'Full Name ' + str(i)
            state_cert[fullname_str] = state_cert['First name ' + str(i)] + " " + state_cert['Last name ' + str(i)] 
            namelst.append(fullname_str)
    ind = []
    for name in namelst:
        ind.extend(state_cert[state_cert[name].apply(lambda x: x in name_options)].index.tolist())
    #create subtable for the data we want, make it into a numeric value and sum it
    subtbl = state_cert.loc[ind]
    subtbl['Dollars'] = subtbl['Dollars'].apply(lambda x: float(x))
    subtbl['90th'] = subtbl['90th'].apply(lambda x: float(x) if x != '22/8' else 22/8)
    total_val = subtbl['Dollars'].sum() + subtbl['90th'].sum()/90
    return total_val

In [513]:
cumulative_CD_assets['DSLD Total'] = np.nan
dsldIndex = cumulative_CD_assets[cumulative_CD_assets['Different State Liquidated Debt'].apply(lambda x:
                                                                                               type(x) == list)].index
cumulative_CD_assets.loc[dsldIndex, 'DSLD Total'] = [dsldTotal(x) for x in dsldIndex]
cumulative_CD_assets['DSLD Total'] = cumulative_CD_assets['DSLD Total'].fillna(0)

### Same State Pierce Certificates

In [517]:
#calculate sum for pierce certificates
def pcTotal(ind):
    name_options = cumulative_CD_assets.loc[ind, 'Pierce Certificates']
    state = cumulative_CD_assets.loc[ind, 'state']
    
    pierce_state = pierce[pierce['State'].apply(lambda x: 
                                                pd.isnull(x) or x == state)]
    ind1 = pierce_state[pierce_state['Full Name'].apply(lambda x: 
                                                        x in name_options)].index.tolist()
    ind2 = pierce_state[pierce_state['Full Name 2'].apply(lambda x: 
                                                          x in name_options)].index.tolist()
    ind1.extend(ind2)
    total_val = pierce_state.loc[ind1]['Value'].sum()
    return total_val

In [518]:
cumulative_CD_assets['PC Total'] = np.nan
pcIndex = cumulative_CD_assets[cumulative_CD_assets['Pierce Certificates'].apply(lambda x: 
                                                                                 type(x) == list)].index
cumulative_CD_assets.loc[pcIndex, 'PC Total'] = [pcTotal(x) for x in pcIndex]
cumulative_CD_assets['PC Total'] = cumulative_CD_assets['PC Total'].fillna(0)

In [519]:
cumulative_CD_assets['Debt Total'] = cumulative_CD_assets[['SSLO Total','DSLO Total',
                                                           'SSLD Total', 'DSLD Total',
                                                           'PC Total']].sum(axis = 1)

In [520]:
pre1790_certs = ['Same State Loan Office','Different State Loan Office', 
                 'Same State Liquidated Debt','Different State Liquidated Debt','Pierce Certificates']
cumulative_CD_assets['tot_pre1790_certs'] = 5 - cumulative_CD_assets[pre1790_certs].isna().sum(axis = 1)

In [522]:
cumulative_CD_assets.to_csv("prepost_matched_debt_files.csv")