<h3> Data Cleaning </h3>

In [25]:
import numpy as np
import pandas as pd
import rapidfuzz
from rapidfuzz import process

In [26]:
delegates = pd.read_csv("../Data/Delegates/cleaned/constitutional_convention_delegates_cleaned.csv", index_col = 0)
state_delegates = pd.read_csv("../Data/Delegates/cleaned/State_Delegates_cleaned.csv", index_col = 0)

In [27]:
print("delegate dimensions:", delegates.shape)
delegates.head()

delegate dimensions: (55, 8)


Unnamed: 0,first name,last name,state,sign?,sign,last name 2,full name 1,full name 2
0,William Samuel,Johnson,Connecticut,yes,yes,,William Samuel Johnson,
1,Roger,Sherman,Connecticut,yes,yes,,Roger Sherman,
2,Oliver,Ellsworth,Connecticut,no,no,Elsworth,Oliver Ellsworth,Oliver Elsworth
3,George,Read,Delaware,yes,yes,,George Read,
4,Gunning,Bedford Jr.,Delaware,yes,yes,,Gunning Bedford Jr.,


In [28]:
print("state delegate dimensions:", state_delegates.shape)
state_delegates.head()

state delegate dimensions: (1696, 9)


Unnamed: 0,Last Name,First Name,Position,State,County / Town,Vote,full name 1,name duplication,state-name duplication
0,Stevens,John,President,NJ,Hunterdon,yes,John Stevens,True,False
1,Fell,John,,NJ,Bergen,yes,John Fell,False,False
2,Zabriskie,Peter,,NJ,Bergen,yes,Peter Zabriskie,False,False
3,Hennion,Cornelius,,NJ,Bergen,yes,Cornelius Hennion,False,False
4,Chetwood,John,,NJ,Essex,yes,John Chetwood,False,False


In [29]:
# eliminating name duplication because I don't think matching by state is feasible
state_delegates = state_delegates[state_delegates['name duplication'] == False]

# Algorithm for Name Matching

1. Run fuzzy match with score threshold of 80
2. Get unique set of names for each pair, get rid of ???
    1. If one word for either, remove match
    2. Else run word comparison - if there are min(n, m) matches (above 90) then keep match else discard
3. Go through manually

In [30]:
def fuzzy_merge(lst1, lst2, threshold=85, limit = 100):
    """
    :param df_1: the left list to join
    :param df_2: the right list to join
    :param threshold: how close the matches should be to return a match, based on Levenshtein distance
    :param limit: the amount of matches that will get returned, these are sorted high to low
    :return: dataframe with boths keys and matches
    """
    
    delegates = pd.Series([x for x in lst1.unique() if not pd.isnull(x)])
    possible =  [x for x in lst2.unique().tolist() if not pd.isnull(x)]
    #get matches
    #process.extract uses a combination of all four fuzzywuzzy scores
    matches = delegates.apply(lambda x: process.extract(x, possible, limit=limit))
    
    match_df = pd.DataFrame(columns = ['Delegates', 'Loan Matches'])
    
    for delegate, matchset in zip(delegates, matches):
        matchset_thres = [name for name in matchset if name[1] >= threshold]
        if len(matchset_thres) == 0:
            add_df = pd.DataFrame(data = {'Delegates': [delegate], 'Loan Matches': [""], 'Scores': [0]})
            match_df = pd.concat([match_df, add_df])
        else:
            delegate_lst = [delegate] * len(matchset_thres)
            add_df = pd.DataFrame(data = {'Delegates': delegate_lst, 
                                          'Loan Matches': [x[0] for x in matchset_thres],
                                          'Scores': [x[1] for x in matchset_thres]})
            match_df = pd.concat([match_df, add_df])

    return match_df

In [31]:
#function for performing the second step of the match
def matchFunction(lst1, lst2, score = 90):
    #check if our matches are actually min 2 words each
    #make sure our match is because the individual are similar, not because the phrase or one word in the phrase is similar
    #lst1 = list(pd.Series(lst1).unique())
    #lst2 = list(pd.Series(lst2).unique())
    threshold = min(len(lst1), len(lst2))
    matches = 0
    nomatch = []
    i = 0
    for wd1 in lst1:
        #modifying which words we compare - dont want to compare first in lst1 with last in lst2
        for wd2 in lst2:
            if wd1 not in nomatch and process.extract(wd1, [wd2])[0][1] > score:
                matches+=1
                nomatch.append(wd1)
        i+=1
    return matches >= threshold

## Matching Loan Office Certificates

### Matching on Constitutional Convention

In [32]:
def produceMatches(delegates, debt, delegate_names, debt_names,
                   threshold = 85, score = 90):
    initial = True
    join_df = pd.DataFrame()
    for del_name in delegate_names:
        for debt_name in debt_names:
            if initial:
                join_df = fuzzy_merge(delegates[del_name], debt[debt_name], threshold)
                initial = False
            else:
                add_df = fuzzy_merge(delegates[del_name], debt[debt_name], threshold)
                join_df = pd.concat([join_df, add_df])
    join_df = join_df.drop_duplicates().reset_index(drop = True)
    join_df = join_df[join_df['Scores'].apply(lambda x: x != 0)]
    join_df = join_df[join_df['Loan Matches'].apply(lambda x: not pd.isnull(x))]    
    
    #clean data
    join_df_p2 = join_df[join_df['Loan Matches'].apply(lambda x: len(list(set(x.replace("??", "").strip().split(" "))))>=2)]
    #run second step of matching
    join_df_p2_final = join_df_p2[[matchFunction(x.split(" "), y.split(" "), score) for x, y in zip(join_df_p2['Delegates'], join_df_p2['Loan Matches'])]]
    #select only the highest scoring loan match name pairing 
    join_df_p2_final.sort_values(by = 'Scores', ascending = False, inplace = True)
    join_df_p2_final_ind = join_df_p2_final[['Delegates','Loan Matches']].drop_duplicates().index
    join_df_p2_final = join_df_p2_final.loc[join_df_p2_final_ind]

    idx = join_df_p2_final.groupby(['Delegates'])['Scores'].transform(max) == join_df_p2_final['Scores']

    return join_df_p2_final[idx]

In [33]:
def addValues(l1, l2):
    for v in l2:
        if v not in l1:
            l1.append(v)
    return l1

In [34]:
def addLoanStates(df):
    df['state1'] = df['Loan Matches'].apply(lambda x: 
                                            list(loans[loans['full name 1'] == x]['State'].unique()))
    df['state2'] = df['Loan Matches'].apply(lambda x: 
                                            list(loans[loans['full name 2'] == x]['State'].unique()))
    df['state3'] = df['Loan Matches'].apply(lambda x: 
                                            list(loans[loans['full name 3'] == x]['State'].unique()))
    df['state'] = [addValues(addValues(s1, s2), s3) for s1, s2, s3 in zip(df['state1'], df['state2'], df['state3'])]
    df['state'] = df['state'].apply(lambda lst: ['unspecified' if pd.isnull(x) else x for x in lst])
    
    df.drop(['state1', 'state2', 'state3'], axis = 1, inplace = True)
    colnames = ['Delegates','Loan Matches','Scores']
    df = pd.concat([df, df['state'].apply(pd.Series)], axis = 1)
    colnames1 = colnames.copy()
    colnames1.extend(np.arange(len(df.columns)-4))
    df = df[colnames1]
    colnames.extend(['name'+str(i) for i in np.arange(len(df.columns) - 3)])
    df.columns = colnames
    df = pd.wide_to_long(df, stubnames = 'name', 
                         i = ['Delegates','Loan Matches', 'Scores'], 
                         j = 'Person').reset_index().drop('Person', axis = 1).drop_duplicates()
    df = df[df['name'].apply(lambda x: not pd.isnull(x))]
    return df

In [35]:
#prepare loan dataset
loans = pd.read_csv("../Data/Pre1790/Cleaned/loan_office_certificates_9_states_cleaned.csv", index_col = 0)

loans = loans.rename(columns = {"First Name 1 ":"First Name 1",
                                "Last Name 1 ":"Last Name 1"})

loans['full name 1'] = loans['First Name 1'] + " " + loans['Last Name 1']
loans['full name 2'] = loans['First Name 2'] + " " + loans['Last Name 2']
loans['full name 3'] = loans['First Name 3'] + " " + loans['Last Name 3']

In [36]:
#name matching for constitutional convention and state convention to debt list
loan_office_const = produceMatches(delegates, loans, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2', 'full name 3'], threshold = 85, score = 90)
loan_office_state = produceMatches(state_delegates, loans, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2', 'full name 3'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  join_df_p2_final.sort_values(by = 'Scores', ascending = False, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  join_df_p2_final.sort_values(by = 'Scores', ascending = False, inplace = True)


In [37]:
loan_office_const = addLoanStates(loan_office_const)
loan_office_state = addLoanStates(loan_office_state)

In [38]:
loan_office_const.rename(columns={"name": "state"}, inplace = True)
loan_office_state.rename(columns={"name": "state"}, inplace = True)

In [39]:
#master dataset
loan_office_const['match'] = 'Loan Office'
loan_office_state['match'] = 'Loan Office'
cumulative_matching_const = loan_office_const
cumulative_matching_state = loan_office_state

In [46]:
state_dict = dict(zip(np.arange(1, 10, 1), ['NH', 'MA', 'CT', 'NY', 'NJ', 'PA', 'DE', 'MD', 'VA']))
cumulative_matching_const['state'] = cumulative_matching_const['state'].apply(lambda x: state_dict[x])

KeyError: 'PA'

In [47]:
cumulative_matching_const[cumulative_matching_const['Scores'] != 100]

Unnamed: 0,Delegates,Loan Matches,Scores,state,match
40,William C. Houston,William C. Huston,97.142857,NJ,Loan Office
42,William Paterson,William Palterson,96.969697,NJ,Loan Office
44,William Paterson,William Patterson,96.969697,PA,Loan Office
46,Oliver Elsworth,Oliver Ellsworth,96.774194,CT,Loan Office
48,William Blount,William Blunt,96.296296,VA,Loan Office
50,David Brearly,David Brearley,96.296296,NJ,Loan Office
52,John F. Mercer,John Mercer,95.0,MD,Loan Office
54,William L. Pierce,William Pierce,95.0,PA,Loan Office
56,Gunning Bedford Jr.,Gunning Bedford,95.0,PA,Loan Office
57,Gunning Bedford Jr.,Gunning Bedford,95.0,DE,Loan Office


* Keep in mind that many individuals bought PA certificates, even if they were not PA residents
Notes
1. Same middle name, loan office state matches delegate state - confident about match
2. No William Palterson's, states match - confident
3. There are many William Patterson's (uncertain if match)
4. 2 Oliver Ellsworth's but fairly confident ours is a match
5. William Blunt's exist in NY - this William Blunt is in VA, and our William Blount is from NC (likely not a match)
6. Loan office state matches delegate state - confident about match
7. Many John Mercer's but same state (likely a match)
8. Many William Pierce's, no state match
9. Gunning Bedford match
10. William Samuel Johnson - many William Johnson's, not a match

# Pierce Certificates

What's the likelihood an individual received Pierce Certificates from two states


In [75]:
#prepare pierce certificates
pierce = pd.read_csv("../Data/Pre1790/Cleaned/Pierce_Certs_cleaned_2021.csv", index_col = 0)

#create full name
pierce['full name 1'] = pierce['First'] + " " + pierce['Last']
pierce['full name 2'] = pierce['First 2'] + " " + pierce['Last 2']

  pierce = pd.read_csv("../Data/Pre1790/Cleaned/Pierce_Certs_cleaned_2021.csv", index_col = 0)


In [77]:
#name matching for constitutional convention and state convention to debt list
pierce_const = produceMatches(delegates, pierce, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2'])
pierce_state = produceMatches(state_delegates, pierce, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  join_df_p2_final.sort_values(by = 'Scores', ascending = False, inplace = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  join_df_p2_final.sort_values(by = 'Scores', ascending = False, inplace = True)


In [78]:
def addPierceStates(df):
    df['state1'] = df['Loan Matches'].apply(lambda x: 
                                            list(pierce[pierce['full name 1'] == x]['State'].unique()))
    df['state2'] = df['Loan Matches'].apply(lambda x: 
                                            list(pierce[pierce['full name 2'] == x]['State'].unique()))
    df['state'] = [addValues(s1, s2) for s1, s2 in zip(df['state1'], df['state2'])]
    df['state'] = df['state'].apply(lambda lst: ['unspecified' if pd.isnull(x) else x for x in lst])

    df.drop(['state1', 'state2'], axis = 1, inplace = True)
    colnames = ['Delegates','Loan Matches','Scores']
    df = pd.concat([df, df['state'].apply(pd.Series)], axis = 1)
    colnames1 = colnames.copy()
    colnames1.extend(np.arange(len(df.columns)-4))
    df = df[colnames1]
    colnames.extend(['name'+str(i) for i in np.arange(len(df.columns) - 3)])
    df.columns = colnames
    df = pd.wide_to_long(df, stubnames = 'name', 
                         i = ['Delegates','Loan Matches', 'Scores'], 
                         j = 'Person').reset_index().drop('Person', axis = 1).drop_duplicates()
    df = df[df['name'].apply(lambda x: not pd.isnull(x))]
    return df

In [79]:
pierce_const = addPierceStates(pierce_const)
pierce_state = addPierceStates(pierce_state)

In [80]:
pierce_const.rename(columns={"name": "state"}, inplace = True)
pierce_state.rename(columns={"name": "state"}, inplace = True)

In [81]:
#master dataset
pierce_const['match'] = 'Pierce'
pierce_state['match'] = 'Pierce'

In [82]:
cumulative_matching_const = pd.concat([cumulative_matching_const, pierce_const]).drop_duplicates()

In [83]:
cumulative_matching_state = pd.concat([cumulative_matching_state, pierce_state]).drop_duplicates()

In [93]:
pierce[pierce['full name 1'] == 'John Mercer']

Unnamed: 0,CN,Last,First,Value,Group,To Whom Issued,State,Officer,Last 2,First 2,original text,full name 1,full name 2
55885,83851.0,Mercer,John,80.0,106.0,Regiment of officers and men paid to different...,,0.0,,,,John Mercer,


In [84]:
pierce_const[pierce_const['Scores'] != 100]

Unnamed: 0,Delegates,Loan Matches,Scores,state,match
88,Abraham Baldwin,Abraham Baldwine,96.774194,CT,Pierce
96,Charles Pinckney,Charles C Pinckney,95.0,unspecified,Pierce
104,William Samuel Johnson,William Johnson,95.0,CT,Pierce
105,William Samuel Johnson,William Johnson,95.0,unspecified,Pierce
106,William Samuel Johnson,William Johnson,95.0,MA,Pierce
107,William Samuel Johnson,William Johnson,95.0,NJ,Pierce
108,William Samuel Johnson,William Johnson,95.0,NY,Pierce
109,William Samuel Johnson,William Johnson,95.0,VA,Pierce
110,William Samuel Johnson,William Johnson,95.0,MD,Pierce
112,John Lansing Jr.,John Lansing,95.0,unspecified,Pierce


### Notes
1. Likely match - Baldwine doesn't exist, Baldwin fought in the war
2. Should match to Charles C. Pinckney
3. William Samuel Johnson does not match to William Johnson
4. John Lansing Jr. is a likely match
5. John Mercer is likely a match
6. Luther Martin is likely not a match
7. William Pierce should match to Virginia (unspecified??) - others are not matches
8. William Huston is not a match
9. William Davies is not a match

# Marine Certificates

In [300]:
#prepare pierce certificates
marine = pd.read_csv("../Data/Pre1790/Cleaned/Marine_Liquidated_Debt_Certificates_cleaned.csv").drop('Unnamed: 0', axis = 1)
#create full name
marine['full name 1'] = marine['First name'] + " " + marine['Last name']
marine['full name 2'] = marine['first name 2'] + " " + marine['last name 2']

In [301]:
#name matching for constitutional convention and state convention to debt list
marine_const = produceMatches(delegates, marine, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2'])
marine_state = produceMatches(state_delegates, marine, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [302]:
marine_const['state'] = 'Marine'
marine_state['state'] = 'Marine'

In [303]:
cumulative_matching_const = pd.concat([cumulative_matching_const, marine_const]).drop_duplicates()

In [304]:
cumulative_matching_state = pd.concat([cumulative_matching_state, marine_state]).drop_duplicates()

# Liquidated Debt Certificates

# Connecticut

In [305]:
#Read in file
CT = pd.read_csv("../Data/Pre1790/Cleaned/liquidated_debt_certificates_CT_cleaned.csv").drop('Unnamed: 0', axis = 1)

#create full name
CT['full name 1'] = CT['First name'] + " " + CT['Last name']
CT['full name 2'] = CT['First name 2'] + " " + CT['Last name 2']

In [306]:
#name matching for constitutional convention and state convention to debt list
CT_const = produceMatches(delegates, CT, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2'])
CT_state = produceMatches(state_delegates, CT, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [307]:
CT_const['state'] = 'CT'
CT_state['state'] = 'CT'

In [308]:
cumulative_matching_const = pd.concat([cumulative_matching_const, CT_const]).drop_duplicates()

In [309]:
cumulative_matching_state = pd.concat([cumulative_matching_state, CT_state]).drop_duplicates()

# Delaware

In [310]:
#Read in file
DE = pd.read_csv("../Data/Pre1790/Cleaned/liquidated_debt_certificates_DE_cleaned.csv").drop('Unnamed: 0', axis = 1)

#create full name
DE['full name 1'] = DE['First name'] + " " + DE['Last name']
DE['full name 2'] = DE['First name 2'] + " " + DE['Last name 2']

In [311]:
#name matching for constitutional convention and state convention to debt list
DE_const = produceMatches(delegates, DE, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1'])
DE_state = produceMatches(state_delegates, DE, delegate_names = ['full name 1'], debt_names = ['full name 1'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [312]:
DE_const['state'] = 'DE'
DE_state['state'] = 'DE'

In [313]:
cumulative_matching_const = pd.concat([cumulative_matching_const, DE_const]).drop_duplicates()

In [314]:
cumulative_matching_state = pd.concat([cumulative_matching_state, DE_state]).drop_duplicates()

# Massachusetts

In [315]:
#Read in file
MA = pd.read_csv("../Data/Pre1790/Cleaned/liquidated_debt_certificates_MA_cleaned.csv").drop('Unnamed: 0', axis = 1)

#create full name
MA['full name 1'] = MA['First name'] + " " + MA['Last name']
MA['full name 2'] = MA['First name 2'] + " " + MA['Last name 2']

In [316]:
#name matching for constitutional convention and state convention to debt list
MA_const = produceMatches(delegates, MA, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2'])
MA_state = produceMatches(state_delegates, MA, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [317]:
MA_const['state'] = 'MA'
MA_state['state'] = 'MA'

In [318]:
cumulative_matching_const = pd.concat([cumulative_matching_const, MA_const]).drop_duplicates()

In [319]:
cumulative_matching_state = pd.concat([cumulative_matching_state, MA_state]).drop_duplicates()

# New Hampshire

In [320]:
#Read in file
NH = pd.read_csv('../Data/Pre1790/cleaned/liquidated_debt_certificates_NH_cleaned.csv').drop('Unnamed: 0', axis = 1)

#create full name
NH['full name 1'] = NH['First name'] + " " + NH['Last name']

In [321]:
#name matching for constitutional convention and state convention to debt list
NH_const = produceMatches(delegates, NH, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1'])
NH_state = produceMatches(state_delegates, NH, delegate_names = ['full name 1'], debt_names = ['full name 1'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [322]:
NH_const['state'] = 'NH'
NH_state['state'] = 'NH'

In [323]:
cumulative_matching_const = pd.concat([cumulative_matching_const, NH_const]).drop_duplicates()

In [324]:
cumulative_matching_state = pd.concat([cumulative_matching_state, NH_state]).drop_duplicates()

# New Jersey

In [325]:
#Read in file
NJ = pd.read_csv('../Data/Pre1790/cleaned/liquidated_debt_certificates_NJ_cleaned.csv').drop('Unnamed: 0', axis = 1)

#create full name
NJ['full name 1'] = NJ['First name'] + " " + NJ['Last name']
NJ['full name 2'] = NJ['First name 2'] + " " + NJ['Last name 2']

In [326]:
#name matching for constitutional convention and state convention to debt list
NJ_const = produceMatches(delegates, NJ, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2'])
NJ_state = produceMatches(state_delegates, NJ, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [327]:
NJ_const['state'] = 'NJ'
NJ_state['state'] = 'NJ'

In [328]:
cumulative_matching_const = pd.concat([cumulative_matching_const, NJ_const]).drop_duplicates()

In [329]:
cumulative_matching_state = pd.concat([cumulative_matching_state, NJ_state]).drop_duplicates()

# New York

In [330]:
#Read in file
NY = pd.read_csv('../Data/Pre1790/cleaned/liquidated_debt_certificates_NY_cleaned.csv').drop('Unnamed: 0', axis = 1)


#create full name
NY['full name 1'] = NY['First name'] + " " + NY['Last name']
NY['full name 2'] = NY['First name 2'] + " " + NY['Last name 2']
NY['full name 3'] = NY['First name 3'] + " " + NY['Last name 3']

In [331]:
#name matching for constitutional convention and state convention to debt list
NY_const = produceMatches(delegates, NY, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2', 'full name 3'])
NY_state = produceMatches(state_delegates, NY, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2', 'full name 3'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [332]:
NY_const['state'] = 'NJ'
NY_const['state'] = 'NJ'

In [333]:
cumulative_matching_const = pd.concat([cumulative_matching_const, NY_const]).drop_duplicates()

In [334]:
cumulative_matching_state = pd.concat([cumulative_matching_state, NY_state]).drop_duplicates()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


# Pennsylvania P1

In [335]:
#Read in file
PA = pd.read_csv('../Data/Pre1790/cleaned/liquidated_debt_certificates_PA_story_cleaned.csv').drop('Unnamed: 0', axis = 1)

#create full name
PA['full name 1'] = PA['First name'] + " " + PA['Last name']

In [336]:
#name matching for constitutional convention and state convention to debt list
PA_const = produceMatches(delegates, PA, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1'])
PA_state = produceMatches(state_delegates, PA, delegate_names = ['full name 1'], debt_names = ['full name 1'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [337]:
PA_const['state'] = 'PA'
PA_state['state'] = 'PA'

In [338]:
cumulative_matching_const = pd.concat([cumulative_matching_const, PA_const]).drop_duplicates()

In [339]:
cumulative_matching_state = pd.concat([cumulative_matching_state, PA_state]).drop_duplicates()

# Pennsylvania P2

In [340]:
#Read in file
PA2 = pd.read_csv("../Data/Pre1790/cleaned/liquidated_debt_certificates_PA_stelle_cleaned.csv").drop('Unnamed: 0', axis = 1)
#create full name
PA2['full name 1'] = PA2['First name'] + " " + PA2['Last name']
PA2['full name 2'] = PA2['First name 2'] + " " + PA2['Last name 2']

In [341]:
#name matching for constitutional convention and state convention to debt list
PA_const_2 = produceMatches(delegates, PA2, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2'])
PA_state_2 = produceMatches(state_delegates, PA2, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [342]:
PA_const_2['state'] = 'PA'
PA_state_2['state'] = 'PA'

In [343]:
cumulative_matching_const = pd.concat([cumulative_matching_const, PA_const_2]).drop_duplicates()

In [344]:
cumulative_matching_state = pd.concat([cumulative_matching_state, PA_state_2]).drop_duplicates()

# Rhode Island

In [345]:
#Read in file
RI = pd.read_csv("../Data/Pre1790/cleaned/liquidated_debt_certificates_RI_cleaned.csv").drop('Unnamed: 0', axis = 1)
#create full name
RI['full name 1'] = RI['First name'] + " " + RI['Last name']
RI['full name 2'] = RI['First name 2'] + " " + RI['Last name 2']

In [346]:
#name matching for constitutional convention and state convention to debt list
RI_const = produceMatches(delegates, RI, delegate_names = ['full name 1', 'full name 2'], debt_names = ['full name 1', 'full name 2'])
RI_state = produceMatches(state_delegates, RI, delegate_names = ['full name 1'], debt_names = ['full name 1', 'full name 2'])

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [347]:
RI_const['state'] = 'RI'
RI_state['state'] = 'RI'

In [348]:
cumulative_matching_const = pd.concat([cumulative_matching_const, RI_const]).drop_duplicates()

In [349]:
cumulative_matching_state = pd.concat([cumulative_matching_state, RI_state]).drop_duplicates()

# Final Steps

In [350]:
cum_matching = pd.concat([cumulative_matching_const, cumulative_matching_state]).drop_duplicates()

In [351]:
cum_matching.to_csv('../Data/final_matching.csv')