In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the raw campaign CSV (path is relative to the working directory) using
# pandas' default parsing options.
# NOTE(review): `csv` is also the name of a standard-library module; it is not
# imported in the cells shown here, but a more specific name would avoid confusion.
csv = pd.read_csv("stacyabramscampaigndec2018.csv")

In [3]:
# Preview the first rows to check which columns were parsed.
csv.head()

Unnamed: 0,FilerID,Type,LastName,FirstName,Address,City,State,Zip,PAC,Occupation,...,Election,Election_Year,Cash_Amount,In_Kind_Amount,In_Kind_Description,Candidate_FirstName,Candidate_MiddleName,Candidate_LastName,Candidate_Suffix,Committee_Name
0,C2017000285,Monetary,Barlow,Allison,105 Berkeley Pl,Brooklyn,NY,11217-3786,,Program Director,...,General,2018,100.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor
1,C2017000285,Monetary,Barlow,Cathy,6130 Ardleigh St,Philadelphia,PA,19138-1520,,Attorney,...,General,2018,50.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor
2,C2017000285,Monetary,Barlow,Jesse,1427 S Pugh St,State College,PA,16801-6132,,Professor,...,General,2018,50.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor
3,C2017000285,Monetary,Barnett,Elizabeth B.,1123 Narcisco St NE,Albuquerque,NM,87112-6656,,Not employed,...,General,2018,3.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor
4,C2017000285,Monetary,Barrett,Nolen,346 29th Ave,San Francisco,CA,94121-1703,,Not Employed,...,General,2018,125.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor


In [4]:
def importer(path):
    '''Load the CSV found at `path` and hand it back as a DataFrame.

    The file is parsed with pandas' default options; no further
    formatting is applied to the result.
    '''
    return pd.read_csv(path)

# Location of the campaign CSV, relative to the working directory.
path = 'stacyabramscampaigndec2018.csv'

# Load it through importer(); this is the same file that was read into `csv` above.
campaign = importer(path)

In [5]:
def id_maker(df, col_name, identity_fields, base_number=1000000000):
    '''Assign a sequential integer id to every distinct combination of identity fields.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the identity columns. It is not modified.
    col_name : str
        Name given to the generated ids (the name of the returned Series).
    identity_fields : list of str, or a single column name
        Columns that together identify one entity.
    base_number : int, default 1000000000
        Id given to the first distinct combination; later combinations count
        up from it in order of first appearance in `df`.

    Returns
    -------
    pandas.Series
        One id per row of `df`, in row order, named `col_name` and carrying
        `df`'s own index so it can be assigned straight back onto `df`.

    Raises
    ------
    KeyError
        If any of `identity_fields` is not a column of `df`.
    '''
    # Accept a bare column name as well as a list of names.
    if isinstance(identity_fields, str):
        identity_fields = [identity_fields]
    else:
        identity_fields = list(identity_fields)

    # Only the identity columns take part in the lookup. Merging the whole
    # frame would fail when `col_name` already exists in `df` (pandas suffixes
    # both copies, so `col_name` itself disappears) and would copy every
    # unrelated column for nothing.
    keys = df[identity_fields]

    # One row per distinct key combination, numbered from base_number.
    match_table = keys.drop_duplicates().copy()
    match_table[col_name] = np.arange(base_number, base_number + len(match_table))

    # match_table holds each combination exactly once, so the left merge
    # returns one row per input row, in the input order.
    ids = pd.merge(keys,
                   match_table,
                   on=identity_fields,
                   how='left')[col_name]

    # The merge result is indexed 0..n-1. Restore the caller's index so that
    # assigning the result back by label lines up even when `df` has been
    # filtered, sorted or otherwise re-indexed.
    ids.index = df.index

    return ids



#id_check = id_maker(df = campaign, col_name = 'ContributorId', identity_fields = ['LastName', 'FirstName', 'Zip', 'Address'])

In [6]:
def contribution(df, key_col='ContributionId'):
    '''Prepare contribution records from the loaded campaign data.

    Target contribution schema, with the source column each field is planned
    to come from (fields marked * are not populated yet):

        ContributionId      generated here, stored under `key_col`
        FilerId             `FilerID`
        CandidateId *
        ScrapeLogId *
        ContributorId *
        ContributionType    `Type`
        ContributionDate    `Date`
        Amount              generated here from `Cash_Amount` / `In_Kind_Amount`
        Description *

    Parameters
    ----------
    df : pandas.DataFrame
        Campaign data. Must contain `Type`, `Cash_Amount`, `In_Kind_Amount`,
        `Date`, `Committee_Name`, `FirstName`, `LastName` and the
        `Candidate_FirstName` / `Candidate_MiddleName` / `Candidate_LastName`
        columns. It is not modified.
    key_col : str, default 'ContributionId'
        Name of the generated contribution id column.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with all original columns plus `Amount` and `key_col`.
    '''
    # Work on a copy so the caller's frame is left untouched.
    contributions = df.copy()

    # TODO: CandidateId, ContributorId, ScrapeLogId and Description are not
    # derived yet. Once they are, restrict the result to the schema columns
    # ['ContributionId', 'FilerID', 'CandidateId', 'ScrapeLogId',
    #  'ContributorId', 'Type', 'Date', 'Amount', 'Description']
    # and drop rows with a duplicate ContributionId.

    # The source has two amount columns, one for cash and one for in-kind
    # contributions. Collapse them into a single `Amount`: the cash amount for
    # 'Monetary' rows, the in-kind amount for every other row. Done column-wise
    # so it also works on an empty frame and avoids a Python call per row.
    is_monetary = contributions['Type'].eq('Monetary')
    contributions['Amount'] = contributions['Cash_Amount'].where(
        is_monetary, contributions['In_Kind_Amount'])

    contribution_ids = id_maker(df=contributions,
                                col_name=key_col,
                                identity_fields=['Candidate_FirstName', 'Candidate_MiddleName',
                                                 'Candidate_LastName', 'Date', 'Committee_Name',
                                                 'FirstName', 'LastName', 'Amount'])

    # id_maker returns one id per row in row order; assign by position so the
    # ids land on the right rows whatever index `df` carries.
    contributions[key_col] = contribution_ids.values

    return contributions

In [7]:
# Build the contribution table from the loaded campaign data.
# NOTE(review): this rebinds the name `contribution` from the function defined
# above to the DataFrame it returns, so running this cell a second time without
# first re-running the definition cell fails (a DataFrame is not callable).
# Consider a distinct name for the result.
contribution = contribution(campaign)

In [8]:
# Display the result; `Amount` and `ContributionId` are the columns added by contribution().
contribution

Unnamed: 0,FilerID,Type,LastName,FirstName,Address,City,State,Zip,PAC,Occupation,...,Cash_Amount,In_Kind_Amount,In_Kind_Description,Candidate_FirstName,Candidate_MiddleName,Candidate_LastName,Candidate_Suffix,Committee_Name,Amount,ContributionId
0,C2017000285,Monetary,Barlow,Allison,105 Berkeley Pl,Brooklyn,NY,11217-3786,,Program Director,...,100.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,100.0,1000000000
1,C2017000285,Monetary,Barlow,Cathy,6130 Ardleigh St,Philadelphia,PA,19138-1520,,Attorney,...,50.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,50.0,1000000001
2,C2017000285,Monetary,Barlow,Jesse,1427 S Pugh St,State College,PA,16801-6132,,Professor,...,50.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,50.0,1000000002
3,C2017000285,Monetary,Barnett,Elizabeth B.,1123 Narcisco St NE,Albuquerque,NM,87112-6656,,Not employed,...,3.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,3.0,1000000003
4,C2017000285,Monetary,Barrett,Nolen,346 29th Ave,San Francisco,CA,94121-1703,,Not Employed,...,125.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,125.0,1000000004
5,C2017000285,Monetary,Bartlett,Stephen,659 N Tomahawk Island Dr,Portland,OR,97217-7968,,writer,...,25.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,25.0,1000000005
6,C2017000285,Monetary,Bartlett,Steve,659 N Tomahawk Island Dr,Portland,OR,97217-7968,,writer,...,7.5,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,7.5,1000000006
7,C2017000285,Monetary,Barwick,William D.,15 Vernon Rd NW,Atlanta,GA,30305-2963,,Partner,...,1000.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,1000.0,1000000007
8,C2017000285,Monetary,Baskin,Lisa,PO Box 314,Leeds,MA,01053-0314,,Historian,...,50.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,50.0,1000000008
9,C2017000285,Monetary,Barney,Alicia W,421 E Live Oak St,San Gabriel,CA,91776-1519,,Human Resources,...,50.0,0.0,,Stacey,Yvonne,Abrams,,Stacy Abrams for Governor,50.0,1000000009
