In [1]:
import pandas as pd

In [3]:
contributions = pd.read_csv('individual_contributions.csv', sep='|', header=None,
                            usecols=[0, 3, 10, 13, 14, 20],
                            names=['CMTE_ID',
                                  'TRANSACTION_PGI',
                                  'ZIP_CODE',
                                  'TRANSACTION_DT',
                                  'TRANSACTION_AMT',
                                  'SUB_ID'],
                            dtype={"CMTE_ID": str, 
                                   "TRANSACTION_PGI": str,
                                   "ZIP_CODE": str,
                                  'TRANSACTION_DT': str,
                                   "TRANSACTION_AMT": float, 
                                   "SUB_ID": str
                                  }
                           )

In [4]:
contributions['zipcode_5'] = contributions['ZIP_CODE'].str[0:5]
contributions['ZIP_CODE'] = pd.to_numeric(contributions['ZIP_CODE'], errors='coerce')

In [5]:
contributions.dropna(subset=['CMTE_ID', 'ZIP_CODE','TRANSACTION_AMT','SUB_ID','zipcode_5'],inplace=True)
contributions = contributions.loc[contributions['TRANSACTION_AMT'] > 0]

In [6]:
# Illinois Exclusive

#contributions = contributions.loc[(contributions['ZIP_CODE'] > 600000000) & (contributions['ZIP_CODE'] < 630000000)]

In [7]:
contributions.head()

Unnamed: 0,CMTE_ID,TRANSACTION_PGI,ZIP_CODE,TRANSACTION_DT,TRANSACTION_AMT,SUB_ID,zipcode_5
0,C00088591,P,220424511.0,2132015,500.0,4032020151240885624,22042
1,C00088591,P,220424511.0,2132015,200.0,4032020151240885819,22042
2,C00088591,P,220424511.0,2272015,200.0,4032020151240885820,22042
3,C00088591,P,220424511.0,2132015,200.0,4032020151240885683,22042
4,C00088591,P,220424511.0,2272015,200.0,4032020151240885684,22042


In [8]:
candidate_committee = pd.read_csv('candidate_committee.csv', sep='|')
candidate_committee = candidate_committee[['CAND_ID', 'CMTE_ID']]
candidate_committee.head()

Unnamed: 0,CAND_ID,CMTE_ID
0,H0AK00097,C00525261
1,H0AL02087,C00462143
2,H0AL02087,C00493783
3,H0AL05049,C00239038
4,H0AL05163,C00464149


In [9]:
candidates = pd.read_csv('candidates.csv', sep='|', names=['CAND_ID', 'CAND_PARTY'], usecols=[0, 2],).iloc[1:, :]
#candidates = candidates[['CAND_ID', 'CMTE_ID']]
candidates.head()
contributions = contributions.merge(candidate_committee.merge(candidates, how='inner'))
contributions.head()

Unnamed: 0,CMTE_ID,TRANSACTION_PGI,ZIP_CODE,TRANSACTION_DT,TRANSACTION_AMT,SUB_ID,zipcode_5,CAND_ID,CAND_PARTY
0,C00415182,G,980402804.0,3252015,2700.0,4041520151241859792,98040,H6MD03292,DEM
1,C00415182,P,980402804.0,3252015,2700.0,4041520151241859793,98040,H6MD03292,DEM
2,C00415182,P,210612514.0,1232015,500.0,4041520151241859794,21061,H6MD03292,DEM
3,C00415182,G,212182521.0,3302015,2700.0,4041520151241859806,21218,H6MD03292,DEM
4,C00415182,P,212182521.0,3302015,2700.0,4041520151241859807,21218,H6MD03292,DEM


In [10]:
#contributions.to_csv('contributions.csv', index=False)
contributions.to_csv('all_contributions.csv', index=False)

In [26]:
grouped_zip = contributions.groupby(['zipcode_5'])
zip_summary_df = pd.DataFrame()
zip_summary_df['donations_sum'] = grouped_zip['TRANSACTION_AMT'].sum()
zip_summary_df['donations_median'] = grouped_zip['TRANSACTION_AMT'].median()
zip_summary_df['donations_count'] = grouped_zip['SUB_ID'].count()
zip_summary_df['CAND_PARTY'] = 'TOTAL'
zip_summary_df = zip_summary_df[['CAND_PARTY', 'donations_sum', 'donations_median', 'donations_count']]
zip_summary_df.reset_index(inplace=True)
zip_summary_df.head()

Unnamed: 0,zipcode_5,CAND_PARTY,donations_sum,donations_median,donations_count
0,3102,TOTAL,400.0,100.0,4
1,0,TOTAL,1197070.0,50.0,7589
2,1,TOTAL,3250.0,500.0,3
3,3,TOTAL,1600.0,250.0,4
4,6,TOTAL,275.0,75.0,4


In [19]:
grouped_party = contributions.groupby(['zipcode_5', 'CAND_PARTY'])
party_summary_df = pd.DataFrame()
party_summary_df['donations_sum'] = grouped_party['TRANSACTION_AMT'].sum()
party_summary_df['donations_median'] = grouped_party['TRANSACTION_AMT'].median()
party_summary_df['donations_count'] = grouped_party['SUB_ID'].count()
party_summary_df.reset_index(inplace=True)
party_summary_df.head()

Unnamed: 0,zipcode_5,CAND_PARTY,donations_sum,donations_median,donations_count
0,3102,DEM,400.0,100.0,4
1,0,DEM,1187085.0,50.0,7566
2,0,DFL,100.0,100.0,1
3,0,REP,9885.0,42.5,22
4,1,DEM,550.0,275.0,2


In [27]:
zip_summary_df.dtypes

zipcode_5            object
CAND_PARTY           object
donations_sum       float64
donations_median    float64
donations_count       int64
dtype: object

In [28]:
final_df = zip_summary_df.append(party_summary_df, sort=False)

In [29]:
final_df.to_csv('zipcode_donations_final_all.csv', index=True, index_label='id')

In [3]:
census_df = pd.read_csv('census_data_2016.csv',
                       usecols=['GEO.id', 'HD01_VD01', 'HD01_VD02',
                                'HC01_VC06', 'HC01_VC07','HC01_VC85', 'HC01_VC131',
                                'HC01_EST_VC08', 'HC01_EST_VC11', 'HC01_EST_VC13',
                                'HC01_EST_VC14', 'HC01_EST_VC15'])
header=['zipcode', 'pop_total', 'pop_white', 
        'pop_employed','pop_unemployed', 'median_household_income', 'pop_with_healthcare',
        'edu_25+_total', '25+_HS', 'edu_25+_assoc', 
        'edu_25+_bachelor', 'edu_25+_grad']
census_df = census_df[1:]
census_df.columns = header
census_df.head()
#https://www.census.gov/glossary/#term_Employed

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,zipcode,pop_total,pop_white,pop_employed,pop_unemployed,median_household_income,pop_with_healthcare,edu_25+_total,25+_HS,edu_25+_assoc,edu_25+_bachelor,edu_25+_grad
1,8600000US00601,17800,14436,3904,2152,11507,16503,11887,2985,604,1845,376
2,8600000US00602,39716,22941,11560,3116,15511,37497,27546,6076,3689,3840,1736
3,8600000US00603,51565,35176,12722,3768,16681,47081,35589,9590,2890,5809,2239
4,8600000US00606,6320,3739,1467,205,11648,6167,4381,1553,206,349,77
5,8600000US00610,27976,16259,8327,1587,17751,27106,19237,6329,1618,2368,626


In [4]:
for column in header[1:]:
    census_df[column] = pd.to_numeric(census_df[column], errors='coerce')
census_df.dtypes

zipcode                     object
pop_total                    int64
pop_white                    int64
pop_employed                 int64
pop_unemployed               int64
median_household_income    float64
pop_with_healthcare          int64
edu_25+_total                int64
25+_HS                       int64
edu_25+_assoc                int64
edu_25+_bachelor             int64
edu_25+_grad                 int64
dtype: object

In [5]:
census_summary_df = pd.DataFrame()
census_summary_df['zipcode_5'] = census_df['zipcode'].str[9:14]
census_summary_df['pop_total'] = census_df['pop_total']
census_summary_df['unemployment_rate'] = census_df['pop_unemployed'] / (census_df['pop_unemployed'] + census_df['pop_employed'])
census_summary_df['median_household_income'] = census_df['median_household_income']
census_summary_df['healthcare_rate'] = census_df['pop_with_healthcare'] / census_df['pop_total']
census_summary_df['hs_graduation_rate'] = census_df['25+_HS'] / census_df['edu_25+_total']
census_summary_df['assoc_degree_rate'] = census_df['edu_25+_assoc'] / census_df['edu_25+_total']
census_summary_df['bachelor_degree_rate'] = census_df['edu_25+_bachelor'] / census_df['edu_25+_total']
census_summary_df['grad_degree_rate'] = census_df['edu_25+_grad'] / census_df['edu_25+_total']
census_summary_df.head()

Unnamed: 0,zipcode_5,pop_total,unemployment_rate,median_household_income,healthcare_rate,hs_graduation_rate,assoc_degree_rate,bachelor_degree_rate,grad_degree_rate
1,601,17800,0.35535,11507.0,0.927135,0.251115,0.050812,0.155212,0.031631
2,602,39716,0.212319,15511.0,0.944128,0.220576,0.133921,0.139403,0.063022
3,603,51565,0.228502,16681.0,0.913042,0.269465,0.081205,0.163225,0.062913
4,606,6320,0.122608,11648.0,0.975791,0.354485,0.047021,0.079662,0.017576
5,610,27976,0.160077,17751.0,0.968902,0.329001,0.084109,0.123096,0.032541


In [6]:
census_summary_df.dtypes
# Illinois Exclusive

census_summary_df = census_summary_df.loc[pd.to_numeric(census_summary_df['zipcode_5']) >= 60000].loc[pd.to_numeric(census_summary_df['zipcode_5']) < 63000]

In [7]:
census_summary_df.to_csv('census_data.csv', index=True, index_label='id')

In [None]:
census_summary_df.shape

In [1]:
@app.route('/api/donations/', defaults={'search_term': 'TOTAL'})
                                @app.route("/api/donations/<search_term>")
                                def donations(search_term):
                                    donation_results = db.session.execute(f'SELECT zd.zipcode_5, donations_sum, donations_median, donations_count, zipcode_geojson FROM zipcode_donations zd join zi_p5 on zi_p5.zipcode_5 = zd.zipcode_5 WHERE CAND_PARTY = "{search_term}"')
                                    maximums_results = db.session.execute(f'SELECT max(donations_sum), max(donations_median), max(donations_count) FROM zipcode_donations WHERE CAND_PARTY = "{search_term}"').first()
                                    features = []
                                    maximums = {
                                        "donations_sum": float(maximums_results[0]),
                                        "donations_median": float(maximums_results[1]),
                                        "donations_count": float(maximums_results[2])
                                    }
                                    for result in donation_results:
                                        geometry = json.loads(result[4])
                                        features.append({
                                            "type": "Feature",
                                            "properties": {
                                                "zipcode": result[0],
                                                "donations_sum": float(result[1]),
                                                "donations_median": float(result[2]),
                                                "donations_count": float(result[3])
                                            },
                                            "geometry": geometry
                                            })
                                    return jsonify({"type": "FeatureCollection", "features": features, "maximums": maximums})  

SyntaxError: invalid syntax (<ipython-input-1-2e2f7ab8bba8>, line 1)