In [14]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the datasets
house_data = pd.read_csv('data/1976-2022-house.csv')
finance_data = pd.read_csv('data/weball22.txt', delimiter='|', header=None)

# Rename finance data columns
finance_columns = ['CAND_ID', 'CAND_NAME', 'CAND_ICI', 'PTY_CD', 'CAND_PTY_AFFILIATION', 'TTL_RECEIPTS', 'TRANS_FROM_AUTH', 'TTL_DISB',
                   'TRANS_TO_AUTH', 'COH_BOP', 'COH_COP', 'CAND_CONTRIB', 'CAND_LOANS', 'OTHER_LOANS', 'CAND_LOAN_REPAY',
                   'OTHER_LOAN_REPAY', 'DEBTS_OWED_BY', 'TTL_INDIV_CONTRIB', 'CAND_OFFICE_ST', 'CAND_OFFICE_DISTRICT', 'SPEC_ELECTION',
                   'PRIM_ELECTION', 'RUN_ELECTION', 'GEN_ELECTION', 'GEN_ELECTION_PRECENT', 'OTHER_POL_CMTE_CONTRIB', 'POL_PTY_CONTRIB',
                   'CVG_END_DT', 'INDIV_REFUNDS', 'CMTE_REFUNDS']

In [15]:
finance_data.columns = finance_columns

# Filter finance data for House candidates in 2022
finance_data_2022 = finance_data[(finance_data['CAND_OFFICE_ST'].notna()) & (finance_data['CVG_END_DT'].str[-4:] == '2022')]

# Merge datasets
merged_data = pd.merge(house_data[house_data['year'] == 2022], 
                       finance_data_2022, 
                       left_on=['state_po', 'district'], 
                       right_on=['CAND_OFFICE_ST', 'CAND_OFFICE_DISTRICT'],
                       how='left')

# Calculate total votes and spending per district
district_summary = merged_data.groupby(['state_po', 'district']).agg({
    'candidatevotes': 'sum',
    'TTL_RECEIPTS': 'sum'
}).reset_index()

district_summary['spending_per_vote'] = district_summary['TTL_RECEIPTS'] / district_summary['candidatevotes']

In [16]:
# Plot spending per vote vs total votes
plt.figure(figsize=(12, 8))
plt.scatter(district_summary['candidatevotes'], district_summary['spending_per_vote'], alpha=0.5)
plt.xlabel('Total Votes in District')
plt.ylabel('Spending per Vote ($)')
plt.title('Campaign Spending Efficiency in 2022 House Elections')
plt.xscale('log')
plt.yscale('log')
plt.grid(True)
plt.savefig('output/spending_vs_votes.png')
plt.close()

# Analyze party spending
party_spending = merged_data.groupby('party').agg({
    'TTL_RECEIPTS': 'sum',
    'candidatevotes': 'sum'
}).reset_index()

party_spending['spending_per_vote'] = party_spending['TTL_RECEIPTS'] / party_spending['candidatevotes']

print("Party Spending Analysis:")
print(party_spending)

# Analyze incumbent vs challenger performance
incumbent_performance = merged_data.groupby('CAND_ICI').agg({
    'TTL_RECEIPTS': 'mean',
    'candidatevotes': 'mean'
}).reset_index()

incumbent_performance['spending_per_vote'] = incumbent_performance['TTL_RECEIPTS'] / incumbent_performance['candidatevotes']

print("\nIncumbent vs Challenger Performance:")
print(incumbent_performance)

# Save results to CSV
district_summary.to_csv('output/district_summary_2022.csv', index=False)
party_spending.to_csv('output/party_spending_2022.csv', index=False)
incumbent_performance.to_csv('output/incumbent_performance_2022.csv', index=False)

Party Spending Analysis:
                           party  TTL_RECEIPTS  candidatevotes  \
0                       ALLIANCE  9.170004e+06           26340   
1    AMERICAN CONSTITUTION PARTY  5.234086e+06           67822   
2                AMERICAN VALUES  1.107344e+07            6180   
3                    CANNON FIRE  2.237513e+06            3852   
4          COLORADO CENTER PARTY  9.169601e+06           12494   
5                   CONSERVATIVE  1.544611e+08         2252730   
6                   CONSTITUTION  3.676512e+07          315879   
7                       DEMOCRAT  2.214453e+09       341250110   
8                     DEMOCRATIC  5.159211e+07           99010   
9        DEMOCRATIC-FARMER-LABOR  1.637539e+07         3597195   
10                FOR THE PEOPLE  2.237513e+06           20058   
11              GOD SAVE AMERICA  1.217803e+07            8928   
12  GRASSROOTS-LEGALIZE CANNABIS  6.674943e+06           74145   
13                         GREEN  9.537437e+06     