In [29]:
import pandas as pd
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

### Preliminary EDA on column content, combine into expenditure dataset

In [30]:
# Load the two raw expenditure exports that are later combined into one dataset.
# NOTE(review): the paths are absolute; confirm /project/data exists in the
# environment where this notebook is re-run.
df_general = pd.read_csv('/project/data/general_exp_con.csv')
df_independent = pd.read_csv('/project/data/independent_exp.csv')

In [31]:
# List the general-expenditure column names to compare against the other file.
df_general.columns

Index(['Committee reg num', 'Committee name', 'Entity type', 'Entity sub-type',
       'Vendor name', 'Vendor address 1', 'Vendor address 2', 'Vendor city',
       'Vendor state', 'Vendor zip', 'Amount', 'Unpaid amount', 'Date',
       'Purpose', 'Year', 'Type', 'In-kind descr', 'In-kind?',
       'Affected committee name', 'Affected committee reg num'],
      dtype='object')

In [32]:
# List the independent-expenditure column names; several differ in spelling
# and casing from the general file and are harmonised further down.
df_independent.columns

Index(['Spender', 'Spender Reg Num', 'Spender type', 'Spender sub-type',
       'Affected Comte Name', 'Affected Cmte Reg Num', 'For /Against', 'Year',
       'Date', 'Type', 'Amount', 'Unpaid amount', 'In kind?', 'In kind descr',
       'Purpose', 'Vendor name', 'Vendor address 1', 'Vendor address 2',
       'Vendor city', 'Vendor State', 'Vendor zip'],
      dtype='object')

Columns to keep (unified name: column in the general file, column in the independent file):
- Year
- Spender Reg Num: Committee reg num, Spender Reg Num
- Spender Name: Committee name, Spender
- Spender type: Entity type, Spender type
- Spender sub-type: Entity sub-type, Spender sub-type
- Vendor name
- Vendor state: Vendor state, Vendor State
- Vendor city
- Amount
- Unpaid amount
- In-kind?: In-kind?, In kind?
- In-kind descr: In-kind descr, In kind descr
- Type
- Purpose
- Affected committee name: Affected committee name, Affected Comte Name
- Affected committee reg num: Affected committee reg num, Affected Cmte Reg Num

In [33]:
# Fields retained from the general-expenditure file, in the order shared with
# the independent-expenditure frame.
columns_to_keep = ['Year', 'Committee reg num', 'Committee name', 'Entity type', 
     'Entity sub-type', 'Vendor name', 'Vendor state', 'Vendor city', 'Amount', 
     'Unpaid amount', 'In-kind?', 'In-kind descr', 'Type', 'Purpose', 
     'Affected committee name', 'Affected committee reg num']
# Committee/entity columns are renamed to the "Spender ..." names used by the
# independent-expenditure file.
column_mapping = {'Committee reg num': 'Spender Reg Num', 'Committee name': 'Spender Name',
                    'Entity type': 'Spender type', 'Entity sub-type': 'Spender sub-type'}
# Select and rename in one chained expression so a new frame is returned
# instead of mutating the freshly sliced frame in place.
# NOTE: this rebinds df_general, so re-running the cell without reloading the
# CSV raises KeyError (the original column names are gone).
df_general = df_general[columns_to_keep].rename(columns=column_mapping)

In [34]:
# Fields retained from the independent-expenditure file, listed in the same
# order as the general file so the two frames line up column-for-column.
columns_to_keep1 = ['Year', 'Spender Reg Num', 'Spender', 'Spender type', 
     'Spender sub-type', 'Vendor name', 'Vendor State', 'Vendor city', 'Amount', 
     'Unpaid amount', 'In kind?', 'In kind descr', 'Type', 'Purpose', 
     'Affected Comte Name', 'Affected Cmte Reg Num']
# Harmonise spelling/casing differences with the general file's column names.
column_mapping1 = {'Spender': 'Spender Name', 'Affected Comte Name': 'Affected committee name', 
                   'Affected Cmte Reg Num': 'Affected committee reg num',
                    'Vendor State': 'Vendor state', 'In kind?': 'In-kind?', 'In kind descr': 'In-kind descr'}
# Select and rename in one chained expression so a new frame is returned
# instead of mutating the freshly sliced frame in place.
# NOTE: this rebinds df_independent, so re-running the cell without reloading
# the CSV raises KeyError (the original column names are gone).
df_independent = df_independent[columns_to_keep1].rename(columns=column_mapping1)

In [35]:
# Fail fast if the two frames are not column-aligned: pd.concat would otherwise
# silently add NaN-padded columns for any name that does not match.
assert df_general.columns.equals(df_independent.columns), 'column names/order differ between frames'
# Element-wise view of the same check, kept as the cell's displayed output.
df_general.columns == df_independent.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])

In [36]:
# Spender-type codes appearing in the 2023 independent-expenditure records.
df_independent.loc[df_independent['Year'] == 2023, 'Spender type'].unique()

array(['PCF'], dtype=object)

In [37]:
# Stack the two harmonised frames. ignore_index gives the result a fresh,
# unique RangeIndex instead of repeating each input frame's own row labels.
df_expenditure = pd.concat([df_general, df_independent], ignore_index=True)
len(df_expenditure)

664190

In [38]:
# Order records newest year first, then add a per-record total of the paid
# and unpaid amounts.
df_expenditure = (
    df_expenditure
    .sort_values(by='Year', ascending=False)
    .assign(TotalAmount=lambda frame: frame['Amount'] + frame['Unpaid amount'])
)

In [39]:
# Confirm the combined frame carries the shared columns plus the new TotalAmount.
df_expenditure.columns

Index(['Year', 'Spender Reg Num', 'Spender Name', 'Spender type',
       'Spender sub-type', 'Vendor name', 'Vendor state', 'Vendor city',
       'Amount', 'Unpaid amount', 'In-kind?', 'In-kind descr', 'Type',
       'Purpose', 'Affected committee name', 'Affected committee reg num',
       'TotalAmount'],
      dtype='object')

In [40]:
# Distinct spender-type codes present in the combined data.
df_expenditure['Spender type'].unique()

array(['PCF', 'PTU', 'PCC'], dtype=object)

In [41]:
# Distinct spender sub-type codes; the result can include NaN where the
# sub-type is missing.
df_expenditure['Spender sub-type'].unique()

array(['PF', 'PC', 'IEC', 'IEF', 'PFN', nan, 'PCN', 'BF', 'CAU', 'SPU',
       'BC', 'BCN'], dtype=object)

In [42]:
# Count missing values per column to decide which records cannot be used.
df_expenditure.isna().sum()

Year                               0
Spender Reg Num                    0
Spender Name                      81
Spender type                       0
Spender sub-type              156052
Vendor name                    38386
Vendor state                   67437
Vendor city                    67540
Amount                             0
Unpaid amount                      0
In-kind?                           0
In-kind descr                 639990
Type                               0
Purpose                        66290
Affected committee name       236995
Affected committee reg num    236693
TotalAmount                        0
dtype: int64

Based on our definition of un-classifiable expenditure, the rows to drop are the 81 records with no spender name and the 38,386 records with no vendor name.

In [43]:
# Discard every record lacking a spender name or a vendor name, then renumber
# the remaining rows from 0.
df_expenditure = (
    df_expenditure
    .dropna(subset=['Spender Name', 'Vendor name'], how='any')
    .reset_index(drop=True)
)

### Top 10 spenders and vendors

In [44]:
# Yearly total of the paid 'Amount' column. The grouped object is kept in a
# variable because the following cell reuses it for 'TotalAmount'.
expenditure_by_year = df_expenditure.groupby(by='Year')
expenditure_by_year['Amount'].sum()

Year
2015    1.826150e+07
2016    5.248286e+07
2017    1.539622e+07
2018    9.433445e+07
2019    1.809889e+07
2020    7.093164e+07
2021    1.910696e+07
2022    2.783699e+09
2023    4.029574e+06
Name: Amount, dtype: float64

In [45]:
# Same yearly totals, this time on TotalAmount (Amount + Unpaid amount).
expenditure_by_year['TotalAmount'].sum()

Year
2015    1.862398e+07
2016    5.289815e+07
2017    1.567732e+07
2018    9.499453e+07
2019    1.817134e+07
2020    7.180199e+07
2021    1.927631e+07
2022    2.797621e+09
2023    4.039811e+06
Name: TotalAmount, dtype: float64

In [46]:
# Total spending per (year, spender).
exp_by_year_spender = df_expenditure.groupby(
    ['Year', 'Spender Name'])['TotalAmount'].sum().reset_index()

# Ten largest spenders in each year: rank within year by descending total and
# keep the first ten rows of every year. This avoids groupby.apply, whose
# handling of the grouping column ('Year') is deprecated and would drop that
# column from the result under the newer behaviour.
top_10_spender = (
    exp_by_year_spender
    .sort_values(['Year', 'TotalAmount'], ascending=[True, False])
    .groupby('Year')
    .head(10)
    .reset_index(drop=True)
)

In [47]:
# Show the 2023 top spenders. Filtering on Year (rather than taking the last
# ten rows) stays correct even if some year contributes fewer than ten rows.
top_10_spender[top_10_spender['Year'] == 2023]

Unnamed: 0,Year,Spender Name,TotalAmount
80,2023,All of Mpls,687248.7
81,2023,North Central States Carpenters PAC,464725.0
82,2023,TakeAction Political Fund,463045.58
83,2023,Laborers District Council of Minn & ND Pol Fund,431124.75
84,2023,AFSCME Council 5 PEOPLE Fund,217654.19
85,2023,Sheet Metal Workers PAC 10,181700.0
86,2023,Joint Council 32 DRIVE,158782.34
87,2023,IBEW Local 292 Political Education Fund,137814.66
88,2023,MN State Bldg & Construction Trades Cncl Pol Fund,123150.0
89,2023,International Union of Operating Engineers,121200.0


In [48]:
# Show the 2022 top spenders. Filtering on Year (rather than a positional
# slice) stays correct even if some year contributes fewer than ten rows.
top_10_spender[top_10_spender['Year'] == 2022]

Unnamed: 0,Year,Spender Name,TotalAmount
70,2022,MN DFL State Central Committee,536319300.0
71,2022,Alliance for a Better Minnesota Action Fund,407709100.0
72,2022,DFL House Caucus,217041100.0
73,2022,MN for Freedom,154193800.0
74,2022,Safe Accessible Fair Elections Minnesota,131401300.0
75,2022,DAGA MN People's Lawyer Project,110974000.0
76,2022,Advance Minnesota Independent Expenditure Comm...,99112440.0
77,2022,HRCC,91921760.0
78,2022,MN Jobs Coalition Legislative Fund,90781410.0
79,2022,iVote Fund MN,79808070.0


In [49]:
# Total amount received per (year, vendor).
exp_by_year_vendor = df_expenditure.groupby(
    ['Year', 'Vendor name'])['TotalAmount'].sum().reset_index()

# Ten largest vendors in each year: rank within year by descending total and
# keep the first ten rows of every year. This avoids groupby.apply, whose
# handling of the grouping column ('Year') is deprecated and would drop that
# column from the result under the newer behaviour.
top_10_vendor = (
    exp_by_year_vendor
    .sort_values(['Year', 'TotalAmount'], ascending=[True, False])
    .groupby('Year')
    .head(10)
    .reset_index(drop=True)
)

In [50]:
# Show the 2023 top vendors. Filtering on Year (rather than taking the last
# ten rows) stays correct even if some year contributes fewer than ten rows.
top_10_vendor[top_10_vendor['Year'] == 2023]

Unnamed: 0,Year,Vendor name,TotalAmount
80,2023,MN DFL State Central Committee,782845.0
81,2023,GRSG Company,500000.0
82,2023,The People's Canvass,423097.61
83,2023,MN DFL Senate Caucus,249643.88
84,2023,MN DFL House Caucus,242777.25
85,2023,AGC of MN,134346.02
86,2023,SMART PAL-Education,120000.0
87,2023,Teamsters Joint Council 32,88938.21
88,2023,All of Mpls,86214.53
89,2023,Apparatus,70000.0


In [51]:
# Show the 2022 top vendors. Filtering on Year (rather than a positional
# slice) stays correct even if some year contributes fewer than ten rows.
top_10_vendor[top_10_vendor['Year'] == 2022]

Unnamed: 0,Year,Vendor name,TotalAmount
70,2022,Clarify Agency,264748100.0
71,2022,Great American Media,258232600.0
72,2022,Canal Partners Media,159324900.0
73,2022,Berlin Rosen LTD,110561500.0
74,2022,Red Eagle Media,108209300.0
75,2022,Deliver Strategies,107450600.0
76,2022,Nebo Media,88496650.0
77,2022,Sage Media Planning Placement,87763520.0
78,2022,FP1 Digital LLC,81386340.0
79,2022,"Schultz, Jennifer House Committee",80624090.0


### Compare expenditure by spender type


In [52]:
# Human-readable labels for the spender-type codes found in the data.
spendor_type_mapping = dict(
    PCC='Candidate',
    PCF='Political Committee and Fund',
    PTU='Political Party Unit',
)

In [53]:
# Restrict to 2023 records and total the spending for each spender type.
df_2023 = df_expenditure.loc[df_expenditure['Year'] == 2023]
grouped_2023 = df_2023.groupby('Spender type', as_index=False)['TotalAmount'].sum()
grouped_2023

Unnamed: 0,Spender type,TotalAmount
0,PCF,4007007.67
1,PTU,32803.71


In [54]:
# Inspect the 2023 records (the notebook display truncates to the first and
# last rows).
df_2023

Unnamed: 0,Year,Spender Reg Num,Spender Name,Spender type,Spender sub-type,Vendor name,Vendor state,Vendor city,Amount,Unpaid amount,In-kind?,In-kind descr,Type,Purpose,Affected committee name,Affected committee reg num,TotalAmount
0,2023,30119,IBEW Local 292 Political Education Fund,PCF,PF,Walz Tim Gov Committee,,,2000.00,0.00,No,,Contribution,,"Walz, Tim Gov Committee",18135.0,2000.00
1,2023,30012,Minneapolis Bldg & Construct Trades Council,PCF,PF,Rainville Michael Minneapolis City Councilor,,,350.00,0.00,No,,Contribution,,,,350.00
2,2023,41345,Greater Than,PCF,PC,ActBlue Technical Services,MA,West Somerville,12.84,0.00,No,,General Expenditure,Bank Service Charges and checks: PAC E-Merchan...,,,12.84
3,2023,30588,Sheet Metal Workers PAC 10,PCF,PF,Johnson Cheniqua Neighbors for,MN,St. Paul,600.00,0.00,No,,General Expenditure,Contribution to local committee,,,600.00
4,2023,30588,Sheet Metal Workers PAC 10,PCF,PF,Koski Emily Minneapolis City Councilor,,,600.00,0.00,No,,Contribution,,,,600.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1750,2023,30013,Joint Council 32 DRIVE,PCF,PF,Senate Victory Fund (SVF),,,2500.00,0.00,No,,Contribution,,Senate Victory Fund (SVF),20013.0,2500.00
1751,2023,40712,Laborers District Council of Minn & ND Pol Fund,PCF,PF,Do-Goodbiz Inc,MN,St Paul,330.44,330.44,No,,Independent Expenditure,Mailing / Voter List Rent/Purchase: Mailing Se...,,,660.88
1752,2023,30588,Sheet Metal Workers PAC 10,PCF,PF,Acomb Acomb Patty House Committee Acomb Patty ...,,,1000.00,0.00,No,,Contribution,,"Acomb, Patty House Committee",18272.0,1000.00
1753,2023,30154,St Paul Firefighters Local 21 Political Action...,PCF,PF,"Thornberg, Kyle",MN,,1893.80,0.00,No,,General Expenditure,Conference and meeting fees: Conference Reimbu...,,,1893.80


In [55]:
# Total spending per (year, spender type), with a readable label for each code.
grouped = df_expenditure.groupby(['Year', 'Spender type'])['TotalAmount'].sum().reset_index()
grouped['Full Spender Type'] = grouped['Spender type'].map(spendor_type_mapping)

fig = px.bar(
    grouped,
    x='Year',
    y='TotalAmount',
    color='Full Spender Type',
    # The year span is read from the data so the title always matches what is
    # plotted; the previous hard-coded "2015 to 2022" omitted the 2023 bars.
    title=f"Expenditure by Spender Type from {grouped['Year'].min()} to {grouped['Year'].max()}",
    labels={"Year": "Year", "TotalAmount": "Total Expenditure Log", "Full Spender Type": "Spender type"},
    category_orders={"Full Spender Type": sorted(spendor_type_mapping.values())}
)
# Log-scale y axis: yearly totals differ by orders of magnitude.
fig.update_yaxes(type='log')
fig.show()

In [56]:
# Check the column dtypes of the cleaned expenditure frame.
df_expenditure.dtypes

Year                            int64
Spender Reg Num                 int64
Spender Name                   object
Spender type                   object
Spender sub-type               object
Vendor name                    object
Vendor state                   object
Vendor city                    object
Amount                        float64
Unpaid amount                 float64
In-kind?                       object
In-kind descr                  object
Type                           object
Purpose                        object
Affected committee name        object
Affected committee reg num    float64
TotalAmount                   float64
dtype: object