In [1]:
import pandas as pd
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

# MN Contribution EDA

## 1. Read in and Preprocess Datasets

### 1.1 Read in datasets

In [2]:
# Read in candidate-recipient contribution data
df_ag = pd.read_csv('/project/data/cand_con.csv/AG.csv')
df_ap = pd.read_csv('/project/data/cand_con.csv/AP.csv')
df_dc = pd.read_csv('/project/data/cand_con.csv/DC.csv')
df_gc = pd.read_csv('/project/data/cand_con.csv/GC.csv')
df_house = pd.read_csv('/project/data/cand_con.csv/House.csv')
df_sa = pd.read_csv('/project/data/cand_con.csv/SA.csv')
df_sc = pd.read_csv('/project/data/cand_con.csv/SC.csv')
df_senate = pd.read_csv('/project/data/cand_con.csv/Senate.csv')
df_ss = pd.read_csv('/project/data/cand_con.csv/SS.csv')
df_st = pd.read_csv('/project/data/cand_con.csv/ST.csv')

# Read in non-candidate-recipient contribution data
df_non_cand = pd.read_csv('/project/data/non_candidate_con.csv')

10 datasets on candidate-recipient contributions and 1 dataset on non-candidate-recipient contributions. They are seperate and not relational

### 1.2 Check for DataFrames' column consistency

In [3]:
# First check for DataFrames' column numbers
df_lst = [df_ag, df_ap, df_dc, df_gc, df_house, df_sa, df_sc, df_ss, df_st, df_senate, df_non_cand]
df_lens = []
for df in df_lst:
    df_lens.append(df.shape[1])
print(df_lens)

[13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 11]


In [4]:
from utils.MN_util import datasets_col_consistent

datasets_col_consistent(df_lst[:-1])

All dfs have consistent columns


### 1.3 Adjust for DataFrames' column consistency

In [5]:
df_ag.columns, df_non_cand.columns

(Index(['OfficeSought', 'Party', 'District', 'CandRegNumb', 'CandFirstName',
        'CandLastName', 'CommitteeName', 'DonationDate', 'DonorType',
        'DonorName', 'DonationAmount', 'InKindDonAmount',
        'InKindDescriptionText'],
       dtype='object'),
 Index(['PCFRegNumb', 'Committee', 'ETType', 'ETSubType', 'DonationDate',
        'DonorType', 'DonorRegNumb', 'DonorName', 'DonationAmount',
        'InKindDonAmount', 'InKindDescriptionText'],
       dtype='object'))

Based on the project need and dataset consistency, use these columns: RegNumb, RecipientType, OfficeSought, CandFirstName,  CandLastName, Committee, DonationDate, DonorType, DonorName, DonationAmount, InKindDonAmount, InKindDescriptionText

In [6]:
from utils.MN_util import preprocess_candidate_df

df_ag = preprocess_candidate_df(df_ag)
df_ap = preprocess_candidate_df(df_ap)
df_dc = preprocess_candidate_df(df_dc)
df_gc = preprocess_candidate_df(df_gc)
df_house = preprocess_candidate_df(df_house)
df_sa = preprocess_candidate_df(df_sa)
df_sc = preprocess_candidate_df(df_sc)
df_ss = preprocess_candidate_df(df_ss)
df_st = preprocess_candidate_df(df_st)
df_senate = preprocess_candidate_df(df_senate)

In [7]:
from utils.MN_util import preprocess_noncandidate_df
df_non_cand = preprocess_noncandidate_df(df_non_cand)

In [8]:
from utils.MN_util import preprocess_contribution_df

new_df_lst = [df_ag, df_ap, df_dc, df_gc, df_house, df_sa, df_sc, df_ss, df_st, 
              df_senate, df_non_cand]

contribution_df = preprocess_contribution_df(new_df_lst)

In [9]:
contribution_df['DonorType'].unique()

array(['I', 'F', 'C', 'O', 'L', 'P', 'H', 'U', 'S', nan, 'B'],
      dtype=object)

In [10]:
contribution_df.dtypes

OfficeSought                 object
RegNumb                       int64
CandFirstName                object
CandLastName                 object
Committee                    object
Date                 datetime64[ns]
DonorType                    object
DonorName                    object
Amount                      float64
InKindAmount                float64
InKindDescription            object
RecipientType                object
Year                          int64
TotalAmount                 float64
dtype: object

#### Donor Types:
1. C: Candidate Committee (limited to state-level candidates who had a principal campaign committee registered with the Board from which the contribution was made)
2. I: Non-lobbyist individual 
3. L: Lobbyist  
4. F: Political Committee/Fund  
5. S: Supporting association of a political fund registered with the Board that donates to its own political fund
6. P: Political party unit
7. H: Local candidate committee (limited to candidates within Hennepin County who satisfy the definition of local candidate, did not exist until 2022)
8. O: Other (catch-all category that in some cases includes businesses, supporting associations of political funds registered with the Board that donate to their own political fund, associations that are not registered with the Board, and any entity that does not fall within one of the other categories)
9. U: Association not registered with the Board (may include a committee registered with the FEC or a regulatory committee in another state, a 501(c)(4), 501(c)(6), or 527 nonprofit organization, the campaign committee of a candidate for local office (excluding certain Hennepin County candidates from 2022 onward), etc.)
10. B: Business (company & corporation)

In [11]:
contribution_df['RecipientType'].unique()

array(['Candidate', 'PCF', 'PTU'], dtype=object)

In [12]:
contribution_df['OfficeSought'].unique()

array(['AG', nan, 'GC', 'House', 'Senate', 'SA', 'SS', 'SC', 'DC', 'AP',
       'ST'], dtype=object)

#### Recipient Types:
- Candidate
- PCF: Political committee or fund
- PTU: Political party unit

#### Office Types (within candidate recipient):
- AG = Attorney General
- AP = State Appeals Court Judge
- DC = State District Court Judge
- GC = Governor
- House = State Representative
- SA = State Auditor
- SC = State Supreme Court Justice
- SS = Secretary of State
- ST = State Treasurer (this office was abolished in 2003 and no longer exists)
- Senate = State Senator

### 1.4 Check Missing Values

In [13]:
contribution_df.isna().sum()

OfficeSought          483861
RegNumb                    0
CandFirstName         483861
CandLastName          483861
Committee                467
Date                     536
DonorType                580
DonorName                477
Amount                     0
InKindAmount               0
InKindDescription    3508947
RecipientType              0
Year                       0
TotalAmount                0
dtype: int64

In [14]:
print('Total number of contribution entries = ', len(contribution_df))
no_amount = len(contribution_df[contribution_df['TotalAmount'] == 0])
print('Total number of nonclassifiable contribution amount = ', no_amount)
no_don = contribution_df['DonorName'].isna().sum()
print('Total number of nonclassifiable donors = ', no_don)
unclassifiable_prop = round((no_amount+no_don)/len(contribution_df),6)
print('Proportion of nonclassifiable entries =', f"{unclassifiable_prop*100}%")

Total number of contribution entries =  3548873
Total number of nonclassifiable contribution amount =  336051
Total number of nonclassifiable donors =  477
Proportion of nonclassifiable entries = 9.4827%


1. 467 of the 'nan' contributions belong to "Registration Fee for Netroots Event", which is a non-profit organization that help progressive activists. Based on their column values, these contributions have no monetary amount, no donor, and no recipient.
2. 483861 nan OfficeSought are mostly those recipients are non-candidates
3. Contribution entries with no contribution amount, recipient information, or donor information should be dropped

### 1.5 Drop Non-classifiable Contribution Data

In [15]:
from utils.MN_util import drop_nonclassifiable
contribution_df = drop_nonclassifiable(contribution_df)

: 

## 2. Top 10
### 2.1 Top 10 Donors

In [None]:
filtered_df = contribution_df[(contribution_df['Year'] >= 2018) & (contribution_df['Year'] <= 2023) & (contribution_df['Year'] != -1)]
donation_by_year = filtered_df.groupby('Year')['TotalAmount'].sum()
donation_by_year

Year
2018    47781848.87
2019    25764905.60
2020    34614419.07
2021    26898778.21
2022    57692246.12
2023      264961.92
Name: TotalAmount, dtype: float64

In [None]:
# Group by 'Year' and 'Contributor' for total contribution for each contributor in each year
don_by_year_contributor = filtered_df.groupby(
    ['Year', 'DonorName'])['TotalAmount'].sum().reset_index()

# Find the top 10 contributors
top_10_contributors = don_by_year_contributor.groupby('Year').apply(
    lambda group: group.nlargest(10, 'TotalAmount')).reset_index(drop=True)

In [None]:
top_10_contributors

Unnamed: 0,Year,DonorName,TotalAmount
0,2018,Minn DFL State Central Committee,519799.42
1,2018,Minn Chamber of Commerce Leadership Fd,462600.0
2,2018,Minn Nurses Assn Pol Comm (MNA-PC),322750.0
3,2018,MTA PAC (fka SITCO PAC),301400.0
4,2018,Education Minn PAC,291200.0
5,2018,Shakopee Mdewakanton Sioux,290550.0
6,2018,Faegre Baker Daniels State-Reg Pol Fund,276350.0
7,2018,IBEW Minn State Council PAC,188500.0
8,2018,"Haselow, Robert",175025.0
9,2018,Minn Dental Political Action Committee,167350.0


### 2.2 Top 10 Recipients

In [None]:
contribution_df1 = filtered_df.copy(deep=True)
contribution_df1['CandLastName'].fillna('NA', inplace=True)
contribution_df1['CandFirstName'].fillna('NA', inplace=True)
contribution_df1['Committee'].fillna('NA', inplace=True)
by_year_recipients = contribution_df1.groupby(
    ['Year', 'RegNumb', 'RecipientType', 'CandLastName', 'CandFirstName', 'Committee'])['TotalAmount'].sum().reset_index()

top_10_recipient = by_year_recipients.groupby('Year').apply(
    lambda group: group.nlargest(10, 'TotalAmount')).reset_index(drop=True)

In [None]:
top_10_recipient

Unnamed: 0,Year,RegNumb,RecipientType,CandLastName,CandFirstName,Committee,TotalAmount
0,2018,18135,Candidate,Walz,Tim,Tim Walz for Governor,8095546.97
1,2018,17641,Candidate,Johnson,Jeff,Johnson (Jeff) for Governor,4060066.88
2,2018,18336,Candidate,Ellison,Keith,Keith Ellison for Attorney General,1708185.84
3,2018,18133,Candidate,Wardlow,Doug,Doug Wardlow for Attorney General,1621028.92
4,2018,18125,Candidate,Murphy,Erin,Murphy (Erin) for Minnesota,1396061.64
5,2018,17653,Candidate,Simon,Steve,Simon (Steve) for Secretary of State,904363.13
6,2018,18292,Candidate,Pawlenty,Tim,Tim Pawlenty for Governor,545366.12
7,2018,15677,Candidate,Hortman,Melissa,Melissa Hortman Campaign Committee,535150.0
8,2018,17902,Candidate,Edelson,Heather,Heather Edelson for House,492906.69
9,2018,12604,Candidate,Davids,Gregory,People for (Gregory) Davids Committee,460620.0


## 3. Compare donation by donor and recipient types

### 3.1 Compare donation by donor types

In [None]:
donor_type_mapping = {
    'B': 'Business',
    'C': 'Candidate committee',
    'F': 'Political committee or fund',
    'H': 'Local candidate committee registered with Hennepin County',
    'I': 'Non-lobbyist individual',
    'L': 'Lobbyist',
    'O': 'Other',
    'P': 'Political party unit',
    'S': 'Self',
    'U': 'Association not registered with the Board'
}

In [None]:
grouped2 = filtered_df.groupby(['Year', 'DonorType'])['TotalAmount'].sum().reset_index()

grouped2['FullDonorType'] = grouped2['DonorType'].map(donor_type_mapping)

fig = px.bar(
    grouped2,
    x='Year',
    y='TotalAmount',
    color='FullDonorType',
    title='Donations by Donor Type From 2018 To 2023',
    labels={"Year": "Year", "TotalAmount": "Total Contributions", "FullDonorType": "Donor Type"},
    category_orders={"FullDonorType": sorted(donor_type_mapping.values())}
)

fig.show()

In [None]:
filtered_df2 = contribution_df[contribution_df['Year'] == 2022]
grouped3 = filtered_df2.groupby(['Year', 'DonorType'])['TotalAmount'].sum().reset_index()

grouped3['FullDonorType'] = grouped3['DonorType'].map(donor_type_mapping)

fig = px.bar(
    grouped3,
    x='Year',
    y='TotalAmount',
    color='FullDonorType',
    title='Donations by Donor Type in 2022',
    labels={"DonationYear": "Year", "TotalAmount": "Total Contributions", "FullDonorType": "Donor Type"},
    category_orders={"FullDonorType": sorted(donor_type_mapping.values())}
)

fig.show()

#### Observations and Interpretations
1. Individuals, excluding lobbyists, constitute the largest share of contributions in the MN dataset.
2. The second most substantial contributor category is General Purpose Political Committee or Fund, followed by lobbyists.
3. Contributions from other donor types are notably lower throughout the years.
4. Analyzing a sample from 2018 to 2022, we observe a cyclical pattern with a major increase in contributions, followed by three years of reduced contribution totals. This cycle aligns with the four-year election cycle.
5. From 1998 to 2023, there are several years with significantly lower contribution amount: 1999, 2001, 2003, 2007, 2011.

### 3.2 Compare donation by recipient types

In [None]:
grouped4 = filtered_df.groupby(['Year', 'RecipientType'])['TotalAmount'].sum().reset_index()

fig = px.bar(
    grouped4,
    x='Year',
    y='TotalAmount',
    color='RecipientType',
    title='Donations by Recipient Type from 2018 to 2023',
    labels={"Year": "Year", "TotalAmount": "Total Contributions", 'RecipientType': 'Recipient Type'},
)

fig.show()

In [None]:
grouped4 = filtered_df2.groupby(['Year', 'RecipientType'])['TotalAmount'].sum().reset_index()

fig = px.bar(
    grouped4,
    x='Year',
    y='TotalAmount',
    color='RecipientType',
    title='Donations by Recipient Type in 2022',
    labels={"Year": "Year", "TotalAmount": "Total Contributions", 'RecipientType': 'Recipient Type'},
)

fig.show()

#### Observations and Interpretations
1. Candidates, as the recipients, make up the overwhelming majority of contributions.
2. Examining the period from 1998 to 2023, a distinct cyclical pattern emerges, characterized by alternating years of increased and decreased contributions, which may correspond to congressional elections or MN state house representatives elections which take place every two years.
3. Starting in 2012, recipient types "Political Committee or Fund" and "Political Party Unit" began receiving a larger share of contributions compared to prior years.

In [None]:
race_type_mapping = {
    'AG': 'Attorney General',
    'AP': 'Political committee or fund',
    'DC': 'Candidate committee',
    'GC': 'Other',
    'House': 'Lobbyist',
    'SA': 'State Auditor',
    'SC': 'Local candidate committee registered with Hennepin County',
    'SS': 'Secretary of State',
    'Senate': 'Senate',
}

In [None]:
grouped5 = filtered_df.groupby(['Year', 'OfficeSought'])['TotalAmount'].sum().reset_index()
grouped5['FullRaceType'] = grouped5['OfficeSought'].map(race_type_mapping)

fig = px.bar(
    grouped5,
    x='Year',
    y='TotalAmount',
    color='FullRaceType',
    title='Donations by Candidate Recipient Race From 2018 To 2023',
    labels={"Year": "Year", "TotalAmount": "Total Contributions", 'FullRaceType': 'Full Race Type'},
    category_orders={"FullRaceType": sorted(race_type_mapping.values())}
)

fig.show()

Clearly, state senators and house representatives recieve the most contributions

In [None]:
contribution_df[contribution_df['RegNumb']==30119]

Unnamed: 0,OfficeSought,RegNumb,CandFirstName,CandLastName,Committee,Date,DonorType,DonorName,Amount,InKindAmount,InKindDescription,RecipientType,Year,TotalAmount
22,,30119,,,IBEW Local 292 Political Education Fund,2023-03-05,O,IBEW Local 292,55.76,0.0,,PCF,2023,55.76
58,,30119,,,IBEW Local 292 Political Education Fund,2023-03-09,O,IBEW Local 292,262.22,0.0,,PCF,2023,262.22
95,,30119,,,IBEW Local 292 Political Education Fund,2023-02-17,O,IBEW Local 292,550.42,0.0,,PCF,2023,550.42
103,,30119,,,IBEW Local 292 Political Education Fund,2023-02-23,O,IBEW Local 292,133.66,0.0,,PCF,2023,133.66
108,,30119,,,IBEW Local 292 Political Education Fund,2023-02-28,O,IBEW Local 292,75.10,0.0,,PCF,2023,75.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1621101,,30119,,,IBEW Local 292 Political Education Fund,2012-10-18,O,IBEW Local 292,62.30,0.0,,PCF,2012,62.30
1624800,,30119,,,IBEW Local 292 Political Education Fund,2012-10-11,O,IBEW Local 292,196.12,0.0,,PCF,2012,196.12
2297031,,30119,,,IBEW Local 292 Political Education Fund,2008-12-30,O,IBEW Local 292,59.31,0.0,,PCF,2008,59.31
2298939,,30119,,,IBEW Local 292 Political Education Fund,2008-10-14,O,IBEW Local 292,351.60,0.0,,PCF,2008,351.60


# MN Expenditure EDA

### 1. Read in and Preprocess Datasets

In [2]:
df_general = pd.read_csv('/project/data/general_exp_con.csv')
df_independent = pd.read_csv('/project/data/independent_exp.csv')

In [3]:
from utils.MN_util import preprocess_general_exp_df
df_general = preprocess_general_exp_df(df_general)

In [4]:
from utils.MN_util import preprocess_independent_exp_df
df_independent = preprocess_independent_exp_df(df_independent)

In [5]:
df_independent.columns==df_general.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])

In [6]:
df_independent.dtypes

SpenderRegNum                int64
SpenderName                 object
SpenderType                 object
VendorName                  object
Amount                     float64
UnpaidAmount               float64
Date                        object
Year                         int64
Purpose                     object
Type                        object
In-kind?                    object
InKindDescription           object
AffectedCommitteeName       object
AffectedCommitteeRegNum    float64
dtype: object

In [7]:
from utils.MN_util import preprocess_expenditure_df

df_expenditure = preprocess_expenditure_df([df_general, df_independent])

In [8]:
df_expenditure

Unnamed: 0,SpenderRegNum,SpenderName,SpenderType,VendorName,Amount,UnpaidAmount,Date,Year,Purpose,Type,In-kind?,InKindDescription,AffectedCommitteeName,AffectedCommitteeRegNum
0,15863,"Hooten, Carol Dist Court Committee",PCC,,102.84,0.0,10/10/2015,2015,Repayment of part of loan,Other Disbursement,No,,,
1,16008,"Faust, Timothy D House Committee",PCC,Kanabec Publications,429.11,0.0,02/18/2015,2015,Advertising - Print: $4.96 was service charge ...,Campaign Expenditure,No,,,
2,16248,"Swails, Marsha G House Committee",PCC,,1293.77,0.0,03/25/2015,2015,,Contribution,No,,womenwinning State PAC,40268.0
3,16248,"Swails, Marsha G House Committee",PCC,Women Candidate Development Coalition,1293.76,0.0,03/25/2015,2015,contribution to non-profit,Other Disbursement,No,,,
4,16276,"Sawatzke, Pat Senate Committee",PCC,UND Foundation,500.00,0.0,07/30/2015,2015,charitable contribution,Other Disbursement,No,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664185,80026,MN Assoc of Professional Employees Political Fund,PCF,MN Association of Professional Employees,304.28,0.0,10/21/2020,2020,Employee Expense: Staff costs,Independent Expenditure,No,,"Isaacson, Jason (Ike) Senate Committee",17929.0
664186,80026,MN Assoc of Professional Employees Political Fund,PCF,No Coast Workshop,217.39,0.0,10/13/2016,2016,Printing and Photocopying: Independent Expendi...,Independent Expenditure,No,,"Newton, Jerry Senate Committee",18004.0
664187,80026,MN Assoc of Professional Employees Political Fund,PCF,No Coast Workshop,217.40,0.0,10/13/2016,2016,Printing and Photocopying: Independent Expendi...,Independent Expenditure,No,,"Calvert, Deborah (Deb) Senate Committee",18032.0
664188,80026,MN Assoc of Professional Employees Political Fund,PCF,No Coast Workshop,144.32,0.0,11/03/2016,2016,Postage/ Delivery: Independent Expenditure Mai...,Independent Expenditure,No,,"Maye Quade, Erin House Committee",17885.0


In [9]:
print(df_expenditure['SpenderType'].unique())
print(df_expenditure['Type'].unique())

['PCC' 'PCF' 'PTU']
['Other Disbursement' 'Campaign Expenditure' 'Contribution'
 'Non-Campaign Disbursement' 'General Expenditure'
 'Ballot Question Expenditure' 'Party Unit' 'Candidate Committee'
 'Political Committee/Fund' 'Independent Expenditure']


### 2. Check Missing Values and Drop Unclassifiable Data

In [10]:
df_expenditure.isna().sum()

SpenderRegNum                   0
SpenderName                    81
SpenderType                     0
VendorName                  38386
Amount                          0
UnpaidAmount                    0
Date                            0
Year                            0
Purpose                     66290
Type                            0
In-kind?                        0
InKindDescription          639990
AffectedCommitteeName      236995
AffectedCommitteeRegNum    236693
dtype: int64

In [11]:
print('Total number of expenditure entries =', len(df_expenditure))
no_amount = len(df_expenditure[df_expenditure['Amount'] == 0])
print('Total number of nonclassifiable expenditure amount =', no_amount)
no_spender = df_expenditure['SpenderName'].isna().sum()
print('Total number of nonclassifiable spenders =', no_spender)

unclassifiable_prop = round((no_amount+no_spender)/len(df_expenditure),6)
print('Proportion of nonclassifiable entries =', f"{unclassifiable_prop*100}%")

Total number of expenditure entries = 664190
Total number of nonclassifiable expenditure amount = 355
Total number of nonclassifiable spenders = 81
Proportion of nonclassifiable entries = 0.0656%


In [12]:
from utils.MN_util import drop_nonclassifiable_exp
df_expenditure = drop_nonclassifiable_exp(df_expenditure)

In [13]:
filtered_df = df_expenditure[(df_expenditure['Year'] >= 2018) & (df_expenditure['Year'] <= 2023)]
expenditure_by_year = filtered_df.groupby('Year') 
expenditure_by_year['Amount'].sum()

Year
2018    1.379913e+08
2019    2.612436e+07
2020    1.113496e+08
2021    2.680744e+07
2022    2.799756e+09
2023    4.155524e+06
Name: Amount, dtype: float64

### 3. Top Spenders and Vendors (2018-2023)

#### 3.1 Top 10 Spenders

In [14]:
exp_by_year_spender = filtered_df.groupby(
    ['Year', 'SpenderName'])['Amount'].sum().reset_index()

top_10_spender = exp_by_year_spender.groupby('Year').apply(
    lambda group: group.nlargest(10, 'Amount')).reset_index(drop=True)

In [15]:
top_10_spender

Unnamed: 0,Year,SpenderName,Amount
0,2018,MN DFL State Central Committee,16346670.0
1,2018,Alliance for a Better Minnesota Action Fund,9388784.0
2,2018,DGA Victory Fund,7639284.0
3,2018,2024 Fund,5702886.0
4,2018,DFL House Caucus,5409447.0
5,2018,Education Minn PAC,5224434.0
6,2018,"Walz, Tim Gov Committee",4608298.0
7,2018,MN Victory PAC,3785053.0
8,2018,WIN Minnesota Political Action Fund,3263076.0
9,2018,HRCC,3079447.0


#### 3.2 Top 10 Vendors

In [16]:
exp_by_year_vendor = filtered_df.groupby(
    ['Year', 'VendorName'])['Amount'].sum().reset_index()

top_10_vendor = exp_by_year_vendor.groupby('Year').apply(
    lambda group: group.nlargest(10, 'Amount')).reset_index(drop=True)

In [17]:
top_10_vendor

Unnamed: 0,Year,VendorName,Amount
0,2018,Great American Media,7092650.0
1,2018,Democratic Governors Association,5096131.0
2,2018,Clarify Agency,4289870.0
3,2018,Nebo Media,3538082.0
4,2018,Drake Bank Federal,3040606.0
5,2018,GMMB Inc,2512423.0
6,2018,Berlin Rosen LTD,1446830.0
7,2018,Ax Media,1370301.0
8,2018,Strategic Media for Placement,1326959.0
9,2018,Education Minnesota,1302605.0


### 4. Compare Expenditure by Spender Type and Vendor Type (2018-2023)

In [18]:
spendor_type_mapping = {
    'PCC': 'Candidate', 
    'PCF': 'Political Committee and Fund', 
    'PTU': 'Political Party Unit'
}

grouped = filtered_df.groupby(['Year', 'SpenderType'])['Amount'].sum().reset_index()
grouped['FullSpenderType'] = grouped['SpenderType'].map(spendor_type_mapping)

In [21]:
fig = px.bar(
    grouped,
    x='Year',
    y='Amount',
    color='FullSpenderType',
    title='Expenditure by Spender Type From 2018 To 2023',
    labels={"Year": "Year", "TotalAmount": "Total Expenditure Log", "FullSpenderType": "Spender Type"},
    category_orders={"FullSpenderType": sorted(spendor_type_mapping.values())}
)
fig.update_yaxes(type='log')
fig.show()