# Python and R
Notebook Setup

In [6]:
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

%matplotlib inline  
import pythonimports
import pandas as pd

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load FEC Data

Data is loaded from this FEC link:

https://www.fec.gov/data/browse-data/?tab=bulk-data

In [7]:
import requests_cache
requests = requests_cache.CachedSession('demo_cache')
import zipfile


def download_large_file(file_url, filepath):
    
    r = requests.get(file_url, stream = True)
    
    with open(filepath,"wb") as f:
        for chunk in r.iter_content(chunk_size=1024):
            # writing one chunk at a time to pdf file
            if chunk:
                f.write(chunk)


def load_individual_contributions(headers, data_url):
    """
    Load the individual contributions data.
    """

    filename = data_url.split("/")[-1]
    filepath = f"data/{filename}"

    # Download the file
    download_large_file(data_url, filepath)

    # unzip the file
    with zipfile.ZipFile(filepath, 'r') as zip_ref:
        zip_ref.extractall('data')


def load_fec_data(fec_abbrev, years):
    """
    Load data from the FEC website.
    https://www.fec.gov/data/browse-data/?tab=bulk-data

    file_abbrev - the file abbreviatio (see URLs in data file above)
    years - a list of years to load
    """
    header_url = f"https://www.fec.gov/files/bulk-downloads/data_dictionaries/{fec_abbrev}_header_file.csv"
    headers = requests.get(header_url).text.strip().split(',')

    dfs = []
    for year in years:
        url = f"https://www.fec.gov/files/bulk-downloads/{year}/{fec_abbrev}{str(year)[2:]}.zip"

        if fec_abbrev == "indiv":
            dfs.append(load_individual_contributions(headers, url))
            continue

        print(f"Downloading {url}")
        dfs.append(
            pd.read_csv(url, compression="zip", names=headers, delimiter="|")
        )

    df = pd.concat(dfs)
    df.to_csv(f"data/{fec_abbrev}.csv", index=False)
    return df

In [8]:
candidate_df = load_fec_data("cn", [2022])
candidate_df.head(2)

Downloading https://www.fec.gov/files/bulk-downloads/2022/cn22.zip


Unnamed: 0,CAND_ID,CAND_NAME,CAND_PTY_AFFILIATION,CAND_ELECTION_YR,CAND_OFFICE_ST,CAND_OFFICE,CAND_OFFICE_DISTRICT,CAND_ICI,CAND_STATUS,CAND_PCC,CAND_ST1,CAND_ST2,CAND_CITY,CAND_ST,CAND_ZIP
0,H0AK00105,"LAMB, THOMAS",NNE,2020,AK,H,0.0,C,N,C00607515,1861 W LAKE LUCILLE DR,,WASILLA,AK,99654.0
1,H0AL01055,"CARL, JERRY LEE, JR",REP,2022,AL,H,1.0,I,C,C00697789,PO BOX 852138,,MOBILE,AL,36685.0


In [5]:
linkages_df = load_fec_data("ccl", [2022])
linkages_df.head(2)

Downloading https://www.fec.gov/files/bulk-downloads/2022/ccl22.zip


Unnamed: 0,CAND_ID,CAND_ELECTION_YR,FEC_ELECTION_YR,CMTE_ID,CMTE_TP,CMTE_DSGN,LINKAGE_ID
0,C00713602,2019,2022,C00712851,O,U,237862
1,H0AK00105,2020,2022,C00607515,H,P,237924


In [6]:
committees_df = load_fec_data("cm", [2022])
committees_df.head(2)

Downloading https://www.fec.gov/files/bulk-downloads/2022/cm22.zip


Unnamed: 0,CMTE_ID,CMTE_NM,TRES_NM,CMTE_ST1,CMTE_ST2,CMTE_CITY,CMTE_ST,CMTE_ZIP,CMTE_DSGN,CMTE_TP,CMTE_PTY_AFFILIATION,CMTE_FILING_FREQ,ORG_TP,CONNECTED_ORG_NM,CAND_ID
0,C00000059,"HALLMARK CARDS, INC. PAC (HALLPAC)","KLEIN, CASSIE MS.","2501 MCGEE, MD853",,KANSAS CITY,MO,64108,B,Q,UNK,M,C,"HALLMARK CARDS, INC.",
1,C00000422,AMERICAN MEDICAL ASSOCIATION POLITICAL ACTION ...,"WALKER, KEVIN MR.","25 MASSACHUSETTS AVE, NW",SUITE 600,WASHINGTON,DC,200017400,B,Q,,M,,ALABAMA MEDICAL PAC,


In [None]:
individuals_df = load_fec_data("indiv", [2022])
individuals_df.head(2)

In [9]:
indep_exp_df = load_fec_data("pas2", [2022])
indep_exp_df.head(2)

Downloading https://www.fec.gov/files/bulk-downloads/2022/pas222.zip


Unnamed: 0,CMTE_ID,AMNDT_IND,RPT_TP,TRANSACTION_PGI,IMAGE_NUM,TRANSACTION_TP,ENTITY_TP,NAME,CITY,STATE,...,OCCUPATION,TRANSACTION_DT,TRANSACTION_AMT,OTHER_ID,CAND_ID,TRAN_ID,FILE_NUM,MEMO_CD,MEMO_TEXT,SUB_ID
0,C00761528,N,30R,R2021,202101279413461164,24K,CCM,GEORGIANS FOR KELLY LOEFFLER,ATLANTA,GA,...,,12242020.0,2000,C00729608,S0GA00526,SB23.4137,1492056,,,4012920212139007940
1,C00761528,N,30R,R2021,202101279413461164,24K,CCM,PERDUE FOR SENATE,ATLANTA,GA,...,,12242020.0,2000,C00547570,S4GA11285,SB23.4140,1492056,,,4012920212139007941


In [8]:
other_df = load_fec_data("oth", [2022])
other_df.head(2)

Downloading https://www.fec.gov/files/bulk-downloads/2022/oth22.zip


Unnamed: 0,CMTE_ID,AMNDT_IND,RPT_TP,TRANSACTION_PGI,IMAGE_NUM,TRANSACTION_TP,ENTITY_TP,NAME,CITY,STATE,...,EMPLOYER,OCCUPATION,TRANSACTION_DT,TRANSACTION_AMT,OTHER_ID,TRAN_ID,FILE_NUM,MEMO_CD,MEMO_TEXT,SUB_ID
0,C00504530,N,YE,P,202201319485722236,10J,IND,"ANDEL, DAVID VAN",GRAND RAPIDS,MI,...,VAN ANDEL INSTITUTE,CEO,9272021.0,2700,,SA12.175901,1564785,X,JFC ATTRIB: TAKE BACK THE HOUSE 2022,4021520221407814397
1,C00504530,N,YE,P,202201319485722236,10J,IND,"BARBOUR, ALFRED MR.",SEWICKLEY,PA,...,CONCAST METALS,EXECUTIVE,7122021.0,5000,,SA12.175914,1564785,X,JFC ATTRIB: TAKE BACK THE HOUSE 2022,4021520221407814398


In [7]:
operating_exp_df = load_fec_data("oppexp", [2022])
operating_exp_df.head(2)

Downloading https://www.fec.gov/files/bulk-downloads/2022/oppexp22.zip


Unnamed: 0,CMTE_ID,AMNDT_IND,RPT_YR,RPT_TP,IMAGE_NUM,LINE_NUM,FORM_TP_CD,SCHED_TP_CD,NAME,CITY,...,PURPOSE,CATEGORY,CATEGORY_DESC,MEMO_CD,MEMO_TEXT,ENTITY_TP,SUB_ID,FILE_NUM,TRAN_ID,BACK_REF_TRAN_ID
C00762229,T,2021,TER,202101119398332329,21B,F3X,SB,PARAGON SOLUTIONS,TEMPE,AZ,...,,,,,ORG,4011120212068264862,1486476,500174301,,
C00554311,T,2021,TER,202101289413587231,17,F3,SB,"KLEINHENDLER, HOWARD",LAKEWOOD,NJ,...,,,,,CAN,4020620211121610710,1492225,SB17.4394,,


In [None]:
%%R

require(tidyverse)