In [76]:
import pandas as pd
import requests
import datetime
import zipfile
import os
import numpy as np

## Programmatically downloading zipped file of Candidates running in the 2024 election cycle.

### Step 1. Create the directories ./zipped and ./unzipped

In [3]:
zip_path = "./zipped/"
unzipped_path = "./unzipped/"

if not os.path.exists(zip_path):
    os.makedirs(zip_path)

if not os.path.exists(unzipped_path):
    os.makedirs(unzipped_path)

### Step 2. Download the header file.

In [4]:
candidate_header_url = "https://www.fec.gov/files/bulk-downloads/data_dictionaries/cn_header_file.csv"
header_req = requests.get(candidate_header_url)

header_output = "./candidate_header24.csv"

with open(header_output, 'wb') as header_file:
    header_file.write(header_req.content)

print("Candidate header file added!")

Candidate header file added!


### Step 3. Download the Candidate Zip file.

In [5]:
cn_url = "https://www.fec.gov/files/bulk-downloads/2024/cn24.zip"
cn_req = requests.get(cn_url)

current_date = datetime.datetime.now()
current_date_fmt = current_date.strftime("%Y-%m-%d")

cn_path = f"./zipped/cn_2024_{current_date_fmt}.zip"

with open(cn_path, "wb") as cn_file:
    cn_file.write(cn_req.content)

print(f"File succefully added to {cn_path} ")

File succefully added to ./zipped/cn_2024_2024-02-06.zip 


#### Step 4. Extact all files to ./unzipped.

In [6]:
extract_path = f"./unzipped/cn24_{current_date_fmt}/"
with zipfile.ZipFile(cn_path) as extract_zip:
    extract_zip.extractall(extract_path)

print(f"Files extracted to path: {extract_path}")

Files extracted to path: ./unzipped/cn24_2024-02-06/


### Step 4. Convert to .csv and add header to file.

In [72]:
cn_header = pd.read_csv("./candidate_header24.csv")
# path = ""
cn_df = pd.read_csv(extract_path + "cn.txt", sep="|", names=cn_header.columns)

display(cn_df.head(n=10))
print(f"Total number of Candidates recorded: {len(cn_df)}")

Unnamed: 0,CAND_ID,CAND_NAME,CAND_PTY_AFFILIATION,CAND_ELECTION_YR,CAND_OFFICE_ST,CAND_OFFICE,CAND_OFFICE_DISTRICT,CAND_ICI,CAND_STATUS,CAND_PCC,CAND_ST1,CAND_ST2,CAND_CITY,CAND_ST,CAND_ZIP
0,H0AK00105,"LAMB, THOMAS",NNE,2020,AK,H,0.0,C,N,C00607515,1861 W LAKE LUCILLE DR,,WASILLA,AK,99654.0
1,H0AL01055,"CARL, JERRY LEE, JR",REP,2024,AL,H,1.0,I,C,C00697789,PO BOX 852138,,MOBILE,AL,36685.0
2,H0AL01097,"AVERHART, JAMES",DEM,2024,AL,H,2.0,C,C,C00708867,811 SPRINGHILL AV,,MOBILE,AL,36602.0
3,H0AL02087,"ROBY, MARTHA",REP,2020,AL,H,2.0,I,P,C00462143,,,MONTGOMERY,,
4,H0AL02137,"DISMUKES, WILL",REP,2020,AL,H,2.0,O,P,C00714337,PO BOX 6811188,,PRATTVILLE,AL,36068.0
5,H0AL02160,"BROWN, THOMAS WILLIAM JR.",REP,2020,AL,H,2.0,O,P,C00723478,110 RIVERVIEW DR,,NEWTON,AL,36352.0
6,H0AL02202,"HARVEY-HALL, PHYLLIS",DEM,2024,AL,H,2.0,C,C,C00728873,P.O. BOX 11564,,MONTGOMERY,AL,36111.0
7,H0AL03192,"THOMPSON, HANNAH",DEM,2020,AL,H,3.0,C,N,C00681452,2181 N BROADWAY,,ALEXANDER CITY,AL,35010.0
8,H0AL05049,"CRAMER, ROBERT E ""BUD"" JR",DEM,2008,AL,H,5.0,I,P,C00239038,PO BOX 2621,,HUNTSVILLE,AL,35804.0
9,H0AL05163,"BROOKS, MO",REP,2022,AL,H,5.0,I,P,C00464149,7610 FOXFIRE DR.,,HUNTSVILLE,AL,35802.0


Total number of Candidates recorded: 8390


In [73]:
print(cn_df.dtypes)

CAND_ID                  object
CAND_NAME                object
CAND_PTY_AFFILIATION     object
CAND_ELECTION_YR          int64
CAND_OFFICE_ST           object
CAND_OFFICE              object
CAND_OFFICE_DISTRICT    float64
CAND_ICI                 object
CAND_STATUS              object
CAND_PCC                 object
CAND_ST1                 object
CAND_ST2                 object
CAND_CITY                object
CAND_ST                  object
CAND_ZIP                float64
dtype: object


### Step 5. Minor data cleaning.

In [77]:
cn_df["CAND_OFFICE_DISTRICT"] = cn_df["CAND_OFFICE_DISTRICT"].fillna(0) #Fills all NaN with 0
cn_df["CAND_OFFICE_DISTRICT"] = cn_df["CAND_OFFICE_DISTRICT"].astype(int)

# cn_df["CAND_ZIP"] = cn_df["CAND_ZIP"].fillna(0)
# cn_df["CAND_ZIP"] = cn_df["CAND_ZIP"].astype(int) #To remove zero padding
cn_df["CAND_ZIP"] = cn_df["CAND_ZIP"].astype(str) #To adhere to ERD type
cn_df["CAND_ZIP"] = cn_df["CAND_ZIP"].str.split('.').str[0]
cn_df.loc[cn_df["CAND_ZIP"] == 'nan00', 'CAND_ZIP'] = np.nan #The line above results in nan becoming "nan00". This converts them to proper NaN values

display(cn_df.head(n=10))

Unnamed: 0,CAND_ID,CAND_NAME,CAND_PTY_AFFILIATION,CAND_ELECTION_YR,CAND_OFFICE_ST,CAND_OFFICE,CAND_OFFICE_DISTRICT,CAND_ICI,CAND_STATUS,CAND_PCC,CAND_ST1,CAND_ST2,CAND_CITY,CAND_ST,CAND_ZIP
0,H0AK00105,"LAMB, THOMAS",NNE,2020,AK,H,0,C,N,C00607515,1861 W LAKE LUCILLE DR,,WASILLA,AK,99654.0
1,H0AL01055,"CARL, JERRY LEE, JR",REP,2024,AL,H,1,I,C,C00697789,PO BOX 852138,,MOBILE,AL,36685.0
2,H0AL01097,"AVERHART, JAMES",DEM,2024,AL,H,2,C,C,C00708867,811 SPRINGHILL AV,,MOBILE,AL,36602.0
3,H0AL02087,"ROBY, MARTHA",REP,2020,AL,H,2,I,P,C00462143,,,MONTGOMERY,,
4,H0AL02137,"DISMUKES, WILL",REP,2020,AL,H,2,O,P,C00714337,PO BOX 6811188,,PRATTVILLE,AL,36068.0
5,H0AL02160,"BROWN, THOMAS WILLIAM JR.",REP,2020,AL,H,2,O,P,C00723478,110 RIVERVIEW DR,,NEWTON,AL,36352.0
6,H0AL02202,"HARVEY-HALL, PHYLLIS",DEM,2024,AL,H,2,C,C,C00728873,P.O. BOX 11564,,MONTGOMERY,AL,36111.0
7,H0AL03192,"THOMPSON, HANNAH",DEM,2020,AL,H,3,C,N,C00681452,2181 N BROADWAY,,ALEXANDER CITY,AL,35010.0
8,H0AL05049,"CRAMER, ROBERT E ""BUD"" JR",DEM,2008,AL,H,5,I,P,C00239038,PO BOX 2621,,HUNTSVILLE,AL,35804.0
9,H0AL05163,"BROOKS, MO",REP,2022,AL,H,5,I,P,C00464149,7610 FOXFIRE DR.,,HUNTSVILLE,AL,35802.0


In [78]:
source_path = f"../source/cn24_{current_date_fmt}.csv"
cn_df.to_csv(source_path, sep=",", index=False)
print(f"Source file for CCL has been created in {source_path}.")

Source file for CCL has been created in ../source/cn24_2024-02-06.csv.


In [79]:
final_csv = pd.read_csv(f"../source/cn24_{current_date_fmt}.csv", sep=",")
display(final_csv.head(n=20))

Unnamed: 0,CAND_ID,CAND_NAME,CAND_PTY_AFFILIATION,CAND_ELECTION_YR,CAND_OFFICE_ST,CAND_OFFICE,CAND_OFFICE_DISTRICT,CAND_ICI,CAND_STATUS,CAND_PCC,CAND_ST1,CAND_ST2,CAND_CITY,CAND_ST,CAND_ZIP
0,H0AK00105,"LAMB, THOMAS",NNE,2020,AK,H,0,C,N,C00607515,1861 W LAKE LUCILLE DR,,WASILLA,AK,99654.0
1,H0AL01055,"CARL, JERRY LEE, JR",REP,2024,AL,H,1,I,C,C00697789,PO BOX 852138,,MOBILE,AL,36685.0
2,H0AL01097,"AVERHART, JAMES",DEM,2024,AL,H,2,C,C,C00708867,811 SPRINGHILL AV,,MOBILE,AL,36602.0
3,H0AL02087,"ROBY, MARTHA",REP,2020,AL,H,2,I,P,C00462143,,,MONTGOMERY,,
4,H0AL02137,"DISMUKES, WILL",REP,2020,AL,H,2,O,P,C00714337,PO BOX 6811188,,PRATTVILLE,AL,36068.0
5,H0AL02160,"BROWN, THOMAS WILLIAM JR.",REP,2020,AL,H,2,O,P,C00723478,110 RIVERVIEW DR,,NEWTON,AL,36352.0
6,H0AL02202,"HARVEY-HALL, PHYLLIS",DEM,2024,AL,H,2,C,C,C00728873,P.O. BOX 11564,,MONTGOMERY,AL,36111.0
7,H0AL03192,"THOMPSON, HANNAH",DEM,2020,AL,H,3,C,N,C00681452,2181 N BROADWAY,,ALEXANDER CITY,AL,35010.0
8,H0AL05049,"CRAMER, ROBERT E ""BUD"" JR",DEM,2008,AL,H,5,I,P,C00239038,PO BOX 2621,,HUNTSVILLE,AL,35804.0
9,H0AL05163,"BROOKS, MO",REP,2022,AL,H,5,I,P,C00464149,7610 FOXFIRE DR.,,HUNTSVILLE,AL,35802.0
