# Download raw data from FEC website

- We wanted a reproducible way to download the data,
- because the data is too large to add to the git repo,
- and scripting the download makes for a cleaner implementation.

In [25]:
import re
import os
import requests

Set our storage destination:

In [26]:
# Root folder for everything this notebook downloads; the download loop below
# creates one sub-folder per table abbreviation underneath it.
# The path is relative, so it resolves against the kernel's working directory.
FEC_PATH = 'fec_data/'

In [27]:
# URL builder for the per-cycle bulk data archives. Binding `str.format` makes
# `base_url` callable with keyword arguments, e.g.
#   base_url(year=2020, table='cn', short_year='20')
#   -> "https://www.fec.gov/files/bulk-downloads/2020/cn20.zip"
base_url = "https://www.fec.gov/files/bulk-downloads/{year}/{table}{short_year}.zip".format

## A dictionary defining all the different tables to fetch from FEC

In [28]:
# Catalogue of FEC bulk-data tables to fetch, keyed by a readable table name.
# Each entry holds:
#   'abbr'        - the short file prefix used on the FEC site; the download loop
#                   uses it for the destination sub-folder, the header-file name
#                   and the per-year zip name.
#   'description' - the URL of the page describing the table; the download loop
#                   saves a copy of it next to the data.
# Every description URL ends with a trailing slash so that they all share one shape.
tables = {
    'ALL_CANDIDATES': {
        'abbr': 'weball',
        'description': 'https://www.fec.gov/campaign-finance-data/all-candidates-file-description/'
    },
    'CANDIDATE_MASTER': {
        'abbr': 'cn',
        'description': 'https://www.fec.gov/campaign-finance-data/candidate-master-file-description/'
    },
    'CANDIDATE_COMMITTEE_LINKAGES': {
        'abbr': 'ccl',
        # Previously pointed at the individual-contributions page (copy-paste slip).
        'description': 'https://www.fec.gov/campaign-finance-data/candidate-committee-linkage-file-description/'
    },
    'HOUSE_SENATE_CAMPAIGNS': {
        'abbr': 'webl',
        'description': 'https://www.fec.gov/campaign-finance-data/current-campaigns-house-and-senate-file-description/'
    },
    'COMMITTEE_MASTER': {
        'abbr': 'cm',
        'description': 'https://www.fec.gov/campaign-finance-data/committee-master-file-description/'
    },
    'PAC_SUMMARY': {
        'abbr': 'webk',
        'description': 'https://www.fec.gov/campaign-finance-data/pac-and-party-summary-file-description/'
    },
    'INDIVIDUAL_CONTRIBUTIONS': {
        'abbr': 'indiv',
        'description': 'https://www.fec.gov/campaign-finance-data/contributions-individuals-file-description/'
    },
    'COMMITTEE_TO_CANDIDATE_AND_IE': {
        'abbr': 'pas2',
        'description': 'https://www.fec.gov/campaign-finance-data/contributions-committees-candidates-file-description/'
    },
    'COMMITTEE_TO_COMMITTEE': {
        'abbr': 'oth',
        'description': 'https://www.fec.gov/campaign-finance-data/any-transaction-one-committee-another-file-description/'
    },
    'OPERATING_EXPENSES': {
        'abbr': 'oppexp',
        'description': 'https://www.fec.gov/campaign-finance-data/operating-expenditures-file-description/'
    }
}

## For loop that goes through and fetches all the data
- sit back with a cup of coffee while the loop does the heavy lifting for you

In [29]:

# (connect, read) timeouts in seconds, so a stalled connection cannot hang the cell.
REQUEST_TIMEOUT = (10, 300)
# Stream responses to disk in 1 MiB pieces instead of buffering whole archives in memory.
CHUNK_SIZE = 1024 * 1024


def _download(url, destination):
    """Stream `url` to the file `destination`.

    The body is written to `<destination>.part` first and only moved into place
    once the transfer has finished, so an interrupted or failed download never
    leaves a truncated file (or an HTTP error page) under the final name.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response (via `raise_for_status`).
        OSError: if the file cannot be written or moved.
    """
    partial = destination + '.part'
    try:
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
        os.replace(partial, destination)
    finally:
        # Only still present if something above failed; clean up the leftovers.
        if os.path.exists(partial):
            os.remove(partial)


for name, info in tables.items():
    print(f"{name}...")
    table_abbr = info['abbr']
    description_url = info['description']
    # os.path.join avoids the doubled separator when FEC_PATH already ends with '/'.
    destination_folder = os.path.join(FEC_PATH, table_abbr)
    os.makedirs(destination_folder, exist_ok=True)

    # Download Header File:
    url = f'https://www.fec.gov/files/bulk-downloads/data_dictionaries/{table_abbr}_header_file.csv'
    try:
        filename = url.rsplit('/', 1)[-1]
        _download(url, os.path.join(destination_folder, filename))
    except (requests.RequestException, OSError) as e:
        print(f"Failed to load headers for {name}: {str(e)} {url}")

    # Download Description:
    try:
        # Use the last path segment whether or not the URL ends with a slash.
        filename = description_url.rstrip('/').rsplit('/', 1)[-1] + '.html'
        _download(description_url, os.path.join(destination_folder, filename))
    except (requests.RequestException, OSError) as e:
        print(f"Failed to load description for {name}: {str(e)} {description_url}")

    # Download data:
    for year in range(2016, 2022, 2):
        # Built outside the try so the error message always reports this year's URL.
        url = base_url(year=year, table=table_abbr, short_year=str(year)[2:])
        try:
            filename = url.rsplit('/', 1)[-1]
            _download(url, os.path.join(destination_folder, filename))
        except (requests.RequestException, OSError) as e:
            print(f"Failed to load {name} data for {year}: {str(e)} {url}")
                

ALL_CANDIDATES...
CANDIDATE_MASTER...
CANDIDATE_COMMITTEE_LINKAGES...
HOUSE_SENATE_CAMPAIGNS...
COMMITTEE_MASTER...
PAC_SUMMARY...
INDIVIDUAL_CONTRIBUTIONS...
COMMITTEE_TO_CANDIDATE_AND_IE...
COMMITTEE_TO_COMMITTEE...
OPERATING_EXPENSES...
