In [22]:
import pandas as pd
import requests
import os
import pygsheets
import http.client
import datadotworld as dw
import json
import config
from io import StringIO
from datetime import datetime
import config
# from pandas.io.json import json_normalize

In [43]:
#URL and session variables
url_base = 'https://mainecampaignfinance.com'

s = requests.Session()

#POST headers
session_headers = {'Host':'mainecampaignfinance.com'
            ,'Origin': url_base
            ,'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:69.0) Gecko/20100101 Firefox/69.0'
            ,'Accept': 'application/octet-stream'
            ,'Accept-Language': 'en-US,en;q=0.5'
            ,'Accept-Encoding': 'gzip, deflate, br'
            ,'Referer':'https://mainecampaignfinance.com/'
            ,'Content-Type': 'application/json;charset=utf-8'
            ,'Connection': 'keep-alive'
            ,'TE':'Trailers'
            ,'Pragma': 'no-cache'
            ,'Cache-Control': 'no-cache'}

#Parameters
# years = ','.join(map(str, list(range(2008, datetime.now().year))))
years = list(range(2014, datetime.now().year))
site = 'darrenfishell'
project = 'maine-political-cash'
queryid = '6e1f1c4e-0648-448e-9474-8fe5af324b17'

In [50]:
def get_cands(year_list=None, headers=None):

    url='https://mainecampaignfinance.com/api///Organization/SearchCandidates'
    
    dfs = []
    
    for year in year_list:
        
        data = json.dumps(
            {
                "ElectionYear":year,
                "pageNumber":1,
                "pageSize":2147483647
            }
        )
        
        r = s.post(url, data=data, headers=headers).json()
        
        df = pd.DataFrame(r)
        
        dfs.append(df)

    df=pd.concat(dfs,sort=False,ignore_index=True)
        
    return df

def get_trans(year_list=None, headers=None):
    
    s = requests.Session()

    url = 'https://mainecampaignfinance.com/api///Search/TransactionSearchInformationExpExportToCSV'

    #Parameters for looping through search
    committee_types = {
        'candidate':'01',
        'bqc':'02',
        'pac':'03',
        'party-committee':'09'
    }

    transaction_types = {
        'contributions':'CON',
        'expenditures':'EXP',
        'independent_expenditures':'IE'
    }

    #Data dictionary for query
    data = {
        "pageNumber":'1'
        ,"pageSize":'2147483647' #Sets max responses from page (defaults to 10)
        ,"ValidationRequired":'0'
    }

    for type_name, trans_type in transaction_types.items():

        dfs=[]

        for year in year_list:

            for comm, comm_id in committee_types.items():

                add_post_loop = {
                    'TransactionType': trans_type,
                    'ElectionYear': year,
                    'CommitteeType': comm_id
                }
    
                data = data | add_post_loop
    
                try:
                    r = s.post(url, data=json.dumps(data), headers=headers)
                    df = pd.read_csv(StringIO(r.content.decode('utf-8')))
                    dfs.append(df)
                except:
                    continue
        
            print(f'Loaded {len(df)} records for {trans_type} for {year}')

        #COMBINE dfs
        df=pd.concat(dfs,sort=False,ignore_index=True).drop_duplicates()
        
        write_df_to_datadotworld(project=project, filename=type_name, df=df)
    
def write_df_to_datadotworld(site='darrenfishell', project=None, filename=None, df=None):
    
    try:
        filepath = f'{os.path.join(site, project, filename)}.csv'
        
        with dw.open_remote_file(f'{site}/{project}', f'{filename}.csv') as w:
            df.to_csv(w, index=False)
            
        print(f'Wrote {len(df)} records to {filepath} in data.world')
    except Exception as e:
        print(f'Failed to write {filename} to datadotworld.')

def write_to_gsheet(site='darrenfishell', project=None, queryid=None):

    #Write contribution query to GSheets
    gc = pygsheets.authorize(service_file='gcreds.json')
    gsh_idx = 0
    sheet = 'maine-campaign-finance'
    
    #Retrieve query
    conn = http.client.HTTPSConnection("api.data.world")
    headers = { 'Authorization': f'Bearer {config.DW_KEY}' }
    conn.request('GET', '/v0/queries/' + queryid, headers=headers)
    sql_query = json.loads(conn.getresponse().read()).get('body')
    results = dw.query(os.path.join(site, project), sql_query).dataframe
    
    #Prepare to load into Google Sheets
    sh = gc.open(sheet)
    wks = sh.worksheet('index',gsh_idx)
    wks.clear()
    wks.rows = results.shape[0]
    wks.set_dataframe(results,start='A1',nan='')
    
    print(f'Wrote {len(results)} records to Google Sheets')

In [5]:
cand_df = get_cands(year_list=years, headers=session_headers)
 
get_trans(year_list=years, headers=session_headers)

Loaded 1443 records for CON for 2014
Loaded 725 records for CON for 2015
Loaded 1452 records for CON for 2016
Loaded 929 records for CON for 2017
Loaded 1857 records for CON for 2018
Loaded 1333 records for CON for 2019
Loaded 2215 records for CON for 2020
Loaded 1378 records for CON for 2021
Loaded 2341 records for CON for 2022
Loaded 1829 records for CON for 2023
Wrote 414563 records to darrenfishell/maine-campaign-finance/contributions.csv in data.world


  df = pd.read_csv(StringIO(r.content.decode('utf-8')))


Loaded 3278 records for EXP for 2014
Loaded 1066 records for EXP for 2015
Loaded 2717 records for EXP for 2016
Loaded 1119 records for EXP for 2017
Loaded 3440 records for EXP for 2018
Loaded 1466 records for EXP for 2019
Loaded 3107 records for EXP for 2020
Loaded 1587 records for EXP for 2021
Loaded 4058 records for EXP for 2022
Loaded 2221 records for EXP for 2023
Wrote 156290 records to darrenfishell/maine-campaign-finance/expenditures.csv in data.world
Loaded 769 records for IE for 2014
Loaded 0 records for IE for 2015
Loaded 843 records for IE for 2016
Loaded 0 records for IE for 2017
Loaded 658 records for IE for 2018
Loaded 13 records for IE for 2019
Loaded 698 records for IE for 2020
Loaded 50 records for IE for 2021
Loaded 912 records for IE for 2022
Loaded 12 records for IE for 2023
Wrote 12431 records to darrenfishell/maine-campaign-finance/independent_expenditures.csv in data.world


TypeError: join() argument must be str, bytes, or os.PathLike object, not 'NoneType'

In [None]:
write_to_gsheet(project=project, queryid=queryid)