In [1]:
import pandas as pd
import requests
import os
import pygsheets
import http.client
import datadotworld as dw
import json
from io import StringIO
from datetime import datetime
# from pandas.io.json import json_normalize

In [2]:
# NOTE(review): scratch cell — it only prints a fixed string and defines nothing
# that later cells use; candidate for removal before sharing the notebook.
print('hello')

In [5]:
# --- Site address and HTTP session -------------------------------------------
# Base address of the Maine campaign finance site; reused as the Origin header.
url_base = 'https://mainecampaignfinance.com'

# Session used by get_cands for its POST request.
s = requests.Session()
# NOTE(review): this jar is created here but no visible cell attaches it to a
# session or request — confirm whether it is still needed.
cookies = requests.cookies.RequestsCookieJar()

# --- Headers sent with every POST --------------------------------------------
# NOTE(review): 'br' is advertised in Accept-Encoding; presumably the kernel
# environment can decode Brotli responses — confirm.
session_headers = {
    'Host': 'mainecampaignfinance.com',
    'Origin': url_base,
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:69.0) Gecko/20100101 Firefox/69.0',
    'Accept': 'application/octet-stream',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://mainecampaignfinance.com/',
    'Content-Type': 'application/json;charset=utf-8',
    'Connection': 'keep-alive',
    'TE': 'Trailers',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

# --- Query parameters --------------------------------------------------------
# Comma-separated election years starting at 2008. range() stops BEFORE the
# current calendar year, so the current year is not part of the string.
# NOTE(review): confirm that leaving out the current year is intended.
years = ','.join(
    str(election_year) for election_year in range(2008, datetime.now().year)
)

# data.world project that receives the uploaded files.
project = '2020-maine-state-campaign-finance'

In [None]:
def get_cands(year=None, headers=None):
    """Fetch candidate search results from the Maine campaign finance site.

    Posts a JSON search request to the ``SearchCandidates`` endpoint through
    the notebook-level session ``s`` and loads the decoded JSON reply into a
    DataFrame.

    Args:
        year: Value sent as ``ElectionYear`` in the request body. The notebook
            passes a comma-separated string of years.
        headers: Optional dict of HTTP headers sent with the request.

    Returns:
        pandas.DataFrame constructed directly from the decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = 'https://mainecampaignfinance.com/api///Organization/SearchCandidates'

    payload = json.dumps(
        {
            "ElectionYear": year,
            "pageNumber": 1,
            # 2**31 - 1: request one page large enough to hold every result.
            "pageSize": 2147483647,
        }
    )

    response = s.post(url, data=payload, headers=headers)
    # Surface HTTP failures here instead of letting an error body fail later
    # inside .json() or the DataFrame constructor with a misleading message.
    response.raise_for_status()

    return pd.DataFrame(response.json())

def get_trans(year=None, headers=None):
    """Download transaction exports and publish one file per transaction type.

    For each transaction type (contributions, expenditures, independent
    expenditures) the CSV export endpoint is queried once per committee type.
    The CSV replies for a transaction type are concatenated, de-duplicated and
    passed to ``write_df_to_datadotworld`` under the notebook-level
    ``project``, using the transaction type's name as the file name.

    A request that fails, or whose body cannot be parsed as CSV, is reported
    and skipped so that the remaining combinations are still processed.

    Args:
        year: Value sent as ``ElectionYear`` in every request body. The
            notebook passes a comma-separated string of years.
        headers: Optional dict of HTTP headers sent with every request.

    Returns:
        pandas.DataFrame for the first transaction type (contributions). This
        function previously returned from inside the loop after that first
        type, so the same frame is returned to keep callers working. An empty
        DataFrame is returned when no contribution export could be read.
    """
    url = 'https://mainecampaignfinance.com/api///Search/TransactionSearchInformationExpExportToCSV'

    # Name -> code lookups. Names label files and log lines; codes go to the API.
    committee_types = {'candidate': '01',
                       'bqc': '02',
                       'pac': '03',
                       'party-committee': '09'}

    transaction_types = {'contributions': 'CON',
                         'expenditures': 'EXP',
                         'independent_expenditures': 'IE'}

    # Fields shared by every query
    base_query = {"ElectionYear": year,
                  "pageNumber": '1',
                  "pageSize": '2147483647',  # Sets max responses from page (defaults to 10)
                  "ValidationRequired": '0'}

    frames_by_type = {}

    # The context manager releases the session's connections when done.
    with requests.Session() as session:
        for type_name, trans_code in transaction_types.items():
            frames = []

            for comm_name, comm_code in committee_types.items():
                # NOTE(review): the codes are posted here. Earlier revisions
                # posted the names and left the codes unused; the codes are
                # presumably what the endpoint expects — confirm against the site.
                query = dict(base_query,
                             TransactionType=trans_code,
                             CommitteeType=comm_code)
                try:
                    response = session.post(url, data=json.dumps(query), headers=headers)
                    # Do not try to parse an HTTP error page as CSV.
                    response.raise_for_status()
                    frames.append(pd.read_csv(StringIO(response.content.decode('utf-8'))))
                except Exception as exc:
                    # Best effort: report the failed combination and move on.
                    print(f'Skipping {type_name}/{comm_name}: {exc!r}')
                    continue

            if not frames:
                # pd.concat raises on an empty list; nothing to publish here.
                print(f'No {type_name} data retrieved; nothing written.')
                continue

            combined = pd.concat(frames, sort=False, ignore_index=True).drop_duplicates()
            write_df_to_datadotworld(project=project, filename=type_name, df=combined)
            frames_by_type[type_name] = combined

    first_type = next(iter(transaction_types))
    return frames_by_type.get(first_type, pd.DataFrame())
        
def write_to_gsheet():
    """Overwrite the first worksheet of the project's Google Sheet with query results.

    Authorizes pygsheets with the service-account file ``gcreds.json``, runs
    the saved data.world query through ``get_datadotworld_query``, then clears
    the worksheet at index 0 of the ``maine-state-campaign-finance-2020``
    spreadsheet and writes the resulting DataFrame starting at cell A1.

    Returns:
        None. The effect is the updated worksheet.
    """
    # Write contribution query to GSheets
    gc = pygsheets.authorize(service_file='gcreds.json')
    gsh_idx = 0
    sheet = 'maine-state-campaign-finance-2020'

    # Retrieve query
    queryid = 'a65bf908-26ba-4f11-b413-a57bd8b3a9f5'
    project = '2020-maine-state-campaign-finance'
    results = get_datadotworld_query(project=project, queryid=queryid)

    # Prepare to load into Google Sheets
    sh = gc.open(sheet)
    wks = sh.worksheet('index', gsh_idx)
    wks.clear()
    # set_dataframe writes the column headers as the first row, so the grid
    # needs one row more than the frame has records.
    wks.rows = results.shape[0] + 1
    wks.set_dataframe(results, start='A1', nan='')
    
def write_df_to_datadotworld(site='darrenfishell', project=None, filename=None, df=None):
    """Upload a DataFrame to a data.world dataset as ``<filename>.csv``.

    The upload is best effort: any failure is printed rather than raised so a
    single failed upload does not abort the rest of the notebook run.

    Args:
        site: data.world account that owns the dataset.
        project: Dataset id under that account.
        filename: Remote file name without extension; ``.csv`` is appended.
        df: pandas.DataFrame to serialise (written without its index).

    Returns:
        None.
    """
    # open_remote_file takes the dataset key ("owner/dataset") and the file
    # name as two separate arguments. Both are built with plain '/' because
    # they are remote identifiers, not local filesystem paths.
    dataset_key = f'{site}/{project}'
    remote_name = f'{filename}.csv'

    try:
        with dw.open_remote_file(dataset_key, remote_name) as w:
            df.to_csv(w, index=False)
        print(f'Wrote {len(df)} records from {filename} to data.world')
    except Exception as exc:
        print(f'Failed to write {filename} to datadotworld.')
        # Show the cause so a failed upload can actually be diagnosed.
        print(f'Reason: {exc!r}')
    
def get_datadotworld_query(site='darrenfishell', project=None, queryid=None):
    """Run a saved data.world query and return its results as a DataFrame.

    The saved query's text is fetched from ``/v0/queries/<queryid>`` on
    api.data.world, authenticated with the token in the ``DW_KEY`` environment
    variable, and then executed against the ``<site>/<project>`` dataset.

    Args:
        site: data.world account that owns the dataset.
        project: Dataset id under that account.
        queryid: Identifier of the saved query.

    Returns:
        The ``dataframe`` attribute of the object returned by ``dw.query``.

    Raises:
        RuntimeError: If the query lookup answers with a 4xx/5xx status.
    """
    conn = http.client.HTTPSConnection("api.data.world")
    headers = {'Authorization': f'Bearer {os.environ.get("DW_KEY")}'}
    try:
        conn.request('GET', '/v0/queries/' + queryid, headers=headers)
        response = conn.getresponse()
        status, reason = response.status, response.reason
        data = response.read()
    finally:
        # Always release the socket, including when the request fails.
        conn.close()

    if status >= 400:
        raise RuntimeError(f'data.world query lookup failed: {status} {reason}')

    query_body = json.loads(data).get('body')

    # The dataset key is a remote "owner/dataset" identifier, so it is joined
    # with '/' rather than os.path.join. `.dataframe` belongs to the query
    # results object, not to the query text.
    return dw.query(f'{site}/{project}', query_body).dataframe

In [None]:
# Candidate search results for every year listed in `years`.
cand_df = get_cands(years, session_headers)

# Transaction exports for the same years; get_trans also hands its result to
# the data.world upload helper.
trans_df = get_trans(years, session_headers)

# Refresh the Google Sheet from the saved data.world query.
write_to_gsheet()

In [None]:
# Report only whether the data.world token is configured. Displaying the whole
# environment would store every variable's value (tokens included) in the
# notebook's saved output.
'DW_KEY' in os.environ