In [1]:
'''
Set up a helper that steps through a date range in fixed-size intervals. We'll use it to scrape the full record of results.
'''

from datetime import date, timedelta

def iterate_function(interval, start_date, end_date, function):
    """Call ``function`` on consecutive windows covering a date range.

    The range from ``start_date`` up to ``end_date`` is walked in steps of
    ``interval``; ``function(window_start, window_end)`` is invoked once per
    window, in order. Every window, including the first, is shortened so that
    ``window_end`` never exceeds ``end_date``. Nothing is called when
    ``start_date >= end_date``.

    Args:
        interval: Step between windows (e.g. a ``timedelta``). Adding it to
            ``start_date`` must move the date forward.
        start_date: Start of the first window.
        end_date: End of the last window.
        function: Callable taking ``(window_start, window_end)``.

    Raises:
        ValueError: If ``interval`` does not advance the date (zero or
            negative), which would otherwise loop forever.
    """
    if start_date >= end_date:
        return
    if not start_date + interval > start_date:
        raise ValueError("interval must be positive")

    curr_date = start_date
    while curr_date < end_date:
        # Clamp here rather than after the call so the first window is
        # bounded too when interval is longer than the whole range.
        next_date = min(curr_date + interval, end_date)
        function(curr_date, next_date)
        curr_date = next_date

# interval = timedelta(weeks=4)
# start_date = date(2009, 1, 1)
# end_date = date(2011, 1, 1)

# def function(curr_date, next_date):
#     print( "({}, {})".format(curr_date, next_date) )            

# iterate_function(interval, start_date, end_date, function)

In [2]:
import requests

def total_receipts(receipts):
    """Return the sum of the 'amount' field over every record in ``receipts``."""
    return sum(record['amount'] for record in receipts)

def scrape_receipts(curr_date, next_date):
    """Fetch one date window of receipts and append them to the global list.

    Sends a GET request to the Illinois Sunshine receipts endpoint with the
    module-level ``cid`` as ``committee_id``, ``curr_date`` as
    ``received_date__ge`` and ``next_date`` as ``received_date__lt``
    (presumably an inclusive lower / exclusive upper bound -- confirm against
    the API). On a 200 response the window's receipts are appended in place to
    the module-level ``receipts`` list and a tab-separated progress line is
    printed: window start, number of receipts in the window, and the running
    total of amounts divided by 1,000,000.

    Args:
        curr_date: Start of the window; sent as ``received_date__ge``.
        next_date: End of the window; sent as ``received_date__lt``.

    Returns:
        A ``pandas.DataFrame`` built from every receipt accumulated so far in
        the module-level ``receipts`` list.
    """
    # Both names must be assigned at module level before this is called.
    global cid, receipts

    endpoint = 'https://illinoissunshine.org/api/receipts/'
    payload = {
        'committee_id': cid,
        'received_date__ge': str(curr_date),
        'received_date__lt': str(next_date),
    }
    # NOTE(review): no timeout is passed, so a stalled connection blocks here.
    r = requests.get(endpoint, params=payload)

    if r.status_code == 200:
        try:
            l_receipts = r.json()['objects'][0]['receipts']
            receipts += l_receipts
            total = total_receipts(receipts)
            print("Week of %s\t%i\t%.2f" % (curr_date, len(l_receipts), total / 1000000))
        except (IndexError, KeyError, TypeError, ValueError):
            # A missing or malformed payload (e.g. an empty 'objects' list, or
            # a body that is not valid JSON) is reported as a window with no
            # receipts. Anything else -- Ctrl-C, an unset global -- propagates
            # instead of being silently swallowed.
            total = 0
            print("Week of %s\t%i\t%.2f" % (curr_date, 0, total / 1000000))
    else:
        # The window is skipped; its receipts are not retried.
        print("API not responsive")

    # NOTE(review): relies on `pd` having been imported elsewhere in the
    # notebook before the first call.
    return pd.DataFrame(receipts)

In [3]:
import json

def slugify(x):
    """Lower-case ``x`` and replace every space with a hyphen."""
    lowered = x.lower()
    return lowered.replace(' ', '-')

def getFileName(campaign):
    """Return the cache-file path for a campaign.

    The path has the form ``receipts/<slug>.json`` where the slug is
    ``candidate--cid--start_date--end_date`` passed through ``slugify``.

    Args:
        campaign: Mapping with 'candidate', 'cid', 'start_date' and
            'end_date' entries.

    Returns:
        The relative path as a string.
    """
    # Only the identifying fields are read; the receipts payload itself is
    # not needed to name the file.
    slug = "%s--%s--%s--%s" % (
        campaign['candidate'],
        campaign['cid'],
        campaign['start_date'],
        campaign['end_date'],
    )
    return 'receipts/' + slugify(slug) + '.json'

def getCachedReceipts(campaign):
    """Load a campaign's receipts from its JSON cache file, if possible.

    Args:
        campaign: Mapping accepted by ``getFileName``.

    Returns:
        The parsed JSON content of the cache file, or ``None`` when the file
        cannot be opened or read, or does not contain valid JSON.
    """
    filename = getFileName(campaign)
    try:
        with open(filename, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError: covers
        # json.JSONDecodeError and UnicodeDecodeError for a corrupt file.
        # Other exceptions (e.g. KeyboardInterrupt) are left to propagate.
        return None
    
def writeCachedReceipts(campaign):
    """Write ``campaign['receipts']`` to the campaign's JSON cache file.

    The target directory is created first if it does not exist, so freshly
    scraped data is not lost to a missing folder.

    Args:
        campaign: Mapping accepted by ``getFileName`` that also has a
            JSON-serializable 'receipts' entry.
    """
    # Imported locally so this function does not depend on imports made
    # elsewhere in the notebook.
    import os

    filename = getFileName(campaign)
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(filename, 'w') as f:
        # Serialize the campaign's own receipts rather than whatever the
        # module-level `receipts` name happens to hold.
        json.dump(campaign['receipts'], f)

In [9]:
import pandas as pd


def getFiltered(l_min, l_max, a):
    """Return, as a list, the values of ``a`` that are > l_min and <= l_max."""
    kept = []
    for value in a:
        if l_min < value <= l_max:
            kept.append(value)
    return kept

def getSum(l_min, l_max, a, non_itemized=0):
    """Sum the values of ``a`` selected by ``getFiltered``, plus ``non_itemized``."""
    in_range_total = sum(getFiltered(l_min, l_max, a))
    return in_range_total + non_itemized

def getBinned(receipts, non_itemized=0):
    """Print receipt totals per amount bracket, divided by 1e6.

    Amounts are grouped into the brackets (0, 1e3], (1e3, 1e4], (1e4, 1e5]
    and (1e5, 1e9]; ``non_itemized`` is added to the lowest bracket only.
    Prints the bracket edges, then the per-bracket totals, then a blank line.

    Args:
        receipts: Records convertible to a ``pandas.DataFrame`` with an
            'amount' column. An empty collection is treated as having no
            itemized amounts.
        non_itemized: Extra amount credited to the lowest bracket.
    """
    df = pd.DataFrame(receipts)
    # A frame built from no records has no 'amount' column at all.
    a = df['amount'].values if 'amount' in df.columns else []
    bins = [0, 1e3, 1e4, 1e5, 1e9]

    binned = []
    # Pair each edge with its successor instead of indexing past the end.
    for i, (l_min, l_max) in enumerate(zip(bins, bins[1:])):
        extra = non_itemized if i == 0 else 0
        binned.append(getSum(l_min, l_max, a, extra) / 1e6)

    print( bins )
    print( binned )
    print()

# Committees to tally; each entry is one committee over one date range.
#   candidate    - display name; also part of the cache file name
#   cid          - committee id, sent to the API as `committee_id`
#   start_date   - start of the range to scrape
#   end_date     - end of the range to scrape
#   non_itemized - amount added to the lowest bin by getBinned
#                  (presumably non-itemized contributions -- TODO confirm source)
#   receipts     - None until filled in by the driver loop after a scrape
campaigns = [
    {
        'candidate': 'Bill Brady',
        'cid': 7537,
        'start_date': date(2009, 1, 1),
        'end_date': date(2011, 1, 1),
        'non_itemized': 504000,
        'receipts': None
    },
    {
        'candidate': 'Pat Quinn',
        'cid': 22730,
        'start_date': date(2009, 1, 1),
        'end_date': date(2011, 1, 1),
        'non_itemized': 0,
        'receipts': None
    },
    {
        'candidate': 'Pat Quinn',
        'cid': 1212,
        'start_date': date(2009, 1, 1),
        'end_date': date(2011, 1, 1),
        'non_itemized': 225000,
        'receipts': None
    },
    {
        'candidate': 'Bruce Rauner',
        'cid': 25185,
        'start_date': date(2013, 1, 1),
        'end_date': date(2014, 12, 30),
        'non_itemized': 1025000,
        'receipts': None
    },
    {
        'candidate': 'Pat Quinn',
        'cid': 1212,
        'start_date': date(2013, 1, 1),
        'end_date': date(2015, 1, 1),
        'non_itemized': 350000,
        'receipts': None
    },
    {
        'candidate': 'Bruce Rauner',
        'cid': 25185,
        'start_date': date(2014, 12, 30),
        'end_date': date(2018, 7, 1),
        'non_itemized': 549000,
        'receipts': None
    },
    {
        'candidate': 'JB Pritzker',
        'cid': 32762,
        'start_date': date(2015, 1, 1),
        'end_date': date(2018, 7, 1),
        'non_itemized': 1000,
        'receipts': None
    },
]

# For each campaign: obtain its receipts (in-memory value, then cache file,
# then a fresh scrape) and print the binned totals.
# NOTE: `cid` and `receipts` are deliberately module-level names here --
# scrape_receipts reads and extends them through `global`, so they must be
# rebound for each campaign before iterate_function is called.
for campaign in campaigns:
    candidate = campaign['candidate']
    cid = campaign['cid']
    start_date = campaign['start_date']
    end_date = campaign['end_date']
    receipts = campaign['receipts']
    non_itemized = campaign['non_itemized']
    
    # Nothing in memory yet: try the JSON cache file first.
    # (A cache hit is kept only in the `receipts` name; campaign['receipts']
    # is not updated in that case.)
    if receipts is None:
        receipts = getCachedReceipts(campaign)
    
    # No usable cache either: scrape the API one week at a time.
    if receipts is None:
        # Fresh list that scrape_receipts extends in place via `global receipts`.
        receipts = []
        interval = timedelta(weeks=1)
        print('\nTallying receipts for %s %i...' % (candidate, cid))
        iterate_function(interval, start_date, end_date, scrape_receipts)
        # Store the result on the campaign, then persist it for later runs.
        campaign['receipts'] = receipts
        writeCachedReceipts(campaign)
    
    # Report which cache file corresponds to this campaign, then its bins.
    print( getFileName(campaign) )
    getBinned(receipts, non_itemized)
    

receipts/bill-brady--7537--2009-01-01--2011-01-01.json
[0, 1000.0, 10000.0, 100000.0, 1000000000.0]
[2.93135935, 4.253476799999999, 4.366351010000001, 9.2806036]

receipts/pat-quinn--22730--2009-01-01--2011-01-01.json
[0, 1000.0, 10000.0, 100000.0, 1000000000.0]
[0.7201050200000001, 1.7346643600000005, 5.69871008, 6.385499]

receipts/pat-quinn--1212--2009-01-01--2011-01-01.json
[0, 1000.0, 10000.0, 100000.0, 1000000000.0]
[1.38723058, 2.6721884799999995, 4.72084196, 1.7]

receipts/bruce-rauner--25185--2013-01-01--2014-12-30.json
[0, 1000.0, 10000.0, 100000.0, 1000000000.0]
[4.250206920000001, 9.601332220000005, 8.69148008, 47.34158014]

receipts/pat-quinn--1212--2013-01-01--2015-01-01.json
[0, 1000.0, 10000.0, 100000.0, 1000000000.0]
[2.205106380000001, 5.597455880000002, 9.540345360000002, 14.609633630000001]

receipts/bruce-rauner--25185--2014-12-30--2018-07-01.json
[0, 1000.0, 10000.0, 100000.0, 1000000000.0]
[1.30857583, 0.7793831099999999, 1.3196400899999998, 100.45]

receipts/jb-