# Load UBISc Data

Loads bills, sponsorships, and votes from Openstates, and exports to CSV.

In [1]:
import pyopenstates
import pandas as pd
from pandas.io.json import json_normalize

In [2]:
pyopenstates.set_api_key('b6e279fd-5dd2-4bdf-b5f5-0520748f8f11')

List search queries.

eic is omitted as it primarily [yields](https://openstates.org/api/v1/bills/?q=eic&apikey=b6e279fd-5dd2-4bdf-b5f5-0520748f8f11) Spanish-language bills from Puerto Rico.

TODO: Create mechanism to add specific bills.

In [3]:
QUERIES = ['earned income credit',
           'earned income tax credit',
           'eitc',
           'child tax credit',
           'ctc',
           'cdctc', 
           'basic income',
           'negative income tax',
           # For Hawaii's HCR89 bill mandating research into UBI.
           'Basic Economic Security',
           # For California's carbon dividend proposal, SB775
           # (17 false positives).
           'market-based compliance mechanisms',
           # Colorado's EITC.
           'Colorado Working Families Economic Opportunity Act']
bill_fields = ['id', 'state', 'session', 'bill_id', 'title', 'created_at']
all_fields = list(bill_fields)
all_fields.append('sponsors')

In [4]:
bills = pd.DataFrame()
sponsorships = pd.DataFrame()
for query in QUERIES:
    print(query)
    tmp_bills_json = pyopenstates.search_bills(q=query,  fields=all_fields)
    tmp_bills = pd.DataFrame(tmp_bills_json)
    tmp_bills['query'] = query
    # Ignore errors since some queries may not return bills with sponsors.
    bills = pd.concat([bills, 
                       tmp_bills.drop('sponsors', axis=1, errors='ignore')])
    tmp_sponsorships = json_normalize(tmp_bills_json, 'sponsors', 'id')
    sponsorships = pd.concat([sponsorships, tmp_sponsorships])

earned income credit
earned income tax credit
eitc
child tax credit
ctc
cdctc
basic income
negative income tax
Basic Economic Security
market-based compliance mechanisms
Colorado Working Families Economic Opportunity Act


Why the extra fields?

In [5]:
sponsorships = sponsorships[['id', 'leg_id', 'name', 'type']]

In [6]:
sponsorships[:5]

Unnamed: 0,id,leg_id,name,type
0,NYB00101179,NYL000113,GOODELL,primary
1,NYB00101179,NYL000068,BARCLAY,cosponsor
2,NYB00101179,NYL000174,RA,cosponsor
3,NYB00101179,NYL000166,OAKS,cosponsor
4,NYB00101179,NYL000106,FRIEND,cosponsor


In [7]:
sponsorships.shape

(3721, 4)

In [8]:
bills.shape

(559, 7)

In [9]:
bills.tail(3)

Unnamed: 0,bill_id,created_at,id,query,session,state,title
16,AB 1906,2012-02-24 02:06:18,CAB00008585,market-based compliance mechanisms,20112012,ca,California Global Warming Solutions Act of 200...
17,SB 864,2011-03-24 20:45:17,CAB00006137,market-based compliance mechanisms,20112012,ca,Emissions of greenhouse gases: market-based co...
0,SB 13-001,2013-01-10 00:10:21,COB00001394,Colorado Working Families Economic Opportunity...,2013A,co,Colorado Working Families Economic Opportunity...


Deduplicate bills with multiple search queries.

In [10]:
dedup = pd.DataFrame({'queries' : bills.groupby(bill_fields).query.apply(
    lambda x: "{%s}" % ', '.join(x))}).reset_index()

In [11]:
pd.DataFrame(dedup.groupby('queries').size()).reset_index().sort_values(
    0, ascending=False)

Unnamed: 0,queries,0
8,{earned income tax credit},362
6,{earned income credit},97
4,{ctc},25
9,{eitc},18
10,{market-based compliance mechanisms},18
3,{child tax credit},13
5,"{earned income credit, earned income tax credit}",9
0,{Basic Economic Security},2
7,"{earned income tax credit, eitc}",2
1,{Colorado Working Families Economic Opportunit...,1


Strip timestamp from `created_at` for better exporting to Sheets (via Bigquery).

In [12]:
dedup['created_at'] = dedup.created_at.dt.strftime('%Y-%m-%d')

Add openstates URL

In [13]:
dedup['openstates_url'] = ('https://openstates.org/' + dedup.state + 
                           '/bills/' + dedup.session + '/' + dedup.bill_id)

## Export to CSV

TODO: To Sheets directly.

In [14]:
dedup.to_csv('~/dedup.csv')
sponsorships.to_csv('~/sponsorships.csv')