In [1]:
import requests
from collections import Counter

In [72]:
def openstates_v2_request(payload, timeout=60):
    """
    Send a GraphQL query to the OpenStates v2 endpoint and return the decoded reply.

    payload: string corresponding to a query; it is passed unchanged as the
        body of the POST request.
    timeout: seconds to wait on the server before requests gives up, so a
        stalled connection cannot hang the notebook indefinitely.
    return: JSON object (the decoded response body).
    raises: requests.HTTPError when the server answers with a 4xx/5xx status.

    NOTE: API_KEY is read from notebook scope and must be defined before this
    function is called.
    """
    headers = {
        'x-api-key': API_KEY,
        'Content-Type': "application/x-www-form-urlencoded",
    }
    url = "https://openstates.org/graphql"
    response = requests.post(url, data=payload, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error rather than handing an error body to callers
    # that immediately index into ['data'].
    response.raise_for_status()
    return response.json()

def get_bill(new_session_id, state_name="California", session_year="20192020"):
    """
    Look up a single bill by identifier and return the API's JSON reply.

    new_session_id: value placed in the query's `identifier` field.
    state_name: value placed in the query's `jurisdiction` field.
    session_year: value placed in the query's `session` field.
    return: JSON object from openstates_v2_request; the query requests only the bill's `id`.
    """
    query_template = 'query= {bill(jurisdiction: "%s", session: "%s", identifier: "%s") {id}}'
    query_args = (state_name, session_year, new_session_id)
    return openstates_v2_request(query_template % query_args)

def all_CA_bills(after=""):
    """
    Request one page (up to 100 bills) of California's 20192020 session.

    after: pagination cursor placed in the query's `after` field; the default
        empty string is what the notebook uses for the first page.
    return: JSON object from openstates_v2_request, containing totalCount,
        pageInfo and the bill edges requested by the query.
    """
    query_template = (
        'query= { bills(first: 100, jurisdiction: "California", session: "20192020", after: "%s") '
        '{ totalCount pageInfo { startCursor endCursor hasNextPage } '
        'edges { bill:node { id identifier title classification updatedAt createdAt '
        'fromOrganization { id name} } } } } '
    )
    return openstates_v2_request(query_template % (after,))

def find_bill(subject, state_name="California", session_year="20192020"):
    """
    Request up to 100 bills tagged with the given subject.

    subject: value placed in the query's `subject` field.
    state_name: value placed in the query's `jurisdiction` field.
    session_year: value placed in the query's `session` field.
    return: JSON object from openstates_v2_request, containing totalCount,
        pageInfo and the bill edges requested by the query.

    state_name and session_year were previously accepted but ignored (the
    query hard-coded "California" / "20192020"); they are now honoured. With
    the default arguments the payload is unchanged.
    """
    query_template = (
        'query= { bills(first: 100, jurisdiction: "%s", session: "%s", subject: "%s") '
        '{ totalCount pageInfo { startCursor endCursor hasNextPage } '
        'edges { bill:node { id identifier title classification updatedAt createdAt '
        'fromOrganization { id name} } } } } '
    )
    payload = query_template % (state_name, session_year, subject)
    return openstates_v2_request(payload)

In [9]:
# Accumulator for the bill edges returned by the all_CA_bills() queries below.
CA_list_of_bills = []

In [10]:
# Fetch the first page of results (all_CA_bills defaults to an empty `after` cursor).
batch_100_bills = all_CA_bills()

In [11]:
# Record the total reported by the API, then store the first page's edges.
first_page = batch_100_bills['data']['bills']
total_bills = first_page['totalCount']
CA_list_of_bills.extend(first_page['edges'])
len(CA_list_of_bills) == 100

True

In [12]:
# Follow endCursor from page to page until the API reports no further pages.
page_info = batch_100_bills['data']['bills']['pageInfo']
has_next = page_info['hasNextPage']
while has_next:
    batch_100_bills = all_CA_bills(after=page_info['endCursor'])
    bills_page = batch_100_bills['data']['bills']
    CA_list_of_bills.extend(bills_page['edges'])
    page_info = bills_page['pageInfo']
    has_next = page_info['hasNextPage']

In [65]:
# Sanity check: the number of collected edges matches the totalCount reported by the API.
total_bills == len(CA_list_of_bills)

True

In [14]:
# One (id, identifier, title) tuple per collected bill.
CA_bill_ids_with_ocd = [
    (record['id'], record['identifier'], record['title'])
    for record in (edge['bill'] for edge in CA_list_of_bills)
]

In [15]:
# List of (identifier, title) tuples, one per collected bill: element [0] of each
# tuple is the identifier (e.g. "AB 2"), element [1] is the title.
CA_bill_ids_and_titles = [
    (record['identifier'], record['title'])
    for record in (edge['bill'] for edge in CA_list_of_bills)
]

In [66]:
# Tally each (identifier, title) pair; a pair seen more than once is a duplicate.
count = Counter(CA_bill_ids_and_titles)
duplicate_bills = [pair for pair, occurrences in count.items() if occurrences > 1]
# Keep just the identifier of each duplicated pair.
duplicate_session_ids = [identifier for identifier, _title in duplicate_bills]

In [67]:
# Number of duplicated (identifier, title) pairs found above.
len(duplicate_session_ids)

1741

In [23]:
# Spot check: print every stored version of one duplicated identifier
# (index 87 is an arbitrary sample).
for entry in CA_list_of_bills:
    is_sample = entry['bill']['identifier'] == duplicate_session_ids[87]
    if is_sample:
        print(entry)

{'bill': {'id': 'ocd-bill/e9cca90b-65e2-46d7-9c02-7937bc59f3c5', 'identifier': 'AB 696', 'title': 'Juveniles: wards.', 'classification': ['bill'], 'updatedAt': '2019-02-21 18:07:43.131291+00:00', 'createdAt': '2019-02-20 18:07:06.090661+00:00', 'fromOrganization': {'id': 'ocd-organization/98623fa0-f9c3-47ae-80bb-51465349ed71', 'name': 'Senate'}}}
{'bill': {'id': 'ocd-bill/99428608-c83d-4e77-92a2-e57ceb213f5a', 'identifier': 'AB 696', 'title': 'Juveniles: wards.', 'classification': ['bill'], 'updatedAt': '2019-02-27 18:08:07.600559+00:00', 'createdAt': '2019-02-27 18:08:07.600535+00:00', 'fromOrganization': {'id': 'ocd-organization/e70e812e-44d2-46b0-9eae-2f3b9a1d5150', 'name': 'Assembly'}}}


In [26]:
# Group every stored version of a duplicated bill under its identifier:
# {identifier: [bill edge, bill edge, ...]}.
# Membership is tested against a set, because rescanning the
# duplicate_session_ids list for every bill is needlessly quadratic.
duplicate_id_lookup = set(duplicate_session_ids)
duplicate_dictionary = {}
for bill in CA_list_of_bills:
    identifier = bill['bill']['identifier']
    if identifier in duplicate_id_lookup:
        duplicate_dictionary.setdefault(identifier, []).append(bill)

In [38]:
# Duplicated "AB" bills where either of the first two stored versions is
# attributed to the Senate. Only versions [0] and [1] are inspected.
assembly_bills_with_bad_senate_version = [
    versions
    for versions in duplicate_dictionary.values()
    if (versions[0]['bill']['fromOrganization']['name'] == 'Senate'
        or versions[1]['bill']['fromOrganization']['name'] == 'Senate')
    and versions[0]['bill']['identifier'].startswith('AB')
]

In [41]:
# Duplicated "SB" bills where either of the first two stored versions is
# attributed to the Assembly. Only versions [0] and [1] are inspected.
senate_bills_with_bad_assembly_version = [
    versions
    for versions in duplicate_dictionary.values()
    if (versions[0]['bill']['fromOrganization']['name'] == 'Assembly'
        or versions[1]['bill']['fromOrganization']['name'] == 'Assembly')
    and versions[0]['bill']['identifier'].startswith('SB')
]

In [52]:
# Duplicates whose identifier starts with neither "AB" nor "SB" and whose first
# two stored versions come from different organizations.
other_duplicates = [
    versions
    for versions in duplicate_dictionary.values()
    if versions[0]['bill']['fromOrganization']['name']
    != versions[1]['bill']['fromOrganization']['name']
    and versions[0]['bill']['identifier'][:2] not in ['AB', 'SB']
]

In [53]:
# Sanity check: the three groups together account for every duplicated identifier.
classified_groups = (
    other_duplicates,
    assembly_bills_with_bad_senate_version,
    senate_bills_with_bad_assembly_version,
)
sum(len(group) for group in classified_groups) == len(duplicate_dictionary)

True

In [61]:
# If a bill is a duplicate, the "bad" version of the bill is the one that doesn't match
# its identifier. That is, an "AB" or "ACR" bill shouldn't be coming from the Senate,
# and an "SB" or "SCR" bill shouldn't emanate from the Assembly.

# Organization that must NOT be the source for each identifier prefix letter.
wrong_chamber_for_prefix = {'A': 'Senate', 'S': 'Assembly'}
# Set membership avoids rescanning the duplicate_session_ids list for every bill.
duplicate_id_lookup = set(duplicate_session_ids)

to_purge = []
for bill in CA_list_of_bills:
    identifier = bill['bill']['identifier']
    if identifier not in duplicate_id_lookup:
        continue
    wrong_chamber = wrong_chamber_for_prefix.get(identifier[:1])
    # Identifiers starting with any other letter are never purged.
    if wrong_chamber is not None and bill['bill']['fromOrganization']['name'] == wrong_chamber:
        to_purge.append(bill)

In [69]:
# Dump both results as their Python repr (str of a list), one file each.
# The encoding is pinned to UTF-8 so that non-ASCII characters in bill titles
# are written the same way on every platform instead of depending on the
# locale's default encoding.
with open("to_purge.txt", "w", encoding="utf-8") as output:
    output.write(str(to_purge))
with open("duplicate_bills.txt", "w", encoding="utf-8") as output:
    output.write(str(duplicate_bills))

In [71]:
# Sanity check: the number of bills marked for purging equals the number of
# duplicated (identifier, title) pairs.
len(duplicate_bills) == len(to_purge)

True