In [8]:
import pickle
import pandas as pd
import requests
import time
import os

In [9]:
with open("subset_data.pickle", "rb") as file:
    subset_data = pickle.load(file)

In [10]:
YOUR_API_KEY = ""

In [11]:
def get_cosponsors(congress, bill_type, bill_number, offset):
    url = f"https://api.congress.gov/v3/bill/{congress}/{bill_type}/{bill_number}/cosponsors?api_key={YOUR_API_KEY}&format=json&offset={offset}&limit=250"
    response = requests.get(url)
    return response.json()

In [12]:
def parse_cosponsor_data(cosponsor, bill_number, congress):
    return {
        "billNumber": bill_number,
        "congress": congress,
        "sponsorshipDate": cosponsor.get("sponsorshipDate"),
        "bioguideId": cosponsor.get("bioguideId"),
        "isOriginalCosponsor": cosponsor.get("isOriginalCosponsor"),
    }

In [13]:
def save_to_csv(cosponsor_data, batch_number):
    cosponsor_df = pd.DataFrame(cosponsor_data)
    cosponsor_df.to_csv(f"cosponsor_data_batch_{batch_number}.csv", index=False)
    print(f"Saved cosponsor_data_batch_{batch_number}.csv")

In [14]:
cosponsor_data = []
batch_number = 1
bills_processed = 0

api_calls = 0
start_time = time.time()

for index, row in subset_data.iterrows():
    congress = row['congress']
    bill_number = row['number']
    bill_type = row['type']
    offset = 0

    while True:
        if api_calls >= 1000:
            elapsed_time = time.time() - start_time
            if elapsed_time < 3600:  # 3600 seconds in an hour
                time_to_wait = 3600 - elapsed_time
                time.sleep(time_to_wait)

            # Reset API call count and start time after waiting
            api_calls = 0
            start_time = time.time()

        cosponsors_data = get_cosponsors(congress, bill_number, bill_type, offset)
        api_calls += 1
        cosponsors = cosponsors_data.get("cosponsors", [])

        if not cosponsors:
            break

        for cosponsor in cosponsors:
            cosponsor_data.append(parse_cosponsor_data(cosponsor, bill_number, congress))

        offset += 250

    bills_processed += 1
    print(f"Progress: {bills_processed}/{len(subset_data)} - Bill Number: {bill_number}")

    if bills_processed % 50 == 0:
        save_to_csv(cosponsor_data, batch_number)
        cosponsor_data = []
        batch_number += 1

# Save remaining data after the loop
if cosponsor_data:
    save_to_csv(cosponsor_data, batch_number)

Progress: 1/29338 - Bill Number: 954.0
Progress: 2/29338 - Bill Number: 882.0
Progress: 3/29338 - Bill Number: 759.0
Progress: 4/29338 - Bill Number: 4562.0
Progress: 5/29338 - Bill Number: 4737.0
Progress: 6/29338 - Bill Number: 3369.0
Progress: 7/29338 - Bill Number: 3633.0
Progress: 8/29338 - Bill Number: 3210.0
Progress: 9/29338 - Bill Number: 3004.0
Progress: 10/29338 - Bill Number: 5020.0
Progress: 11/29338 - Bill Number: 4841.0
Progress: 12/29338 - Bill Number: 48.0
Progress: 13/29338 - Bill Number: 4561.0
Progress: 14/29338 - Bill Number: 4564.0
Progress: 15/29338 - Bill Number: 3678.0
Progress: 16/29338 - Bill Number: 2308.0
Progress: 17/29338 - Bill Number: 4011.0
Progress: 18/29338 - Bill Number: 3442.0
Progress: 19/29338 - Bill Number: 3125.0
Progress: 20/29338 - Bill Number: 4722.0
Progress: 21/29338 - Bill Number: 2309.0
Progress: 22/29338 - Bill Number: 2323.0
Progress: 23/29338 - Bill Number: 4563.0
Progress: 24/29338 - Bill Number: 2920.0
Progress: 25/29338 - Bill Numb

KeyboardInterrupt: 

In [None]:
# Save remaining data after the loop
if cosponsor_data:
    save_to_csv(cosponsor_data, batch_number)