-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNY_read_senate_api.py
More file actions
128 lines (100 loc) · 3.86 KB
/
Copy pathNY_read_senate_api.py
File metadata and controls
128 lines (100 loc) · 3.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import argparse
import json
import logging
import os
import shutil
import time
import pandas as pd
import requests
from utils import get_ny_senate_api_key
# Invoke the project-local API-key helper at import time; the return value is
# discarded here. NOTE(review): presumably this loads or validates the key as a
# side effect -- confirm against utils.get_ny_senate_api_key.
get_ny_senate_api_key.main()
# Configure logging at INFO level with "timestamp - level - message" lines.
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
# Module-level logger used by the functions defined in this file.
logger = logging.getLogger(__name__)
def fetch_ny_senate_bills(year: int, api_key=None) -> None:
    """Download all bills for ``year`` from the NY Senate API, page by page.

    Each page (up to 1000 bills) is written verbatim as JSON to
    ``bills_<year>_offset_<offset>.json`` inside the ``data/raw/senate-api``
    directory located two levels above this file. Paging stops as soon as the
    API reports an unsuccessful response or an ``"empty list"`` response type.

    Args:
        year: Four-digit year whose bills should be fetched.
        api_key: API key sent with every request. When falsy, the key is
            obtained from ``get_ny_senate_api_key.main()``.

    Raises:
        ValueError: If ``year`` is not an integer between 1000 and 9999.
        requests.RequestException: If a request fails or times out.
    """
    if not isinstance(year, int) or year < 1000 or year > 9999:
        raise ValueError("Year must be a 4-digit integer.")

    base_url = f"https://legislation.nysenate.gov/api/3/bills/{year}"
    logger.info(f"Fetching bills for {year} from {base_url}")

    if not api_key:
        api_key = get_ny_senate_api_key.main()

    # Anchor the batch directory on this file so it does not depend on the cwd.
    save_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..", "data", "raw", "senate-api")
    )
    logger.info(f"Creating temporary directory {save_dir}")
    os.makedirs(save_dir, exist_ok=True)

    # Paging starts at offset 1 and advances by the page size each iteration.
    offset = 1
    while True:
        # Let requests build the query string so the key is URL-encoded, and
        # bound each request so a stalled connection cannot hang the run.
        response = requests.get(
            base_url,
            params={"key": api_key, "limit": 1000, "offset": offset},
            timeout=60,
        )
        data = response.json()

        # .get() keeps an unexpected payload on the "failed" path instead of
        # raising KeyError.
        if not data.get("success"):
            logger.error("Failed to fetch data. Stopping.")
            return None

        if data.get("responseType") == "empty list":
            logger.info("Received 'empty list' response. Stopping.")
            return None

        filename = os.path.join(save_dir, f"bills_{year}_offset_{offset}.json")
        with open(filename, "w") as f:
            json.dump(data, f)
        logger.info(f"Saved {filename}")

        offset += 1000
        # Small delay to avoid overwhelming the API.
        time.sleep(0.5)
def merge_json_files(directory: str, output_file: str) -> None:
    """
    Merge all JSON files in the given directory into a single JSON file.

    Every ``*.json`` file in ``directory`` must contain a list under
    ``["result"]["items"]``; those lists are concatenated and written to
    ``output_file`` as one JSON array. After the merged file is written,
    ``directory`` and everything in it is deleted.

    Args:
        directory (str): The directory containing the JSON files.
        output_file (str): The path to the output file.

    Raises:
        KeyError: If a JSON file lacks the ``result`` / ``items`` keys.
    """
    logger.info(f"Merging JSON files from {directory} into {output_file}")

    # os.listdir order is arbitrary. Sorting by (length, name) makes the output
    # deterministic and, for batch files that differ only in a trailing
    # "_offset_<n>" number, yields ascending offset order.
    json_names = sorted(
        (name for name in os.listdir(directory) if name.endswith(".json")),
        key=lambda name: (len(name), name),
    )

    merged_data = []
    for name in json_names:
        with open(os.path.join(directory, name), "r") as f:
            merged_data.extend(json.load(f)["result"]["items"])

    with open(output_file, "w") as f:
        json.dump(merged_data, f, indent=4)
    logger.info(f"Merged {len(merged_data)} bills into {output_file}")

    # The per-batch files are only intermediate; remove them once merged.
    shutil.rmtree(directory)
    logger.info(f"Deleted temporary directory {directory}")
def main(year: int, api_key) -> pd.DataFrame:
    """Fetch a year's bills, merge the batches, and load them as a DataFrame.

    All paths are derived from one base directory (``data/raw`` two levels
    above this file, the same base ``fetch_ny_senate_bills`` writes to), so the
    merge step reads the batches that were just fetched and the DataFrame is
    loaded from the file that was just written.

    Args:
        year: Four-digit year whose bills should be fetched.
        api_key: API key forwarded to ``fetch_ny_senate_bills``; may be None.

    Returns:
        The merged bills as a DataFrame with a fresh 0..n-1 index.
    """
    raw_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..", "data", "raw")
    )
    batch_dir = os.path.join(raw_dir, "senate-api")
    merged_path = os.path.join(raw_dir, f"NY-{year}-senate.json")

    fetch_ny_senate_bills(year, api_key)
    merge_json_files(batch_dir, merged_path)

    bills_df = pd.read_json(merged_path).reset_index(drop=True)
    return bills_df
if __name__ == "__main__":
    # Command-line entry point: parse arguments, switch to the source
    # directory, run the fetch/merge pipeline, then restore the original cwd.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--year", type=int, required=True, help="Year for which to fetch bills"
    )
    parser.add_argument("--api_key", type=str, help="API key for the NY Senate API")
    # Previously a hard-coded chdir target; still the default, now overridable
    # so the script can run on other machines.
    parser.add_argument(
        "--src_dir",
        type=str,
        default="/Users/henryjosephson/personal/Projects/leg_eff/src",
        help="Working directory to switch to while the script runs",
    )

    try:
        args = parser.parse_args()
    except SystemExit:
        print(
            "Usage: python NY_read_senate_api.py --year <year> [--api_key <api_key>]"
        )
        raise

    year = args.year
    # Treat an empty --api_key the same as an omitted one.
    api_key = args.api_key or None

    cwd = os.getcwd()
    os.chdir(args.src_dir)
    try:
        main(year, api_key)
    finally:
        # Restore the caller's working directory even if the run fails.
        os.chdir(cwd)