In [6]:
import requests
import pandas as pd
import time
from config import api_key

# Query window sent to the API as the `from` / `to` parameters
# (values are written month-first, e.g. "01-2000")
begin_date = "01-2000"
end_date = "12-2023"
time_frame = "?from=" + begin_date + "&to=" + end_date

# Offense slugs to download; each one is inserted into the request URL path
crime = [
    "homicide",
    "rape",
    "robbery",
    "aggravated-assault",
    "arson",
    "burglary",
    "larceny",
    "motor-vehicle-theft",
]

# Function to fetch national crime data from the API
def fetch_crime_data(crime, time_frame, api_key, timeout=30):
    """Request the national summarized data for one crime type and return the JSON.

    Args:
        crime: Offense slug placed at the end of the URL path (e.g. "homicide").
        time_frame: Query-string fragment appended directly after the slug
            (e.g. "?from=01-2000&to=12-2023").
        api_key: Text appended verbatim after ``time_frame``.
            NOTE(review): no separator is inserted here, so the value
            presumably already carries its own "&<name>=" prefix -- confirm
            against config.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If the response status code is not 200.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # The three parts are concatenated as-is; nothing is URL-encoded here
    base_url = f'https://api.usa.gov/crime/fbi/cde/summarized/national/{crime}{time_frame}{api_key}'
    response = requests.get(base_url, timeout=timeout)

    # Anything other than a 200 is treated as a failed call
    if response.status_code != 200:
        raise ValueError(f"API call failed with status {response.status_code}")

    return response.json()

# Function to process the fetched national crime data into a DataFrame
def process_crime_data(crime_data):
    """Flatten the nested crime JSON into a DataFrame indexed by month label.

    Each output column is read from a dotted path inside ``crime_data``; the
    mapping found at the end of a path is expected to be ``{month: value}``.
    Values are aligned by their month key, so the sections of the payload do
    not need to list months in the same order.

    Args:
        crime_data: Decoded JSON payload (a dict) for one crime type.

    Returns:
        A DataFrame with one row per month label (in first-seen order) and
        the columns "Total Crimes", "Clearances", "Crime(Per 100k)",
        "Prosecutions(Per 100k)", "Total Pop", "Partic Pop", "Pop Coverage".
        A month or a whole section that is absent from the payload is
        filled with NaN.

    Raises:
        TypeError: If ``crime_data`` is not a dict.
    """
    if not isinstance(crime_data, dict):
        raise TypeError(
            f"crime_data must be a dict, got {type(crime_data).__name__}"
        )

    # Output column name -> dotted path of keys inside the payload
    data_columns = {
        "Total Crimes": "offenses.actuals.United States",
        "Clearances": "offenses.actuals.United States Clearances",
        "Crime(Per 100k)": "offenses.rates.United States",
        "Prosecutions(Per 100k)": "offenses.rates.United States Clearances",
        "Total Pop": "populations.population.United States",
        "Partic Pop": "populations.participated_population.United States",
        "Pop Coverage": "tooltips.Percent of Population Coverage.United States",
    }

    # Resolve every path to its {month: value} mapping
    values_by_column = {}
    for col_name, key_path in data_columns.items():
        node = crime_data
        for key in key_path.split("."):
            # A missing branch or a non-mapping node means "no data here"
            node = node.get(key, {}) if isinstance(node, dict) else {}
        values_by_column[col_name] = node if isinstance(node, dict) else {}

    # Ordered union of the month labels seen in any column, so the index
    # does not depend on whichever column happens to be processed last
    months = list(
        dict.fromkeys(
            month
            for values in values_by_column.values()
            for month in values
        )
    )

    # Look every value up by month so rows cannot be paired with the wrong
    # label when the sections of the payload are ordered differently
    missing = float("nan")
    table = {
        col_name: [values.get(month, missing) for month in months]
        for col_name, values in values_by_column.items()
    }

    return pd.DataFrame(table, index=months)

# Function to save the processed DataFrame to a CSV file
def save_to_csv(df, crime):
    """Write ``df`` to ``../Resources/National/<crime>_national_data.csv``.

    The DataFrame index is not written to the file.
    """
    destination = f"../Resources/National/{crime}_national_data.csv"
    df.to_csv(destination, index=False)

# Download, reshape, and export the national series for every crime type
for c in crime:
    try:
        # Pull the raw JSON and flatten it into a month-indexed table
        crime_data = fetch_crime_data(c, time_frame, api_key)
        df_crime_data = process_crime_data(crime_data)

        # "Prosecuted %" is clearances divided by total crimes, stored as a
        # fraction rounded to two decimals (it is not multiplied by 100)
        clearance_ratio = df_crime_data["Clearances"] / df_crime_data["Total Crimes"]
        df_crime_data["Prosecuted %"] = clearance_ratio.round(2)

        # Parse the month labels into timestamps; labels that do not match
        # the month-year format are coerced to NaT instead of raising
        month_index = pd.to_datetime(df_crime_data.index, format='%m-%Y', errors='coerce')
        df_crime_data.index = month_index

        # Move the dates out of the index into a regular "Month" column
        df_crime_data = df_crime_data.reset_index()
        df_crime_data = df_crime_data.rename(columns={"index": "Month"})

        # Column layout of the exported file
        reorder = [
            "Month",
            "Total Crimes",
            "Clearances",
            "Prosecuted %",
            "Crime(Per 100k)",
            "Prosecutions(Per 100k)",
            "Total Pop",
            "Partic Pop",
            "Pop Coverage",
        ]

        # Chronological order first, then the fixed column layout
        df_sorted = df_crime_data.sort_values(by=["Month"])
        df_sorted = df_sorted[reorder]

        # Write the file and report where it went
        save_to_csv(df_sorted, c)
        print(f"Processed Data for {c} and saved to '../Resources/National/{c}_national_data.csv'")
    except Exception as exc:
        # A failure for one crime type is reported and the loop moves on
        print(f"Error processing {c}: {exc}")

    # Brief pause between requests so the API is not hit in rapid succession
    time.sleep(2)


Processed Data for homicide and saved to '../Resources/National/homicide_national_data.csv'
Processed Data for rape and saved to '../Resources/National/rape_national_data.csv'
Processed Data for robbery and saved to '../Resources/National/robbery_national_data.csv'
Processed Data for aggravated-assault and saved to '../Resources/National/aggravated-assault_national_data.csv'
Processed Data for arson and saved to '../Resources/National/arson_national_data.csv'
Processed Data for burglary and saved to '../Resources/National/burglary_national_data.csv'
Processed Data for larceny and saved to '../Resources/National/larceny_national_data.csv'
Processed Data for motor-vehicle-theft and saved to '../Resources/National/motor-vehicle-theft_national_data.csv'
