In [None]:
import requests
import pandas as pd
import time
from config import api_key

# Query window, written as MM-YYYY, pre-assembled into a query-string fragment
begin_date = "01-2000"
end_date = "12-2023"
time_frame = "?from=" + begin_date + "&to=" + end_date

# Offense slugs whose data will be requested
crime = [
    "homicide",
    "rape",
    "robbery",
    "aggravated-assault",
    "arson",
    "burglary",
    "larceny",
    "motor-vehicle-theft",
]

# Two-letter abbreviation -> full name, for the 50 states plus DC
states = {
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DC": "District of Columbia",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MS": "Mississippi",
    "MT": "Montana",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
}

# Function to fetch crime data from the API
def fetch_crime_data(crime, state_code, time_frame, api_key, timeout=30):
    """Download summarized state-level data for one offense from the FBI CDE API.

    Args:
        crime: Offense slug inserted into the URL path (e.g. "homicide").
        state_code: State abbreviation inserted into the URL path.
        time_frame: Query-string fragment appended right after the path.
        api_key: Text appended verbatim after ``time_frame``.
            NOTE(review): presumably this already carries its own
            "&API_KEY=" style prefix -- confirm against ``config``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the whole run.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If the server answers with any status other than 200.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    base_url = f'https://api.usa.gov/crime/fbi/cde/summarized/state/{state_code}/{crime}{time_frame}{api_key}'
    response = requests.get(base_url, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f"API call failed with status {response.status_code}")
    return response.json()

# Function to process the fetched crime data into a DataFrame
def process_crime_data(crime_data, state_name):
    """Flatten one API response into a DataFrame with one row per period key.

    Each output column is read from a dot-separated path inside
    ``crime_data``; the leaf of every path is expected to be a mapping of
    period label -> value (presumably "MM-YYYY" month labels -- confirm
    against the API response).

    Columns are aligned on the period labels themselves rather than on
    position, and the index is the union of labels across all columns (in
    first-seen order). A path that is missing, or a label absent from one
    column, therefore yields NaN instead of breaking DataFrame construction
    or silently shifting values onto the wrong row.

    Args:
        crime_data: Decoded JSON response (nested dicts).
        state_name: Full state name used as the leaf key in the response.

    Returns:
        A DataFrame indexed by period label with the seven metric columns.
        It is empty (zero rows, all columns present) when nothing is found.
    """
    # Output column name -> dot-separated path into the nested response
    data_columns = {
        "Total Crimes": f"offenses.actuals.{state_name}",
        "Clearances": f"offenses.actuals.{state_name} Clearances",
        "Crime(Per 100k)": f"offenses.rates.{state_name}",
        "Prosecutions(Per 100k)": f"offenses.rates.{state_name} Clearances",
        "Total Pop": f"populations.population.{state_name}",
        "Partic Pop": f"populations.participated_population.{state_name}",
        "Pop Coverage": f"tooltips.Percent of Population Coverage.{state_name}",
    }

    extracted = {}
    for col_name, key_path in data_columns.items():
        node = crime_data
        for key in key_path.split("."):
            # A missing key or a non-dict value (e.g. JSON null) ends the walk
            # with an empty mapping instead of raising AttributeError.
            node = node.get(key, {}) if isinstance(node, dict) else {}
        extracted[col_name] = node if isinstance(node, dict) else {}

    # Union of period labels over every column, preserving first-seen order,
    # so the index no longer depends on whichever column was processed last.
    periods = list(
        dict.fromkeys(
            label for mapping in extracted.values() for label in mapping
        )
    )

    missing = float("nan")
    columns = {
        col_name: [mapping.get(label, missing) for label in periods]
        for col_name, mapping in extracted.items()
    }
    return pd.DataFrame(columns, index=periods)

# One list of per-state DataFrames for every crime type
crime_data_dict = {offense: [] for offense in crime}

# Walk every state, and within it every crime type, fetching and tidying the data
for state_code, state_name in states.items():
    for c in crime:
        try:
            # Download the raw response, then flatten it and tag each row
            # with the state abbreviation and the crime type
            raw_response = fetch_crime_data(c, state_code, time_frame, api_key)
            monthly = process_crime_data(raw_response, state_name).assign(
                State=state_code,
                Crime=c,
            )

            # Parse the month labels into datetimes (unparseable -> NaT),
            # then move them out of the index into a "Month" column
            monthly.index = pd.to_datetime(monthly.index, format='%m-%Y', errors='coerce')
            monthly = monthly.reset_index()
            monthly = monthly.rename(columns={"index": "Month"})

            # File the result under its crime type
            crime_data_dict[c].append(monthly)
        except Exception as e:
            # Report the failure and carry on with the next crime type
            print(f"Error processing {state_name} ({state_code}) - {c}: {e}")
        else:
            print(f"Processed data for {state_name} ({state_code}) - {c}")

    # Pause between states to avoid overloading the API
    time.sleep(2)

# Save each crime type to a separate CSV file
for c, data in crime_data_dict.items():
    # pd.concat raises ValueError on an empty list; if every request for this
    # crime type failed, skip it so the remaining crime types are still saved.
    if not data:
        print(f"No data collected for {c}; skipping CSV export")
        continue

    # Combine data for the crime type into a single DataFrame and calculate Prosecuted %
    # (stored as a ratio of clearances to total crimes, rounded to 2 decimals)
    all_data_df = pd.concat(data, ignore_index=True)
    all_data_df["Prosecuted %"] = (all_data_df["Clearances"] / all_data_df["Total Crimes"]).round(2)

    # Sort by State and Month
    all_data_df = all_data_df.sort_values(by=["State", "Month"])

    # Reorder columns as needed
    reorder = ["Month", "State", "Total Crimes", "Clearances", "Prosecuted %", "Crime(Per 100k)", "Prosecutions(Per 100k)", "Total Pop", "Partic Pop", "Pop Coverage", "Crime"]
    all_data_df = all_data_df[reorder]

    # Save the combined DataFrame to a CSV file
    output_path = f"../Resources/State/{c}_state_crime_data.csv"
    all_data_df.to_csv(output_path, index=False)

    # Log the successful saving of the file
    print(f"All data for {c} saved to '{output_path}'")


Processed data for Alaska (AK) - homicide
Processed data for Alaska (AK) - rape
Processed data for Alaska (AK) - robbery
Processed data for Alaska (AK) - aggravated-assault
Processed data for Alaska (AK) - arson
Processed data for Alaska (AK) - burglary
Processed data for Alaska (AK) - larceny
Processed data for Alaska (AK) - motor-vehicle-theft
Processed data for Alabama (AL) - homicide
Processed data for Alabama (AL) - rape
Processed data for Alabama (AL) - robbery
Processed data for Alabama (AL) - aggravated-assault
Processed data for Alabama (AL) - arson
Processed data for Alabama (AL) - burglary
Processed data for Alabama (AL) - larceny
Processed data for Alabama (AL) - motor-vehicle-theft
Processed data for Arkansas (AR) - homicide
Processed data for Arkansas (AR) - rape
Processed data for Arkansas (AR) - robbery
Processed data for Arkansas (AR) - aggravated-assault
Processed data for Arkansas (AR) - arson
Processed data for Arkansas (AR) - burglary
Processed data for Arkansas (A