In [None]:
import requests
import pandas as pd
import time
from config import api_key, census_api_key
from census import Census

# Query window bounds; the values are month-year strings (MM-YYYY)
begin_date, end_date = "01-2000", "12-2023"

# Query-string fragment that is appended to every request URL
time_frame = "?from={}&to={}".format(begin_date, end_date)

# Offense slugs to request, one API call per entry
crime = [
    "homicide",
    "rape",
    "robbery",
    "aggravated-assault",
    "arson",
    "burglary",
    "larceny",
    "motor-vehicle-theft",
]

# Function to fetch national crime data from the API
def fetch_crime_data(crime, time_frame, api_key, timeout=30):
    """Request the national summarized data for one crime type and return the JSON.

    Args:
        crime: Offense slug placed at the end of the URL path (e.g. "homicide").
        time_frame: Query-string fragment appended right after the slug,
            e.g. "?from=01-2000&to=12-2023".
        api_key: Text appended verbatim after ``time_frame``.
            NOTE(review): no parameter name or "&" separator is added here, so
            this presumably already contains them — confirm against config.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If the response status code is not 200.
        requests.exceptions.RequestException: If the request itself fails,
            including when the timeout expires.
    """
    # Construct the full URL from the crime type, time frame and key suffix
    url = f'https://api.usa.gov/crime/fbi/cde/summarized/national/{crime}{time_frame}{api_key}'

    # An explicit timeout keeps a stalled connection from hanging the run forever
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is treated as a failed call
    if response.status_code != 200:
        raise ValueError(f"API call failed with status {response.status_code}")

    return response.json()

# Function to process the fetched national crime data into a DataFrame
def process_crime_data(crime_data, crime_type=None):
    """Flatten the nested crime JSON into a DataFrame with one row per period key.

    Each output column is read from a fixed dotted path inside ``crime_data``.
    Columns are aligned on their period keys, so a period missing from one
    series shows up as NaN in that column instead of shifting values or
    raising a length-mismatch error.

    Args:
        crime_data: Nested dict as returned by ``fetch_crime_data``.
        crime_type: Optional label. When given, a constant "Crime Type" column
            holding it is added; when omitted, no such column is produced.
            (Previously the label was read from a global loop variable.)

    Returns:
        A DataFrame indexed by the period keys found at the leaves of the
        configured paths, with one column per entry of the path mapping.
    """
    # Output column name -> dotted path into the JSON
    data_columns = {
        "Total Crimes": "offenses.actuals.United States",
        "Clearances": "offenses.actuals.United States Clearances",
        "Crime(Per 100k)": "offenses.rates.United States",
        "Prosecutions(Per 100k)": "offenses.rates.United States Clearances",
        "Total Pop": "populations.population.United States",
        "Partic Pop": "populations.participated_population.United States",
        "Pop Coverage": "tooltips.Percent of Population Coverage.United States"
    }

    series_by_column = {}

    for col_name, key_path in data_columns.items():
        node = crime_data

        # Walk down the nested structure; a missing or non-dict level ends in {}
        for key in key_path.split("."):
            node = node.get(key, {}) if isinstance(node, dict) else {}
        if not isinstance(node, dict):
            node = {}

        # An empty leaf becomes a float column so it fills with NaN and stays
        # usable in numeric aggregations; otherwise let pandas infer the dtype.
        series_by_column[col_name] = pd.Series(node, dtype=None if node else "float64")

    # Building from Series aligns every column on its own keys
    frame = pd.DataFrame(series_by_column)

    if crime_type is not None:
        frame["Crime Type"] = crime_type

    return frame

# Initialize an empty list to store the aggregated data
aggregated_data = []

# Iterate over each crime type to fetch, process, and aggregate the data
for c in crime:
    try:
        # Fetch crime data for the current crime type
        crime_data = fetch_crime_data(c, time_frame, api_key)

        # Process the crime data into a DataFrame
        df_crime_data = process_crime_data(crime_data)

        # "Prosecuted %" is the clearances-to-crimes ratio rounded to two decimals.
        # A zero denominator is mapped to NaN so the month is skipped by the yearly
        # mean instead of contributing inf.
        df_crime_data["Prosecuted %"] = (
            df_crime_data["Clearances"] / df_crime_data["Total Crimes"].replace(0, float("nan"))
        ).round(2)

        # Convert the index (months) to DateTime; unparseable keys become NaT
        df_crime_data.index = pd.to_datetime(df_crime_data.index, format='%m-%Y', errors='coerce')

        # Reset the index and rename the column to "Month"
        df_crime_data = df_crime_data.reset_index().rename(columns={"index": "Month"})

        # Sort chronologically so the "last" aggregations below pick the latest month
        df_sorted = df_crime_data.sort_values(by=["Month"])

        # Aggregate by calendar year. After reset_index the grouping column is
        # still named "Month" but now holds the year.
        yearly_data = df_sorted.groupby(df_sorted["Month"].dt.year).agg({
            "Total Crimes": "sum",
            "Clearances": "sum",
            "Crime(Per 100k)": "mean",
            "Prosecutions(Per 100k)": "mean",
            "Prosecuted %": "mean",
            "Total Pop": "last",
            "Partic Pop": "last",
            "Pop Coverage": "last"
        }).reset_index()

        # Add a column for the crime type
        yearly_data["Crime Type"] = c

        # Append the aggregated data to the list
        aggregated_data.append(yearly_data)

        # Log the successful processing and aggregation of data
        print(f"Processed Data for {c}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next crime type
        print(f"Error processing {c}: {e}")

    # Pause for a short time to avoid overloading the API (to stay within rate limits)
    time.sleep(2)

# Every failure above is only logged, so the list can end up empty; fail with a
# clear message rather than pd.concat's "No objects to concatenate".
if not aggregated_data:
    raise ValueError("No crime data could be fetched or processed; see the errors logged above.")

# Concatenate the aggregated data into a single DataFrame
aggregated_df = pd.concat(aggregated_data, ignore_index=True)

# Separate violent and property crimes into different columns
violent_crimes = ["homicide", "rape", "robbery", "aggravated-assault"]
property_crimes = ["arson", "burglary", "larceny", "motor-vehicle-theft"]

# Vectorized split: keep "Total Crimes" where the row's crime type belongs to the
# category and put 0 elsewhere (replaces a row-by-row apply with a lambda).
aggregated_df["Violent Crimes"] = aggregated_df["Total Crimes"].where(
    aggregated_df["Crime Type"].isin(violent_crimes), 0
)
aggregated_df["Property Crimes"] = aggregated_df["Total Crimes"].where(
    aggregated_df["Crime Type"].isin(property_crimes), 0
)

# Group by year (held in the "Month" column) and sum the violent and property
# crimes; population figures are taken from the last row of each year.
final_df = aggregated_df.groupby("Month").agg({
    "Violent Crimes": "sum",
    "Property Crimes": "sum",
    "Total Pop": "last",
    "Partic Pop": "last",
    "Pop Coverage": "last"
}).reset_index()

# Rename the "Month" column to "Year"
final_df = final_df.rename(columns={"Month": "Year"})

# Rates per 100,000 of "Total Pop", rounded to two decimals
final_df['Violent Crime Rate'] = (final_df['Violent Crimes'] / final_df['Total Pop'] * 100000).round(2)
final_df['Property Crime Rate'] = (final_df['Property Crimes'] / final_df['Total Pop'] * 100000).round(2)
final_df['Total Crime Rate'] = (
    (final_df['Violent Crimes'] + final_df['Property Crimes']) / final_df['Total Pop'] * 100000
).round(2)

# Fix the column order of the output
final_df = final_df[["Year", "Violent Crimes", "Violent Crime Rate", "Property Crimes", "Property Crime Rate", "Total Pop",
                     "Partic Pop", "Pop Coverage", "Total Crime Rate"]]

# Run through Data for Poverty Count from Census
# Create the column up front so it exists (as NaN) even if every lookup fails;
# otherwise later code reading 'Poverty Count' would raise KeyError.
final_df['Poverty Count'] = float('nan')

for index, row in final_df.iterrows():
    # Resolved outside the try so the handler below always reports the right year
    year = int(row['Year'])
    try:
        # Separate name for the client: `c` is the crime-type loop variable
        census_client = Census(
            census_api_key,
            year=year
        )
        # Request variable B17001_002E at the national level from the ACS5 dataset
        census_data = census_client.acs5.get(
            "B17001_002E",
            {'for': 'us'}
        )

        # Extract the poverty count
        poverty_count = census_data[0]['B17001_002E']
        final_df.at[index, 'Poverty Count'] = int(poverty_count)

        print(f"Grabbed poverty count for {year}")

    except Exception as e:
        # Best effort: leave NaN for this year, but report the actual cause rather
        # than assuming the year is simply unavailable.
        print(f"Could not get poverty count for {year}: {e}")

# Derive the remaining columns in one step. All three values are computed from
# the current frame, so the poverty rate uses the counts before missing ones
# are replaced by 0 (years without a count keep a NaN rate).
final_df = final_df.assign(**{
    "Poverty Rate": final_df['Poverty Count'].div(final_df['Total Pop']).mul(100).round(2),
    "Poverty Count": final_df['Poverty Count'].fillna(0).astype(int),
    "Total Crimes": final_df['Violent Crimes'].add(final_df["Property Crimes"]),
})

# Write the result to disk without the index column
final_df.to_csv("../Resources/National/National_Crime_Poverty.csv", index=False)

print("Final aggregated data saved to '../Resources/National/National_Crime_Poverty.csv'")

Processed Data for homicide
Processed Data for rape
Processed Data for robbery
Processed Data for aggravated-assault
Processed Data for arson
Processed Data for burglary
Processed Data for larceny
Processed Data for motor-vehicle-theft
Census does not contain poverty data for 2000.
Census does not contain poverty data for 2001.
Census does not contain poverty data for 2002.
Census does not contain poverty data for 2003.
Census does not contain poverty data for 2004.
Census does not contain poverty data for 2005.
Census does not contain poverty data for 2006.
Census does not contain poverty data for 2007.
Census does not contain poverty data for 2008.
Grabbed poverty count for 2009
Grabbed poverty count for 2010
Grabbed poverty count for 2011
Grabbed poverty count for 2012
Grabbed poverty count for 2013
Grabbed poverty count for 2014
Grabbed poverty count for 2015
Grabbed poverty count for 2016
Grabbed poverty count for 2017
Grabbed poverty count for 2018
Grabbed poverty count for 2019
