In [None]:
import requests
import pandas as pd
import time
from config import api_key, census_api_key
from pprint import pprint
from census import Census

In [2]:
# Two-letter abbreviation -> full name for the 50 states plus the District of Columbia.
# Entries are kept in alphabetical order by abbreviation, which is also the order
# in which the download loop visits them.
states = {
    "AK": "Alaska", "AL": "Alabama", "AR": "Arkansas",
    "AZ": "Arizona", "CA": "California", "CO": "Colorado",
    "CT": "Connecticut", "DC": "District of Columbia", "DE": "Delaware",
    "FL": "Florida", "GA": "Georgia", "HI": "Hawaii",
    "IA": "Iowa", "ID": "Idaho", "IL": "Illinois",
    "IN": "Indiana", "KS": "Kansas", "KY": "Kentucky",
    "LA": "Louisiana", "MA": "Massachusetts", "MD": "Maryland",
    "ME": "Maine", "MI": "Michigan", "MN": "Minnesota",
    "MO": "Missouri", "MS": "Mississippi", "MT": "Montana",
    "NC": "North Carolina", "ND": "North Dakota", "NE": "Nebraska",
    "NH": "New Hampshire", "NJ": "New Jersey", "NM": "New Mexico",
    "NV": "Nevada", "NY": "New York", "OH": "Ohio",
    "OK": "Oklahoma", "OR": "Oregon", "PA": "Pennsylvania",
    "RI": "Rhode Island", "SC": "South Carolina", "SD": "South Dakota",
    "TN": "Tennessee", "TX": "Texas", "UT": "Utah",
    "VA": "Virginia", "VT": "Vermont", "WA": "Washington",
    "WI": "Wisconsin", "WV": "West Virginia", "WY": "Wyoming",
}

# Function to fetch agency data from the API
def fetch_agency_data(state_abbr, timeout=30):
    """Download the agency listing for one state from the FBI Crime Data API.

    Parameters
    ----------
    state_abbr : str
        Two-letter state abbreviation placed in the request path.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30). Without
        a timeout a stalled connection would block the whole download loop.

    Returns
    -------
    The decoded JSON body of the response. The calling loop treats it as a
    mapping of county -> list of agency records.

    Raises
    ------
    ValueError
        If the API answers with any status code other than 200.
    requests.exceptions.RequestException
        On connection failures or when ``timeout`` expires.
    """
    base_url = f'https://api.usa.gov/crime/fbi/cde/agency/byStateAbbr/{state_abbr}'
    # The key travels in the query string; name the argument so that is explicit.
    response = requests.get(base_url, params=api_key, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f"API call failed for {state_abbr} with status {response.status_code}")
    return response.json()

# Accumulates one record per qualifying agency across every state
all_filtered_data = []

# Visit the states one by one, keeping only city agencies that carry coordinates
for state_abbr, state_name in states.items():
    try:
        state_payload = fetch_agency_data(state_abbr)

        # The payload is keyed by county; each value is that county's agency list
        for _county, county_agencies in state_payload.items():
            for agency in county_agencies:
                lat = agency.get('latitude')
                lon = agency.get('longitude')

                # Skip anything that is not a city agency or has no usable location
                if agency.get('agency_type_name') != 'City' or lat is None or lon is None:
                    continue

                all_filtered_data.append({
                    'State': state_abbr,
                    'Agency Name': agency.get('agency_name'),
                    'Latitude': lat,
                    'Longitude': lon,
                    'ORI': agency.get('ori'),
                })
        print(f"Processed data for {state_name} ({state_abbr})")
    except Exception as e:
        # Best effort: report the failing state and carry on with the next one
        print(f"Error processing {state_name} ({state_abbr}): {e}")

    # Throttle: one second between states to go easy on the API
    time.sleep(1)

# One DataFrame for all states
df_all_agencies = pd.DataFrame(all_filtered_data)

# Derive "City" from the agency name: drop the "Police Department" text, then trim
city_names = df_all_agencies["Agency Name"].str.replace(r"Police Department", "", regex=True)
df_all_agencies["City"] = city_names.str.strip()

# Persist the table without the index column
output_path = "../Resources/Agency/Agency_Data/filtered_city_agencies.csv"
df_all_agencies.to_csv(output_path, index=False)

print(f"All filtered agency data saved to {output_path}")


Processed data for Alaska (AK)
Processed data for Alabama (AL)
Processed data for Arkansas (AR)
Processed data for Arizona (AZ)
Processed data for California (CA)
Processed data for Colorado (CO)
Processed data for Connecticut (CT)
Processed data for District of Columbia (DC)
Processed data for Delaware (DE)
Processed data for Florida (FL)
Processed data for Georgia (GA)
Processed data for Hawaii (HI)
Processed data for Iowa (IA)
Processed data for Idaho (ID)
Processed data for Illinois (IL)
Processed data for Indiana (IN)
Processed data for Kansas (KS)
Processed data for Kentucky (KY)
Processed data for Louisiana (LA)
Processed data for Massachusetts (MA)
Processed data for Maryland (MD)
Processed data for Maine (ME)
Processed data for Michigan (MI)
Processed data for Minnesota (MN)
Processed data for Missouri (MO)
Processed data for Mississippi (MS)
Processed data for Montana (MT)
Processed data for North Carolina (NC)
Processed data for North Dakota (ND)
Processed data for Nebraska 

In [5]:
# Preview the first rows of the collected agency table.
# NOTE(review): the stored output under this cell shows an "Unnamed: 0" column that the
# preceding cell never creates — presumably the frame was reloaded from a CSV in a cell
# that is no longer present; confirm the notebook still works after Restart & Run All.
df_all_agencies.head()

Unnamed: 0,State,Agency Name,Latitude,Longitude,ORI,City
0,AK,Nome Police Department,64.783686,-164.188912,AK0010600,Nome
1,AK,Sitka Police Department,57.052124,-135.33418,AK0010900,Sitka
2,AK,Bethel Police Department,60.928916,-160.15335,AK0011300,Bethel
3,AK,Haines Police Department,59.098771,-135.576936,AK0012100,Haines
4,AK,Juneau Police Department,58.356556,-134.50731,AK0010300,Juneau


In [8]:
# Census API client fixed to the 2021 data year; the population lookup queries it via c.acs5
c = Census(census_api_key, year=2021)

# Two-letter state abbreviation -> two-digit FIPS code (kept as zero-padded strings).
# Entries are listed in ascending FIPS order, grouped by decade.
state_abbr_to_fips = {
    # 01-09
    "AL": "01", "AK": "02", "AZ": "04", "AR": "05",
    "CA": "06", "CO": "08", "CT": "09",
    # 10-19
    "DE": "10", "DC": "11", "FL": "12", "GA": "13", "HI": "15",
    "ID": "16", "IL": "17", "IN": "18", "IA": "19",
    # 20-29
    "KS": "20", "KY": "21", "LA": "22", "ME": "23", "MD": "24",
    "MA": "25", "MI": "26", "MN": "27", "MS": "28", "MO": "29",
    # 30-39
    "MT": "30", "NE": "31", "NV": "32", "NH": "33", "NJ": "34",
    "NM": "35", "NY": "36", "NC": "37", "ND": "38", "OH": "39",
    # 40-49
    "OK": "40", "OR": "41", "PA": "42", "RI": "44", "SC": "45",
    "SD": "46", "TN": "47", "TX": "48", "UT": "49",
    # 50-56
    "VT": "50", "VA": "51", "WA": "53", "WV": "54", "WI": "55", "WY": "56",
}

# Per-state cache of Census place tables, keyed by state FIPS code. Every city in a
# state needs the same "all places in the state" query, so it is downloaded only once
# instead of once per agency row.
_places_by_state = {}

# Trailing place-type descriptors that Census appends to place names
# (e.g. "Nome city", "Anchorage municipality"). Matched case-sensitively so that a
# capitalised "City" belonging to the actual place name is left alone.
# NOTE(review): this list covers the common descriptors, not necessarily all of them.
_PLACE_TYPE_SUFFIX = (
    r"\s+(?:city and borough|consolidated government|metropolitan government|"
    r"unified government|urban county|municipality|borough|village|city|town|CDP)"
    r"(?:\s+\(balance\))?$"
)


def _get_places_for_state(state_fips):
    """Return the place/population table for one state, downloading it at most once."""
    if state_fips not in _places_by_state:
        census_data = c.acs5.get(
            (
                "NAME",          # Place name
                "B01003_001E",   # Total population
            ),
            {'for': 'place:*', 'in': f'state:{state_fips}'}
        )
        # Only cache after a successful call so a failed request is retried next time
        _places_by_state[state_fips] = pd.DataFrame(census_data).rename(
            columns={
                "B01003_001E": "Population",
                "NAME": "City",
                "place": "PlaceID"
            }
        )
    return _places_by_state[state_fips]


def _bare_place_names(names):
    """Reduce Census place names ("Nome city, Alaska") to lowercase bare names ("nome")."""
    return (
        names
        .str.rsplit(pat=",", n=1).str[0]                    # drop the ", <State>" tail
        .str.replace(_PLACE_TYPE_SUFFIX, "", regex=True)    # drop "city", "town", ...
        .str.strip()
        .str.lower()
    )


# Function to get population data for a specific city and state
def get_population_for_city(city, state_abbr):
    """Look up the total population (B01003_001E) of a city.

    Parameters
    ----------
    city : str
        City name, e.g. "Nome". Surrounding whitespace is ignored.
    state_abbr : str
        Two-letter state abbreviation; must be a key of ``state_abbr_to_fips``.

    Returns
    -------
    The population value of the first matching place, or ``None`` when the city
    is missing/blank or no place in the state matches.

    Raises
    ------
    ValueError
        If ``state_abbr`` has no FIPS code.

    Notes
    -----
    An exact comparison against the Census name is tried first. Because Census
    place names carry a type suffix and the state ("Nome city, Alaska"), a
    second, case-insensitive comparison is made with those parts removed.
    """
    # Convert state abbreviation to FIPS code
    state_fips = state_abbr_to_fips.get(state_abbr)
    if not state_fips:
        raise ValueError(f"Invalid state abbreviation: {state_abbr}")

    # A missing agency name yields a non-string city; there is nothing to look up
    if not isinstance(city, str) or not city.strip():
        return None

    places = _get_places_for_state(state_fips)
    if "City" not in places.columns:
        return None  # the API returned no rows for this state

    wanted = city.strip()
    place_names = places["City"].astype(str).str.strip()

    # 1) Exact name match
    exact = places.loc[place_names == wanted, "Population"]
    if not exact.empty:
        return exact.iloc[0]

    # 2) Match on the bare place name, ignoring case
    relaxed = places.loc[_bare_place_names(place_names) == wanted.lower(), "Population"]
    if not relaxed.empty:
        return relaxed.iloc[0]

    return None  # Return None if the city is not found in the API response

# Iterate over rows in df_all_agencies and collect exactly one population value per row
population_list = []

for _, row in df_all_agencies.iterrows():
    city = row['City']
    state_abbr = row['State']

    try:
        # Fetch the population for the current city
        population = get_population_for_city(city, state_abbr)
    except Exception as e:
        print(f"Error fetching population for {city}, {state_abbr}: {e}")
        population = None  # Keep the list aligned with the DataFrame rows
    else:
        # The lookup returns None when nothing matched; report that honestly
        # instead of claiming that data was found.
        if population is None:
            print(f"No population data found for {city}, {state_abbr}; appended None to population_list.")
        else:
            print(f"Found Population Data for {city}, {state_abbr} and appended it to population_list.")

    population_list.append(population)

# Add the population data as a new column in df_all_agencies
df_all_agencies['Population'] = population_list

# Display the first few rows of the updated DataFrame
print(df_all_agencies.head())

# Save the final DataFrame with population data to a new CSV file
output_path = "../Resources/Agency/df_all_agencies_with_population.csv"
df_all_agencies.to_csv(output_path, index=False)
print(f"Data saved to {output_path}")


Found Population Data for Nome, AK and appended it to population_list.
Found Population Data for Sitka, AK and appended it to population_list.
Found Population Data for Bethel, AK and appended it to population_list.
Found Population Data for Haines, AK and appended it to population_list.
Found Population Data for Juneau, AK and appended it to population_list.
Found Population Data for Skagway, AK and appended it to population_list.
Found Population Data for Wrangell, AK and appended it to population_list.
Found Population Data for Anchorage, AK and appended it to population_list.
Found Population Data for Dillingham, AK and appended it to population_list.
Found Population Data for Petersburg, AK and appended it to population_list.
Found Population Data for Bristol Bay Borough, AK and appended it to population_list.
Found Population Data for North Slope Borough, AK and appended it to population_list.
Found Population Data for Hoonah, AK and appended it to population_list.
Found Populati

KeyboardInterrupt: 

In [None]:
################################  DO NOT RUN THIS CODE  ##############################

















# Offense names requested from the API, grouped by category
# NOTE(review): "homicide" is not in the violent list — confirm the omission is intended
violent_crimes = [
    "rape",
    "robbery",
    "aggravated-assault",
]
property_crimes = [
    "arson",
    "burglary",
    "larceny",
    "motor-vehicle-theft",
]
all_crimes = [*violent_crimes, *property_crimes]

# Query-string fragment spanning January through December 2023
begin_date, end_date = "01-2023", "12-2023"
time_frame = "?from={}&to={}".format(begin_date, end_date)

# Function to fetch crime data for a specific agency and crime
def fetch_crime_data(ori, crime, time_frame, timeout=30):
    """Download the summarized data for one agency and one offense.

    Parameters
    ----------
    ori : str
        Agency ORI identifier placed in the request path.
    crime : str
        Offense name placed in the request path (e.g. "robbery").
    time_frame : str
        Query-string fragment appended after the path, e.g.
        "?from=01-2023&to=12-2023".
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30). Without
        a timeout a stalled connection would block the whole loop.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If the API answers with any status code other than 200.
    requests.exceptions.RequestException
        On connection failures or when ``timeout`` expires.
    """
    # NOTE(review): api_key is concatenated directly after the query string, so it
    # presumably already carries its own "&<name>=" prefix — confirm against config.py.
    base_url = f'https://api.usa.gov/crime/fbi/cde/summarized/agency/{ori}/{crime}{time_frame}{api_key}'
    response = requests.get(base_url, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f"API call failed for ORI {ori}, crime {crime} with status {response.status_code}")
    return response.json()

# Function to process crime data and calculate totals for violent and property crimes
def process_crime_data(crime_data, violent_crimes, property_crimes):
    """Aggregate per-offense payloads into violent and property totals.

    Parameters
    ----------
    crime_data : dict
        Maps an offense name to its payload. Each payload is read with
        ``.get("actuals", {})`` (a mapping whose values are summed) and
        ``.get("population", 0)``.
        NOTE(review): confirm the live API really exposes top-level
        "actuals" and "population" keys.
    violent_crimes, property_crimes : container of str
        Offense names counted toward each category. An offense found in
        ``violent_crimes`` is never also counted as property crime.

    Returns
    -------
    tuple
        ``(violent_total, property_total, population)``. ``population`` is the
        first truthy value met while iterating, or the last falsy value read
        (0 when ``crime_data`` is empty).
    """
    totals = {"violent": 0, "property": 0}
    population = 0

    for offense, payload in crime_data.items():
        # Decide which running total, if any, this offense contributes to
        if offense in violent_crimes:
            bucket = "violent"
        elif offense in property_crimes:
            bucket = "property"
        else:
            bucket = None

        if bucket is not None:
            totals[bucket] += sum(payload.get("actuals", {}).values())

        # Keep probing until some payload supplies a usable population
        if not population:
            population = payload.get("population", 0)

    return totals["violent"], totals["property"], population

# List to hold the results
results = []

# Iterate over each agency in the df_all_agencies DataFrame
for _, agency in df_all_agencies.iterrows():
    ori = agency["ORI"]
    state = agency["State"]
    agency_name = agency["Agency Name"]
    latitude = agency["Latitude"]
    longitude = agency["Longitude"]

    # Dictionary to hold crime data for the agency
    crime_data = {}

    try:
        # Fetch data for all crimes (one request per offense)
        for crime in all_crimes:
            crime_data[crime] = fetch_crime_data(ori, crime, time_frame)

        # Calculate totals for violent and property crimes
        violent_total, property_total, population = process_crime_data(
            crime_data, violent_crimes, property_crimes
        )

        # Append the results
        results.append({
            "State": state,
            "Agency Name": agency_name,
            "ORI": ori,
            "Latitude": latitude,
            "Longitude": longitude,
            "2023 Violent Crime": violent_total,
            "2023 Property Crime": property_total,
            "Population": population
        })

        print(f"Processed data for ORI: {ori} - {agency_name}")

    except Exception as e:
        # Best effort: report the failing agency and move on to the next one
        print(f"Error processing ORI: {ori} - {agency_name}: {e}")

    # The pause between agencies is currently commented out, so requests are
    # issued back to back.
    # time.sleep(1)

# Create a DataFrame from the results
df_city_crime_data = pd.DataFrame(results)

# process_crime_data reports 0 when no population was returned. Dividing by that
# would produce inf/NaN rates (and a None population would raise), so populations
# that are non-numeric or not positive are treated as missing for the rate columns.
# The stored "Population" column itself is left untouched.
valid_population = pd.to_numeric(df_city_crime_data["Population"], errors="coerce")
valid_population = valid_population.where(valid_population > 0)

# Calculate rates (instances per 100,000 people); NaN where population is unknown
df_city_crime_data["Violent Crime Rate"] = (
    df_city_crime_data["2023 Violent Crime"] / valid_population * 100000
).round(2)
df_city_crime_data["Property Crime Rate"] = (
    df_city_crime_data["2023 Property Crime"] / valid_population * 100000
).round(2)
df_city_crime_data["Total Crime Rate"] = (
    (df_city_crime_data["2023 Violent Crime"] + df_city_crime_data["2023 Property Crime"]) /
    valid_population * 100000
).round(2)

# Sort the DataFrame by State and then by Total Crime Rate (missing rates sort last)
df_city_crime_data = df_city_crime_data.sort_values(by=["State", "Total Crime Rate"])

# Save the DataFrame to a CSV file
output_path = "../Resources/Agency/city_crime_data_2023.csv"
df_city_crime_data.to_csv(output_path, index=False)

print(f"City crime data for 2023 saved to {output_path}")
