In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Impor the OpenWeatherMap API key
from api_key import key
from geoapi import geo_apikey

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy



In [3]:
#https://api.census.gov/data/2019/acs/acs5?get=NAME,group(B01003)&for=city:*&key=YOUR_API_KEY

import requests
import json
# Set base URL
base_url = "https://api.census.gov/data"
year = '/2022'
data_name = '/acs/acs1/subject'
#all of the B02015 table ID data for all states
#qry = '?get=group(S1903)&for=place'
qry = "?get=NAME,S1903_C03_001E&for=place"
key = str('&key='+ key)

    # Make an API request using the params dictionary
test = str(base_url + year + data_name + qry + key)
test
response = requests.get(base_url + year + data_name + qry + key)
result = response.json()
print (json.dumps(result, indent=4))


[
    [
        "NAME",
        "S1903_C03_001E",
        "state",
        "place"
    ],
    [
        "Auburn city, Alabama",
        "54839",
        "01",
        "03076"
    ],
    [
        "Birmingham city, Alabama",
        "39326",
        "01",
        "07000"
    ],
    [
        "Dothan city, Alabama",
        "53929",
        "01",
        "21184"
    ],
    [
        "Hoover city, Alabama",
        "103194",
        "01",
        "35896"
    ],
    [
        "Huntsville city, Alabama",
        "68930",
        "01",
        "37000"
    ],
    [
        "Mobile city, Alabama",
        "50051",
        "01",
        "50000"
    ],
    [
        "Montgomery city, Alabama",
        "55108",
        "01",
        "51000"
    ],
    [
        "Tuscaloosa city, Alabama",
        "49005",
        "01",
        "77256"
    ],
    [
        "Anchorage municipality, Alaska",
        "100751",
        "02",
        "03000"
    ],
    [
        "Avondale city, Arizona",
        "66801

In [4]:
#https://www.pgpf.org/blog/2023/11/income-and-wealth-in-the-united-states-an-overview-of-recent-data#:~:text=According%20to%20the%20ASEC%2C%20median,the%20pandemic%20and%20subsequent%20recession.
median_income = 74580
# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    
    # Convert JSON data to DataFrame
    df = pd.DataFrame(data[1:], columns=data[0:])
    
    # Save the DataFrame to a CSV file
    df.to_csv("../data/census_data.csv", index=False)
    print("Data saved to census_data.csv")
else:
    print("Error:", response.status_code)

Data saved to census_data.csv


In [5]:
# Convert the cities weather data into a Pandas DataFrame
city_eco_data_df  = pd.read_csv('../data/census_data.csv')

# Splitting the 'NAME' column into two columns 'City' and 'State'
city_eco_data_df[['City', 'State']] = city_eco_data_df['NAME'].str.split(', ', expand=True)
#city_eco_data_df.drop(columns=['NAME'], inplace=True)

# Show Record Count
city_eco_data_df.count()
city_eco_data_df.head()

Unnamed: 0,NAME,S1903_C03_001E,state,place,City,State
0,"Auburn city, Alabama",54839,1,3076,Auburn city,Alabama
1,"Birmingham city, Alabama",39326,1,7000,Birmingham city,Alabama
2,"Dothan city, Alabama",53929,1,21184,Dothan city,Alabama
3,"Hoover city, Alabama",103194,1,35896,Hoover city,Alabama
4,"Huntsville city, Alabama",68930,1,37000,Huntsville city,Alabama


In [6]:
## splitting the eco median data into low income and high income..
city_eco_data_df = pd.DataFrame(city_eco_data_df)
city_eco_data_df = city_eco_data_df.drop_duplicates()
#city_eco_data_df = city_eco_data_df.drop_duplicates(subset=['City'])
# Splitting the DataFrame into two based on the condition
threshold_value = 74580
df_above_threshold = city_eco_data_df[city_eco_data_df['S1903_C03_001E'] > threshold_value]
df_below_threshold = city_eco_data_df[city_eco_data_df['S1903_C03_001E'] <= threshold_value]

len(city_eco_data_df), len(df_below_threshold), len(df_below_threshold)
#df_above_threshold, df_below_threshold


(646, 320, 320)

In [7]:
#the Geoapify Geocoding service URL for getting Coords
def get_coordinates(city_name, geo_apikey):
    base_url = "https://api.geoapify.com/v1/geocode/search"
    params = {
        "text": city_name,
        "apiKey": geo_apikey
    }

    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        data = response.json()
        if data['features']:
            # Extracting the first result's coordinates
            coordinates = data['features'][0]['geometry']['coordinates']
            return coordinates
        else:
            return "No results found"
    else:
        return "Error in API request"

# pull the data
api_key = geo_apikey # Replace with your actual Geoapify API key
city_name1 = "New York"
coordinates = get_coordinates(city_name1, api_key)
print(f"Coordinates of {city_name1}: {coordinates}")

Coordinates of New York: [-74.0060152, 40.7127281]


In [8]:
# Path to the CSV file
coord_csv_file_path = "../data/city_coordinates.csv"

# Read and display the first few rows of the saved CSV file for verification
city_coordinates_df = pd.read_csv(coord_csv_file_path)
print(city_coordinates_df.head())

                       City   Latitude  Longitude
0      Auburn city, Alabama  32.601014 -85.516657
1  Birmingham city, Alabama  33.599853 -86.632216
2      Dothan city, Alabama  31.221808 -85.378001
3      Hoover city, Alabama  33.387197 -86.805680
4  Huntsville city, Alabama  34.729847 -86.585901


In [9]:
### doing some checking on files and data to process and compare a list of the eco data, and run it against the geoapify data.
# if the file exists it will then check if the city has been extracted with coords from geoapify 
## this was due to odd behavior in the api response looping through the city eco data frame and starting over again, 
## then not having an accurate record count after the query was done.

# Check if the CSV file already exists and read it, else create a new DataFrame
try:
    existing_data_df = city_coordinates_df
except FileNotFoundError:
    existing_data_df = pd.DataFrame(columns=['City', 'Latitude', 'Longitude'])

# Write headers to the CSV file if it's empty
if existing_data_df.empty:
    existing_data_df.to_csv(coord_csv_file_path, index=False)

# Iterating through the cities and fetching coordinates
for city in city_eco_data_df['NAME']:
    # Check if the city is already in the existing data
    if city not in existing_data_df['City'].values:
        coordinates = get_coordinates(city, api_key)
        if coordinates != [None, None]:
            # Creating a DataFrame for the current city
            temp_df = pd.DataFrame([{
                'City': city,
                'Latitude': coordinates[1],  # Latitude
                'Longitude': coordinates[0]  # Longitude
            }])
            
            # Append the current city's data to the CSV file
            temp_df.to_csv(coord_csv_file_path, mode='a', header=False, index=False)
            print(f"Getting data for {city}")
        else:
            print(f"Failed to fetch coordinates for {city}")
    else:
        print(f"{city} already exists in the dataset.")

# Read and display the first few rows of the saved CSV file for verification
city_coordinates_df = pd.read_csv(coord_csv_file_path)
city_coordinates_df = city_coordinates_df.drop_duplicates()
print(city_coordinates_df.head())

Auburn city, Alabama already exists in the dataset.
Birmingham city, Alabama already exists in the dataset.
Dothan city, Alabama already exists in the dataset.
Hoover city, Alabama already exists in the dataset.
Huntsville city, Alabama already exists in the dataset.
Mobile city, Alabama already exists in the dataset.
Montgomery city, Alabama already exists in the dataset.
Tuscaloosa city, Alabama already exists in the dataset.
Anchorage municipality, Alaska already exists in the dataset.
Avondale city, Arizona already exists in the dataset.
Buckeye city, Arizona already exists in the dataset.
Casas Adobes CDP, Arizona already exists in the dataset.
Chandler city, Arizona already exists in the dataset.
Flagstaff city, Arizona already exists in the dataset.
Gilbert town, Arizona already exists in the dataset.
Glendale city, Arizona already exists in the dataset.
Goodyear city, Arizona already exists in the dataset.
Maricopa city, Arizona already exists in the dataset.
Mesa city, Arizona

In [10]:
print(str(len(city_coordinates_df)) + ' compared to the source list length of: ' + str(len(city_eco_data_df)))


646 compared to the source list length of: 646


In [11]:
# Join the median range above/below threshold DFs to the Coords city DataFrames on the city names
high_inc_merged_df = df_above_threshold.merge(city_coordinates_df, left_on='NAME', right_on='City')
high_inc_merged_df = high_inc_merged_df.rename(columns={'S1903_C03_001E': 'High_med_inc'})
low_inc_merged_df = df_below_threshold.merge(city_coordinates_df, left_on='NAME', right_on='City')
low_inc_merged_df = low_inc_merged_df.rename(columns={'S1903_C03_001E': 'Low_med_inc'})

high_inc_merged_df.head(), low_inc_merged_df.head()

(                             NAME  High_med_inc  state  place  \
 0            Hoover city, Alabama        103194      1  35896   
 1  Anchorage municipality, Alaska        100751      2   3000   
 2           Buckeye city, Arizona         95235      4   7940   
 3       Casas Adobes CDP, Arizona         75650      4  10670   
 4          Chandler city, Arizona         98664      4  12000   
 
                    City_x    State                          City_y   Latitude  \
 0             Hoover city  Alabama            Hoover city, Alabama  33.387197   
 1  Anchorage municipality   Alaska  Anchorage municipality, Alaska  61.216313   
 2            Buckeye city  Arizona           Buckeye city, Arizona  33.370320   
 3        Casas Adobes CDP  Arizona       Casas Adobes CDP, Arizona  32.340932   
 4           Chandler city  Arizona          Chandler city, Arizona  33.306203   
 
     Longitude  
 0  -86.805680  
 1 -149.894852  
 2 -112.583776  
 3 -111.010201  
 4 -111.841185  ,
     

In [13]:
#Adding the cout of store type to the merged High income DF..
# Set parameters to search for commercial weapons stores
radius = 10000  # 100 kilometers
categories = "commercial.weapons"
#geo_apikey = geo_apikey  # Replace with your Geoapify API key

print("Starting commercial weapons store search")

# Iterate through the DataFrame
for index, row in high_inc_merged_df.iterrows():
    lat, lon = row['Latitude'], row['Longitude']  # Ensure these are the correct column names for latitude and longitude

    # Update the params dictionary with dynamic values
    params = {
        "apiKey": geo_apikey,
        "categories": categories,
        "filter": f"circle:{lon},{lat},{radius}",
        "bias": f"proximity:{lon},{lat}"
    }

    # Set base URL
    base_url = "https://api.geoapify.com/v2/places"

    # Make an API request using the params dictionary
    response = requests.get(base_url, params=params)
    stores = response.json()

    # Count the number of stores found
    try:
        store_count = len(stores["features"])
        high_inc_merged_df.loc[index, "Store Count"] = store_count
    except (KeyError, IndexError):
        high_inc_merged_df.loc[index, "Store Count"] = 0

    print(f"{row['NAME']} - Number of stores found: {high_inc_merged_df.loc[index, 'Store Count']}")

Starting commercial weapons store search
Hoover city, Alabama - Number of stores found: 1.0
Anchorage municipality, Alaska - Number of stores found: 1.0
Buckeye city, Arizona - Number of stores found: 0.0
Casas Adobes CDP, Arizona - Number of stores found: 0.0
Chandler city, Arizona - Number of stores found: 12.0
Gilbert town, Arizona - Number of stores found: 20.0
Goodyear city, Arizona - Number of stores found: 0.0
Maricopa city, Arizona - Number of stores found: 0.0
Mesa city, Arizona - Number of stores found: 20.0
Peoria city, Arizona - Number of stores found: 3.0
Phoenix city, Arizona - Number of stores found: 1.0
Queen Creek town, Arizona - Number of stores found: 1.0
San Tan Valley CDP, Arizona - Number of stores found: 0.0
Scottsdale city, Arizona - Number of stores found: 0.0
Surprise city, Arizona - Number of stores found: 0.0
Tempe city, Arizona - Number of stores found: 4.0
Rogers city, Arkansas - Number of stores found: 0.0
Alameda city, California - Number of stores found

In [14]:
high_inc_merged_df.head()

Unnamed: 0,NAME,High_med_inc,state,place,City_x,State,City_y,Latitude,Longitude,Store Count
0,"Hoover city, Alabama",103194,1,35896,Hoover city,Alabama,"Hoover city, Alabama",33.387197,-86.80568,1.0
1,"Anchorage municipality, Alaska",100751,2,3000,Anchorage municipality,Alaska,"Anchorage municipality, Alaska",61.216313,-149.894852,1.0
2,"Buckeye city, Arizona",95235,4,7940,Buckeye city,Arizona,"Buckeye city, Arizona",33.37032,-112.583776,0.0
3,"Casas Adobes CDP, Arizona",75650,4,10670,Casas Adobes CDP,Arizona,"Casas Adobes CDP, Arizona",32.340932,-111.010201,0.0
4,"Chandler city, Arizona",98664,4,12000,Chandler city,Arizona,"Chandler city, Arizona",33.306203,-111.841185,12.0


In [16]:
#take the average amount of store types per high income city
# Calculate the total store count
total_store_count = high_inc_merged_df['Store Count'].sum()

# Calculate the number of locations
number_of_locations = len(high_inc_merged_df)

# Calculate the average store count per location
average_store_count = total_store_count / number_of_locations

print("Average Store Count per Location:", average_store_count)

Average Store Count per Location: 0.8742331288343558


In [17]:
#Adding the cout of store type to the merged LOW income DF..
# Set parameters to search for commercial weapons stores
radius = 10000  # 100 kilometers
categories = "commercial.weapons"
#geo_apikey = geo_apikey  # Replace with your Geoapify API key

print("Starting commercial weapons store search")

# Iterate through the DataFrame
for index, row in low_inc_merged_df.iterrows():
    lat, lon = row['Latitude'], row['Longitude']  # Ensure these are the correct column names for latitude and longitude

    # Update the params dictionary with dynamic values
    params = {
        "apiKey": geo_apikey,
        "categories": categories,
        "filter": f"circle:{lon},{lat},{radius}",
        "bias": f"proximity:{lon},{lat}"
    }

    # Set base URL
    base_url = "https://api.geoapify.com/v2/places"

    # Make an API request using the params dictionary
    response = requests.get(base_url, params=params)
    stores = response.json()

    # Count the number of stores found
    try:
        store_count = len(stores["features"])
        low_inc_merged_df.loc[index, "Store Count"] = store_count
    except (KeyError, IndexError):
        low_inc_merged_df.loc[index, "Store Count"] = 0

    print(f"{row['NAME']} - Number of stores found: {low_inc_merged_df.loc[index, 'Store Count']}")

Starting commercial weapons store search
Auburn city, Alabama - Number of stores found: 0.0
Birmingham city, Alabama - Number of stores found: 1.0
Dothan city, Alabama - Number of stores found: 0.0
Huntsville city, Alabama - Number of stores found: 0.0
Mobile city, Alabama - Number of stores found: 0.0
Montgomery city, Alabama - Number of stores found: 0.0
Tuscaloosa city, Alabama - Number of stores found: 0.0
Avondale city, Arizona - Number of stores found: 0.0
Flagstaff city, Arizona - Number of stores found: 1.0
Glendale city, Arizona - Number of stores found: 4.0
Tucson city, Arizona - Number of stores found: 0.0
Yuma city, Arizona - Number of stores found: 1.0
Conway city, Arkansas - Number of stores found: 0.0
Fayetteville city, Arkansas - Number of stores found: 0.0
Fort Smith city, Arkansas - Number of stores found: 1.0
Jonesboro city, Arkansas - Number of stores found: 0.0
Little Rock city, Arkansas - Number of stores found: 1.0
Springdale city, Arkansas - Number of stores fou

In [18]:
#take the average amount of store types per LOW income city
# Calculate the total store count
total_store_count_low = low_inc_merged_df['Store Count'].sum()

# Calculate the number of locations
number_of_locations_low = len(low_inc_merged_df)

# Calculate the average store count per location
average_store_count_low = total_store_count_low / number_of_locations_low

print("Average Store Count per Location:", average_store_count_low)

Average Store Count per Location: 0.61875


In [19]:
#save the modified low/high income DFs with additional store coutn summaries to a csv

high_inc_merged_df.to_csv('../data/high_inc_data.csv', index=False)
low_inc_merged_df.to_csv('../data/low_inc_data.csv', index=False)