In [1]:
# Dependencies and Setup
import hvplot.pandas as hvplot
import pandas as pd
import requests
import matplotlib.pyplot as plt
import numpy as np
import time
from scipy.stats import linregress
import json

# Import API key
from alan_api_keys import geoapify_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy



In [2]:
#Read CSV Files
high_df = pd.read_csv('../data/high_inc_data.csv')
low_df = pd.read_csv('../data/low_inc_data.csv')

#Create into data frames
df_high = pd.DataFrame(high_df)
df_low = pd.DataFrame(low_df)


In [3]:
df_high.head()


Unnamed: 0,NAME,High_med_inc,state,place,City_x,State,City_y,Latitude,Longitude,Store Count
0,"Hoover city, Alabama",103194,1,35896,Hoover city,Alabama,"Hoover city, Alabama",33.387197,-86.80568,1.0
1,"Anchorage municipality, Alaska",100751,2,3000,Anchorage municipality,Alaska,"Anchorage municipality, Alaska",61.216313,-149.894852,1.0
2,"Buckeye city, Arizona",95235,4,7940,Buckeye city,Arizona,"Buckeye city, Arizona",33.37032,-112.583776,0.0
3,"Casas Adobes CDP, Arizona",75650,4,10670,Casas Adobes CDP,Arizona,"Casas Adobes CDP, Arizona",32.340932,-111.010201,0.0
4,"Chandler city, Arizona",98664,4,12000,Chandler city,Arizona,"Chandler city, Arizona",33.306203,-111.841185,12.0


In [4]:
df_low.head()

Unnamed: 0,NAME,Low_med_inc,state,place,City_x,State,City_y,Latitude,Longitude,Store Count
0,"Auburn city, Alabama",54839,1,3076,Auburn city,Alabama,"Auburn city, Alabama",32.601014,-85.516657,0.0
1,"Birmingham city, Alabama",39326,1,7000,Birmingham city,Alabama,"Birmingham city, Alabama",33.599853,-86.632216,1.0
2,"Dothan city, Alabama",53929,1,21184,Dothan city,Alabama,"Dothan city, Alabama",31.221808,-85.378001,0.0
3,"Huntsville city, Alabama",68930,1,37000,Huntsville city,Alabama,"Huntsville city, Alabama",34.729847,-86.585901,0.0
4,"Mobile city, Alabama",50051,1,50000,Mobile city,Alabama,"Mobile city, Alabama",30.689362,-88.051896,0.0


In [6]:
#Clean DataFrames

#pulling cols into var
high_col_to_pull = ['City_x', 'State', 'Latitude', 'Longitude', 'High_med_inc']
low_col_to_pull = ['City_x', 'State', 'Latitude', 'Longitude', 'Low_med_inc']

#Selected df
high_sel_df = df_high[high_col_to_pull]
low_sel_df = df_low[low_col_to_pull]

high_sel_df.columns = ['Cities', 'States', 'Lat', 'Lon', 'High Income']
low_sel_df.columns = ['Cities', 'States', 'Lat', 'Lon', 'Low Income']

#removed the word'city' from the City/Town Column
high_sel_df['Cities'] = high_sel_df['Cities'].str.replace(' city', '', case=False)
low_sel_df['Cities'] = low_sel_df['Cities'].str.replace(' city', '', case=False)


#removed the word'town' from the City/Town Column
high_sel_df['Cities'] = high_sel_df['Cities'].str.replace(' town', '', case=False)
low_sel_df['Cities'] = low_sel_df['Cities'].str.replace(' town', '', case=False)

#removed the word'town' from the City/Town Column
high_sel_df['Cities'] = high_sel_df['Cities'].str.replace(' CDP', '', case=False)
low_sel_df['Cities'] = low_sel_df['Cities'].str.replace(' CDP', '', case=False)

#Cleaned DataFrames
high_city_df = high_sel_df
low_city_df = low_sel_df

high_city_df['Cities'].drop_duplicates
low_city_df['Cities'].drop_duplicates

#clean city cols

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  high_sel_df['Cities'] = high_sel_df['Cities'].str.replace(' city', '', case=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  low_sel_df['Cities'] = low_sel_df['Cities'].str.replace(' city', '', case=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  high_sel_df['Cities'] = high_sel_df['Citi

<bound method Series.drop_duplicates of 0                    Auburn
1                Birmingham
2                    Dothan
3                Huntsville
4                    Mobile
               ...         
315      Caguas zona urbana
316    Carolina zona urbana
317    Guaynabo zona urbana
318       Ponce zona urbana
319    San Juan zona urbana
Name: Cities, Length: 320, dtype: object>

In [7]:
high_city_df.head()

Unnamed: 0,Cities,States,Lat,Lon,High Income
0,Hoover,Alabama,33.387197,-86.80568,103194
1,Anchorage municipality,Alaska,61.216313,-149.894852,100751
2,Buckeye,Arizona,33.37032,-112.583776,95235
3,Casas Adobes,Arizona,32.340932,-111.010201,75650
4,Chandler,Arizona,33.306203,-111.841185,98664


In [8]:
low_city_df.head()

Unnamed: 0,Cities,States,Lat,Lon,Low Income
0,Auburn,Alabama,32.601014,-85.516657,54839
1,Birmingham,Alabama,33.599853,-86.632216,39326
2,Dothan,Alabama,31.221808,-85.378001,53929
3,Huntsville,Alabama,34.729847,-86.585901,68930
4,Mobile,Alabama,30.689362,-88.051896,50051


In [9]:
low_city_df.groupby('States')['Cities'].count()

States
Alabama            7
Arizona            5
Arkansas           6
California        28
Colorado           3
Connecticut        5
Delaware           1
Florida           37
Georgia            7
Idaho              3
Illinois           9
Indiana            9
Iowa               7
Kansas             4
Kentucky           3
Louisiana          7
Maine              1
Maryland           2
Massachusetts      7
Michigan          10
Minnesota          4
Mississippi        2
Missouri           6
Montana            1
Nebraska           2
Nevada             5
New Jersey         9
New Mexico         3
New York           8
North Carolina    10
North Dakota       2
Ohio               9
Oklahoma           4
Oregon             4
Pennsylvania       7
Puerto Rico        6
Rhode Island       2
South Carolina     4
South Dakota       2
Tennessee          8
Texas             37
Utah               3
Virginia           7
Washington         5
Wisconsin          8
Wyoming            1
Name: Cities, dtype: int64

In [10]:
high_city_df.groupby('States')['Cities'].count()

States
Alabama                   1
Alaska                    1
Arizona                  14
Arkansas                  1
California              114
Colorado                 16
Connecticut               3
District of Columbia      1
Florida                  20
Georgia                   6
Hawaii                    1
Idaho                     2
Illinois                 10
Indiana                   3
Iowa                      2
Kansas                    3
Maryland                 10
Massachusetts            10
Michigan                  7
Minnesota                 9
Missouri                  3
Montana                   1
Nevada                    4
New Hampshire             2
New Jersey                4
New Mexico                1
New York                  2
North Carolina            5
Oklahoma                  2
Oregon                    4
Rhode Island              2
South Carolina            2
Tennessee                 1
Texas                    27
Utah                      8
Virginia     

In [13]:
high_city_df.head(1)

Unnamed: 0,Cities,States,Lat,Lon,High Income
0,Hoover,Alabama,33.387197,-86.80568,103194


In [17]:
# Set parameters to search for bar (no category for liquor store, use bar instead)
radius = 5000  
categories = "commercial.food_and_drink.health_food"

print("Starting commercial.food_and_drink.health_food search...")

# Iterate through the DataFrame to get bar count for high income cities
for index, row in high_city_df.iterrows():
    lat, lon = row['Lat'], row['Lon']  

    params = {"apiKey": geoapify_key,
              "categories": categories,
              "limit": 20,
             "filter": f"circle:{lon},{lat},{radius}",
             "bias": f"proximity:{lon},{lat}"}

    url = "https://api.geoapify.com/v2/places"
    data = requests.get(url, params=params).json()
   
    # Count the number of bars
    try:
        high_city_df.loc[index, "Health Food Count"] = len(data["features"])
    except (KeyError, IndexError):
        high_city_df.loc[index, "Health Food Count"] = 0

    print(f"{row['Cities']} - # of health food found within 5000m radius: {high_city_df.loc[index, 'Health Food Count']}")

Starting commercial.food_and_drink.health_food search...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  high_city_df.loc[index, "Health Food Count"] = len(data["features"])


Hoover - # of health food found within 5000m radius: 0.0
Anchorage municipality - # of health food found within 5000m radius: 0.0
Buckeye - # of health food found within 5000m radius: 0.0
Casas Adobes - # of health food found within 5000m radius: 0.0
Chandler - # of health food found within 5000m radius: 0.0
Gilbert - # of health food found within 5000m radius: 0.0
Goodyear - # of health food found within 5000m radius: 0.0
Maricopa - # of health food found within 5000m radius: 0.0
Mesa - # of health food found within 5000m radius: 0.0
Peoria - # of health food found within 5000m radius: 0.0
Phoenix - # of health food found within 5000m radius: 0.0
Queen Creek - # of health food found within 5000m radius: 0.0
San Tan Valley - # of health food found within 5000m radius: 0.0
Scottsdale - # of health food found within 5000m radius: 1.0
Surprise - # of health food found within 5000m radius: 0.0
Tempe - # of health food found within 5000m radius: 0.0
Rogers - # of health food found within 50

KeyboardInterrupt: 