In [1]:
import pandas as pd
import numpy as np

## Denver neighborhoods and coordinates

Geojson data for Denver was obtained from https://www.kaggle.com/broach/denverairbnb?select=neighbourhoods.geojson.
Coordinates in the original json file are provided for each Denver neighorhood in a polygon shape. The center coordinates for each neighborhood were calcuated by averaging the highest number and lowest number for latitudes and longitudes, respectively.

In [4]:
df_neighborhood.to_csv('Denver_neighborhoods.csv')

In [5]:
import json

with open('neighbourhoods.geojson') as f:
    data = json.load(f)


In [6]:
Denver_coordinates = {}
for feature in data['features']:
    coordinates = feature['geometry']['coordinates'][0][0]
    neighborhood = feature['properties']['neighbourhood']
    Denver_coordinates[neighborhood] = coordinates

In [7]:
for neighorhood in Denver_coordinates:
    coordinates = Denver_coordinates[neighorhood]
    flat_list = [item for sublist in coordinates for item in sublist]
    longitudes = flat_list[::2]
    latitudes = flat_list[1::2]
    low_lat = min(latitudes)
    high_lat = max(latitudes)
    low_lng = min(longitudes)
    high_lng = max(longitudes)
    center_lat = low_lat + ((high_lat - low_lat)/2)
    center_lng = low_lng + ((high_lng - low_lng)/2)
    coordinates = (center_lat,center_lng)
    Denver_coordinates[neighorhood] = coordinates


In [8]:
df_Denver_coordinates = pd.DataFrame(Denver_coordinates).transpose().reset_index()
df_Denver_coordinates.columns = ['Neighborhood','Latitude','Longitude']
df_Denver_coordinates


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Chaffee Park,39.788557,-105.011692
1,Sunnyside,39.776827,-105.011738
2,Highland,39.761623,-105.011746
3,University,39.675101,-104.966432
4,Globeville,39.780219,-104.982894
...,...,...,...
73,Union Station,39.752963,-104.999938
74,Lowry Field,39.719904,-104.891712
75,Five Points,39.759543,-104.987542
76,Stapleton,39.780144,-104.865912


Visualize the neighborhoods in a map using Folium package

In [9]:
import folium
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [10]:
address = 'Denver, Colorado'

geolocator = Nominatim(user_agent="denver_explorer")
location = geolocator.geocode(address)
Denver_lat = location.latitude
Denver_lng = location.longitude

In [11]:
Denver_map = folium.Map(location=[Denver_lat, Denver_lng], zoom_start=11)

for lat, lng, label in zip(df_Denver_coordinates.Latitude, df_Denver_coordinates.Longitude, df_Denver_coordinates.Neighborhood):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(Denver_map)

Denver_map

## Housing data for neighborhoods

Raw data was downloaded from Zillow: https://www.zillow.com/research/data/.

In [12]:
df_Zillow_housing = pd.read_csv('Neighborhood_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv')
df_Zillow_housing.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,1996-01-31,...,2020-02-29,2020-03-31,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30
0,274772,0,Northeast Dallas,Neighborhood,TX,TX,Dallas,Dallas-Fort Worth-Arlington,Dallas County,132343.0,...,324845.0,324917.0,326237.0,328195.0,330590.0,333233.0,335638.0,338601.0,343524.0,348957.0
1,112345,1,Maryvale,Neighborhood,AZ,AZ,Phoenix,Phoenix-Mesa-Scottsdale,Maricopa County,,...,190385.0,192596.0,195149.0,197695.0,200059.0,202769.0,205996.0,209917.0,213816.0,218411.0
2,192689,2,Paradise,Neighborhood,NV,NV,Las Vegas,Las Vegas-Henderson-Paradise,Clark County,138996.0,...,268916.0,270899.0,272631.0,273723.0,274122.0,275312.0,277788.0,281152.0,283765.0,286100.0
3,270958,3,Upper West Side,Neighborhood,NY,NY,New York,New York-Newark-Jersey City,New York County,254412.0,...,1254324.0,1243722.0,1244017.0,1243078.0,1241951.0,1247438.0,1252656.0,1258274.0,1252099.0,1245560.0
4,118208,4,South Los Angeles,Neighborhood,CA,CA,Los Angeles,Los Angeles-Long Beach-Anaheim,Los Angeles County,133918.0,...,529049.0,534504.0,539172.0,541698.0,544530.0,549764.0,557257.0,565000.0,571646.0,577043.0


Data were then extracted for the neighborhoods in Denver, CO

In [13]:
df_Denver_housing = df_Zillow_housing[df_Zillow_housing['City']=='Denver']
df_Denver_housing.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,1996-01-31,...,2020-02-29,2020-03-31,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30
304,273809,316,Gateway - Green Valley Ranch,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,128553.0,...,373295.0,375486.0,377419.0,379089.0,381121.0,383398.0,386001.0,388096.0,391399.0,395100.0
315,6018,327,Montbello,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,100523.0,...,347293.0,349540.0,351923.0,353631.0,355504.0,357047.0,359112.0,360799.0,363765.0,366846.0
586,268671,615,Hampden,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,128596.0,...,440380.0,443045.0,445428.0,447507.0,449973.0,452785.0,456636.0,460062.0,465025.0,470006.0
645,275564,677,Stapleton,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,217134.0,...,638405.0,641295.0,644282.0,646508.0,648232.0,650010.0,652458.0,654955.0,659153.0,664170.0
772,268778,809,Westwood,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,76278.0,...,314629.0,316856.0,318821.0,320636.0,322195.0,324022.0,326061.0,328599.0,332777.0,337175.0


In [14]:
df_Denver_housing['RegionName'].value_counts

<bound method IndexOpsMixin.value_counts of 304      Gateway - Green Valley Ranch
315                         Montbello
586                           Hampden
645                         Stapleton
772                          Westwood
                     ...             
6697                         Rosedale
8191                     Civic Center
9514                              DIA
9517                       Sun Valley
11805                         Auraria
Name: RegionName, Length: 77, dtype: object>

Discrepancy in the number of neighborhoods was observed: 78 neighoborhoods in df_Denver_coordinates vs. 77 neighoborhoods in df_Denver_housing. It would be desirable to compare the difference of neighoborhood list in these two dataframes.

In [15]:
unique_coordinates = [nbhd for nbhd in set(df_Denver_coordinates.Neighborhood) if nbhd not in set(df_Denver_housing.RegionName)]
unique_housing = [nbhd for nbhd in set(df_Denver_housing.RegionName) if nbhd not in set(df_Denver_coordinates.Neighborhood)]

print('The unique neighborhoods in df_Denver_coordinates include: ',unique_coordinates)
print('The unique neighborhoods in df_Denver_housing include: ',unique_housing)

The unique neighborhoods in df_Denver_coordinates include:  ['CBD', 'Kennedy']
The unique neighborhoods in df_Denver_housing include:  ['Central Business District']


In df_Denver_housing, we will change the name "Central Business District" to "CBD" for consistency. To handle the missing value for Kennedy, we will use the data for Hampden for Kennedy, considering that Hampden is the only neighboring neighhorhood of Kennedy, thus similar housing price (this may not be true due to potential differences in housing type distribution, year of built, etc.). 

In [None]:
df_Denver_housing.loc[df_Denver_housing['RegionName'] =='Central Business District', 'RegionName'] = 'CBD'

In [17]:
New_Kennedy = df_Denver_housing.loc[df_Denver_housing['RegionName'] =='Hampden'].replace('Hampden','Kennedy')
df_Denver_housing = df_Denver_housing.append(New_Kennedy, ignore_index=True) #df.append doesn't change df, need to update df

In [18]:
df_Denver_housing.loc[df_Denver_housing['RegionName'].isin(['Kennedy','Hampden','CBD'])]

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,1996-01-31,...,2020-02-29,2020-03-31,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30
2,268671,615,Hampden,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,128596.0,...,440380.0,443045.0,445428.0,447507.0,449973.0,452785.0,456636.0,460062.0,465025.0,470006.0
64,268632,4720,CBD,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,154485.0,...,408067.0,408634.0,409031.0,408914.0,409366.0,409993.0,410857.0,411928.0,413550.0,415906.0
77,268671,615,Kennedy,Neighborhood,CO,CO,Denver,Denver-Aurora-Lakewood,Denver County,128596.0,...,440380.0,443045.0,445428.0,447507.0,449973.0,452785.0,456636.0,460062.0,465025.0,470006.0


The data from Zillow is time series from 1996 to 2020. We will use the most recent data (last column "2020-11-30") as an indicator of Denver housing index.

In [19]:
df_Denver_housing = df_Denver_housing[['RegionName','2020-11-30']]
df_Denver_housing.rename(columns={'RegionName':'Neighborhood','2020-11-30':'HousingIndex'},inplace=True)

In [20]:
df_Denver_housing

Unnamed: 0,Neighborhood,HousingIndex
0,Gateway - Green Valley Ranch,395100.0
1,Montbello,366846.0
2,Hampden,470006.0
3,Stapleton,664170.0
4,Westwood,337175.0
...,...,...
73,Civic Center,578786.0
74,DIA,279800.0
75,Sun Valley,433999.0
76,Auraria,720482.0


In [21]:
denver_geo = r'neighbourhoods.geojson'

Denver_map.choropleth(
    geo_data=denver_geo,
    data=df_Denver_housing,
    columns=['Neighborhood', 'HousingIndex'],
    key_on='feature.properties.neighbourhood',
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Denver Housing Index'
)

Denver_map



## Analyze Denver Neighborhoods

Now we use Foursquare API to explore Denver neighbohoods.

In [22]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

Foursquare has a large number of categories, but only a few were selected in this application that are very relevant to the needs of working parents of young children. Categories selected include Child Care Service (5744ccdfe4b0c0459246b4c7), Baby Store (52f2ab2ebcbc57f1066b8b32), Preschool (52e81612bcbc57f1066b7a45), Drugstore (5745c2e4498e11e7bccabdbd), Grocery Store (4bf58dd8d48988d118951735), Organic Grocery (52f2ab2ebcbc57f1066b8b45), Urgent Care Center (56aa371be4b08b9a8d573526), Gym / Fitness Center (4bf58dd8d48988d175941735), and Shopping Mall (4bf58dd8d48988d1fd941735).

In [23]:
categoryId = '5744ccdfe4b0c0459246b4c7,52f2ab2ebcbc57f1066b8b32,52e81612bcbc57f1066b7a45,5745c2e4498e11e7bccabdbd,4bf58dd8d48988d118951735,52f2ab2ebcbc57f1066b8b45,56aa371be4b08b9a8d573526,4bf58dd8d48988d175941735,4bf58dd8d48988d1fd941735'
radius = 1000

In [24]:
import requests

In [25]:
def getNearbyVenues(names, latitudes, longitudes, categoryId, radius):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
                  
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            categoryId,
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
Denver_venues = getNearbyVenues(df_Denver_coordinates['Neighborhood'], df_Denver_coordinates['Latitude'], df_Denver_coordinates['Longitude'], categoryId, radius)

In [27]:
print(Denver_venues.shape)
Denver_venues.head()

(1091, 5)


Unnamed: 0,Neighborhood,Latitude,Longitude,Venue,Venue Category
0,Chaffee Park,39.788557,-105.011692,Save-A-Lot,Grocery Store
1,Chaffee Park,39.788557,-105.011692,Colorado Ranch Market,Grocery Store
2,Chaffee Park,39.788557,-105.011692,"Zumba, Zumba, Zumba at Jen's House",Gym
3,Chaffee Park,39.788557,-105.011692,Kostman Athletic Club,Martial Arts School
4,Sunnyside,39.776827,-105.011738,Sunnyside Natural Market,Grocery Store


In [28]:
for nbhd in set(df_Denver_housing['Neighborhood']):
    if nbhd not in set(Denver_venues['Neighborhood']):
        print('No data retrived for {} neighborhood in Foursqure.'.format(nbhd))

No data retrived for DIA neighborhood in Foursqure.


It is possible to rank the neighborhoods based on the total number of venues, but instead we might want to group the neighborhoods  on a few levels (e.g., high-medium-low level of facility abundancy) to provide flexibility and enable effective decision making in choosing neighborhoods.

In [29]:
Denver_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Latitude,Longitude,Venue,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Athmar Park,4,4,4,4
Auraria,32,32,32,32
Baker,5,5,5,5
Barnum,5,5,5,5
Barnum West,2,2,2,2
...,...,...,...,...
West Colfax,4,4,4,4
West Highland,28,28,28,28
Westwood,2,2,2,2
Whittier,8,8,8,8


In [30]:
Denver_venues_count = Denver_venues.groupby('Neighborhood').count().reset_index()
Denver_venues_count.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,Venue,Venue Category
0,Athmar Park,4,4,4,4
1,Auraria,32,32,32,32
2,Baker,5,5,5,5
3,Barnum,5,5,5,5
4,Barnum West,2,2,2,2


In [31]:
Denver_venues_count = Denver_venues_count[['Neighborhood','Venue']].rename(columns={'Venue':'VenueCount'})

We use k-means to cluster Denver neighborhoods based on VanueCount 

In [32]:
from sklearn.cluster import KMeans

In [33]:
k = 3
Denver_venues_clusters = Denver_venues_count.drop(["Neighborhood"], axis=1)
kmeans = KMeans(init="k-means++", n_clusters=k, random_state=0, n_init=12).fit(Denver_venues_clusters)

Denver_venues_count['Label'] = kmeans.labels_
Denver_venues_count

Unnamed: 0,Neighborhood,VenueCount,Label
0,Athmar Park,4,0
1,Auraria,32,2
2,Baker,5,0
3,Barnum,5,0
4,Barnum West,2,0
...,...,...,...
72,West Colfax,4,0
73,West Highland,28,2
74,Westwood,2,0
75,Whittier,8,0


From the table above, it appears that the more abundant the facilities are, the larger number the label has. Therefore, we can safely assign label "0" to DIA neighborhood, which returned zero venues.

In [34]:
DIA_venues_count = {'Neighborhood':'DIA','VenueCount':0, 'Label':0}
Denver_venues_count = Denver_venues_count.append(DIA_venues_count,ignore_index=True)
Denver_venues_count

Unnamed: 0,Neighborhood,VenueCount,Label
0,Athmar Park,4,0
1,Auraria,32,2
2,Baker,5,0
3,Barnum,5,0
4,Barnum West,2,0
...,...,...,...
73,West Highland,28,2
74,Westwood,2,0
75,Whittier,8,0
76,Windsor,6,0


In [35]:
df_Denver_clusters = df_Denver_coordinates.join(Denver_venues_count.set_index('Neighborhood'),on='Neighborhood')

In [36]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [37]:
# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_Denver_clusters['Latitude'], df_Denver_clusters['Longitude'], df_Denver_clusters['Neighborhood'], df_Denver_clusters['Label']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(Denver_map)
       
Denver_map