# Best place to live for newcomers to Raleigh NC


In [None]:
!conda install -c conda-forge geopy --yes # installs geopy; comment this line out if geopy is already installed

In [None]:
!conda install -c conda-forge folium 

In [None]:
!conda install -c conda-forge Geocoder

### Import required libraries 

In [None]:
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from bs4 import BeautifulSoup as bs
import numpy as np

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

import geocoder as ge

import datetime

from dateutil.parser import parse

import re


print('Libraries imported.')

## Raleigh NC
#### Raleigh is the capital city of North Carolina, belongs to Wake County, and has frequently been ranked by Forbes as one of the top cities to live in.
#### Since 2017, over 2000 new residents have come to Wake County every month. For these newcomers, it is difficult to choose which part of Raleigh suits them best. This project analyzes Raleigh in terms of
###### * Safety (police incidents)
###### * Education (public schools)
###### * Convenience 
###### * Development plan


#### Let's first look at the map of Raleigh NC.

In [None]:
# Look up the coordinates of Raleigh and build the base map centred on them.
address = 'Raleigh, NC'

geolocator = Nominatim(user_agent="nc_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; fail with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Raleigh NC are {}, {}.'.format(latitude, longitude))

# create map of Raleigh using latitude and longitude values
map_raleigh = folium.Map(location=[latitude, longitude], zoom_start=10)
map_raleigh

## Raleigh Safety - Police Incidents 

In [None]:
!wget -q -O 'raleigh_police_inc.json' https://opendata.arcgis.com/datasets/24c0b37fa9bb4e16ba8bcaa7e806c615_0.geojson

print('Data downloaded!')
with open('raleigh_police_inc.json') as json_data:
    json_data = json.load(json_data)

pol_inc=json_data['features']

In [None]:
# define the dataframe columns
column_names = ["objectid","district","crime_category","crime_description","latitude","longitude","reported_date"]

# Keep incidents reported on or after this date (2019 onwards, no upper bound).
first_reported_date = datetime.date(2019, 1, 1)

# Collect the rows in a plain list and build the frame once at the end.
# DataFrame.append copied the whole frame on every call and no longer exists
# in pandas 2.x.
incident_rows = []
for data in pol_inc:
    value = data["properties"]
    reported = value["reported_date"]
    if not reported:
        # No report date to compare against; the record cannot be filtered.
        continue
    # The first ten characters of reported_date are parsed as YYYY-MM-DD.
    if datetime.datetime.strptime(reported[0:10], '%Y-%m-%d').date() >= first_reported_date:
        incident_rows.append({
            'objectid': value["OBJECTID"],
            'district': value["district"],
            'crime_category': value["crime_category"],
            'crime_description': value["crime_description"],
            'latitude': value['latitude'],
            'longitude': value['longitude'],
            'reported_date': reported,
        })

# Passing columns= keeps the column order and yields a well-formed empty
# frame when nothing matches.
pol_inc_df = pd.DataFrame(incident_rows, columns=column_names)


In [None]:
# Keep only the incidents that carry a latitude value.
has_latitude = pol_inc_df['latitude'].notna()
pol_inc_df_wloc = pol_inc_df[has_latitude]

# Quick look at the size and the first rows of what is left.
print(pol_inc_df_wloc.shape)
print(pol_inc_df_wloc.head(10))

In [None]:
# GeoJSON with the boundaries of the districts used in the police incident reports
url='https://opendata.arcgis.com/datasets'
dist_bound=f'{url}/a8cd851199cc46029b6b73151711ab75_0.geojson'

# Incident totals per district, cast to float
district_totals = pol_inc_df_wloc['district'].value_counts().astype(float)
crimedata2 = district_totals.to_frame()
# Write a JSON copy of the aggregate to disk
crimedata2.to_json('crimeagg.json')
# Move the district labels out of the index so they become a joinable column
crimedata2 = crimedata2.reset_index()
crimedata2.columns = ['DISTRICT', 'Number']

map_raleigh = folium.Map(location=[latitude, longitude], zoom_start=11)

# Shade each district by its incident count; rows are matched to polygons
# through the DISTRICT property of every GeoJSON feature.
incident_layer = folium.Choropleth(
    geo_data=dist_bound,
    name='choropleth',
    data=crimedata2,
    columns=['DISTRICT', 'Number'],
    key_on='feature.properties.DISTRICT',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    highlight=True,
    legend_name='Number of incidents per district'
)
incident_layer.add_to(map_raleigh)

folium.LayerControl().add_to(map_raleigh)
map_raleigh


In [None]:
# Show the number of incidents per district, indexed by district for readability.
# set_index is deliberately NOT applied in place: the later safety map passes
# crimedata2 with columns=['DISTRICT', 'Number'], so DISTRICT must remain a
# regular column. Keeping crimedata2 unchanged also makes this cell re-runnable.
crimedata2.set_index("DISTRICT")

#### Community Survey 
##### Check what residents say about their neighborhoods and find the neighborhoods they rate highest

In [None]:
# download raleigh recreation and parks information from opendata
!wget -q -O 'con_survey.json' https://opendata.arcgis.com/datasets/c12b5ee6a2e649d2b375b1c0a9b53fea_0.geojson

print('Data downloaded!')

with open('con_survey.json') as json_data:
    json_data = json.load(json_data)

con_survey=json_data['features']


In [None]:
# Take the column names from the property keys of the first survey record
# (the [:1] slice keeps this safe when there are no records at all).
column_names = [key for feature in con_survey[:1] for key in feature['properties']]

# Empty frame that only carries those column names, shown for inspection.
df_survey = pd.DataFrame(columns=column_names)
df_survey.shape
df_survey

In [None]:
# Build the survey table in one step: one row per survey record, one column
# per property key. This replaces a per-record transpose + DataFrame.append
# loop, which was quadratic, left every row with the same index label, and
# relies on an API that no longer exists in pandas 2.x.
df_survey = pd.DataFrame([feature['properties'] for feature in con_survey])
    

In [None]:
# Size before filtering
print(df_survey.shape)

# Discard surveys that lack either block coordinate
location_columns = ['BLOCK_LAT', 'BLOCK_LON']
df_survey = df_survey.dropna(subset=location_columns)

# Size after filtering, then a preview
print(df_survey.shape)
df_survey.head(5)

In [None]:
#keep only variables related to neighborhood evaluation
neighborhood_cols2=[col for col in df_survey.columns if 'neighborhood' in col]
neighborhood_cols=['ID', 'DISTRICT', 'BLOCK_LAT', 'BLOCK_LON', 'Overall_quality_of_life_in_your_neighborhood__1_02', 'Cleanliness_of_your_neighborhood__12_11', 'Impact_of_changes_being_made_in_and_around_your_neighborhood__12_14', 'New_construction_s_compatibility_with_existing_neighborhood_building_patterns__12_15', 'Your_neighborhood_s_ability_to_support_a_healthy_lifestyle__12_2', 'In_your_neighborhood_during_the_day__14_05', 'In_your_neighborhood_at_night__14_06', 'Condition_of_streets_in_your_neighborhood__16_05', 'Condition_of_sidewalks_in_your_neighborhood__16_07', 'Availability_of_sidewalks_in_your_neighborhood__16_09']

neighborhoods_df=df_survey[neighborhood_cols].set_index('ID')
print(neighborhoods_df.shape)

# Convert every column that is fully numeric; leave the others untouched.
# The columns are replaced by name so the new dtype actually sticks
# (assigning through .iloc[:, i] may write into the existing object column),
# and the explicit try/except stands in for the deprecated errors='ignore'.
for col in neighborhoods_df.columns:
    try:
        neighborhoods_df[col] = pd.to_numeric(neighborhoods_df[col])
    except (ValueError, TypeError):
        # Not a numeric column: keep the original values.
        pass

#check data types after conversion
# info() prints its report itself and returns None, so it is not wrapped in print().
neighborhoods_df.info()

In [None]:
# Average every survey score per district; the block coordinates are left out
# because averaging them per district is not wanted here.
district_means = (
    neighborhoods_df
    .drop(columns=['BLOCK_LAT', 'BLOCK_LON'])
    .groupby('DISTRICT')
    .mean()
)

# Best overall quality of life first, with DISTRICT back as a regular column.
neighborhodds_df_distgrouped = (
    district_means
    .sort_values('Overall_quality_of_life_in_your_neighborhood__1_02', ascending=False)
    .reset_index()
)


In [None]:
# Duplicate DISTRICT under the name COUNCIL_DIST, the property name that the
# council-district choropleth joins on (key_on='feature.properties.COUNCIL_DIST').
neighborhodds_df_distgrouped['COUNCIL_DIST']=neighborhodds_df_distgrouped['DISTRICT']
neighborhodds_df_distgrouped

In [None]:
# List the column names of the per-district averages.
neighborhodds_df_distgrouped.columns

In [None]:
# Map the survey results by Raleigh City Council district.
# GeoJSON whose features expose a COUNCIL_DIST property (see key_on below)
url='https://opendata.arcgis.com/datasets'
dist_bound=f'{url}/9a5733e13dd14e2f80f8517738ce8cc6_2.geojson'

map_raleigh = folium.Map(location=[latitude, longitude], zoom_start=11)

# Column holding the score that drives the fill colour
quality_column = 'Overall_quality_of_life_in_your_neighborhood__1_02'

quality_layer = folium.Choropleth(
    geo_data=dist_bound,
    name='choropleth',
    data=neighborhodds_df_distgrouped,
    columns=['COUNCIL_DIST', quality_column],
    key_on='feature.properties.COUNCIL_DIST',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    highlight=True,
    legend_name='Overall Quality of Life'
)
quality_layer.add_to(map_raleigh)

folium.LayerControl().add_to(map_raleigh)
map_raleigh

#### Neighborhoods in North West
##### Based on the safety analysis by district, the north west district is the top choice.
##### Based on the survey, both the north west and north districts have the highest quality of life.
#### Combining both district analyses, we recommend north west as the top district to live in and explore the neighborhoods in this district next

#### Since many people who come to this area are parents with kids, we extract the public schools in the north west district and evaluate
#### the convenience around each school; instead of recommending neighborhoods, we look at the best school area to live in

#### Download Raleigh Education Public Schools

In [None]:
# download raleigh public school system  data from opendata
!wget -q -O 'public_school.json' https://opendata.arcgis.com/datasets/dea6ff0e8b4743a0ba361e13a85a4c70_3.geojson

print('Data downloaded!')

with open('public_school.json') as json_data:
    json_data = json.load(json_data)

public_school=json_data['features']
    

In [None]:
# download raleigh public school system  data from opendata
!wget -q -O 'public_school.json' https://opendata.arcgis.com/datasets/dea6ff0e8b4743a0ba361e13a85a4c70_3.geojson

print('Data downloaded!')

with open('public_school.json') as json_data:
    json_data = json.load(json_data)

public_school=json_data['features']

# define the dataframe columns
column_names = ["objectid","school_name","schoo_addr","school_city","school_county","Latitude","Longitude"]

# instantiate the dataframe
public_school_df = pd.DataFrame(columns=column_names)

for data in public_school:
    value=data["properties"]
    geo_values=data["geometry"]
    public_school_df = public_school_df.append({'objectid':value["OBJECTID"],
                                          'school_name': value["SCHOOL_NAM"],
                                          'schoo_addr': value["PHYS_ADDR"],
                                          'school_city': value["PHYS_CITY"],
                                          'school_county': value["COUNTY"],
                                          'Latitude': geo_values['coordinates'][1],
                                          'Longitude': geo_values['coordinates'][0]                                       
                                         }, ignore_index=True)

In [None]:
# Restrict the school table to rows whose city is exactly "Raleigh".
in_raleigh = public_school_df["school_city"].eq("Raleigh")
Raleigh_public_school_df = public_school_df.loc[in_raleigh]

In [None]:
# Re-plot the incident choropleth and overlay the Raleigh public schools.
# No school-to-district mapping was found, so the schools are drawn on the
# map and the ones inside the north west district are picked out by eye.

# GeoJSON with the boundaries of the districts used in the police incident reports
url='https://opendata.arcgis.com/datasets'
dist_bound=f'{url}/a8cd851199cc46029b6b73151711ab75_0.geojson'

map_raleigh = folium.Map(location=[latitude, longitude], zoom_start=11)

# Choropleth joins on a regular DISTRICT column. If crimedata2 has been
# re-indexed by DISTRICT (e.g. for display), move it back into a column
# instead of failing with a KeyError.
if 'DISTRICT' in crimedata2.columns:
    district_counts = crimedata2
else:
    district_counts = crimedata2.reset_index()

folium.Choropleth(
    geo_data=dist_bound,
    name='choropleth',
    data=district_counts,
    columns=['DISTRICT', 'Number'],
    key_on='feature.properties.DISTRICT',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    highlight=True,
    legend_name='Number of incidents per district'
).add_to(map_raleigh)

# One circle per school; the popup shows the school name.
for lat, lng, label in zip(Raleigh_public_school_df['Latitude'], Raleigh_public_school_df['Longitude'], Raleigh_public_school_df['school_name']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_raleigh)

# Add the layer control exactly once, after everything else is on the map;
# adding it twice renders two identical control widgets.
folium.LayerControl().add_to(map_raleigh)
map_raleigh

##### The schools' district information was not available in the data, so the following schools were identified by manually viewing each school on the district map
####### Sycamore Creek Elementary
####### Brier Creek Elementary
####### Leesville Road High
####### Leesville Road Middle
####### Leesville Road Elementary
####### Hilburn Drive Academy
####### Jeffreys Grove Elementary
####### York Elementary
####### Stough Elementary
####### Sycamore Creek Elementary


Let's get the addresses of these schools and the venues around them

In [None]:
# Public schools located in the north west district (picked manually from the map).
# Each school is listed once; the original list repeated
# 'Sycamore Creek Elementary', which has no effect on isin().
nw_school_list = [
    'Sycamore Creek Elementary',
    'Brier Creek Elementary',
    'Leesville Road High',
    'Leesville Road Middle',
    'Leesville Road Elementary',
    'Hilburn Drive Academy',
    'Jeffreys Grove Elementary',
    'York Elementary',
    'Stough Elementary',
]

# Keep the Raleigh schools whose name appears in the list above.
nw_Raleigh_public_school_df=Raleigh_public_school_df[Raleigh_public_school_df['school_name'].isin(nw_school_list)]
nw_Raleigh_public_school_df

##### Get Nearby Venue for each school from foursquare

In [None]:
# Settings used when calling the Foursquare API.
CLIENT_ID = '*******'  # your Foursquare ID
CLIENT_SECRET = '********'  # your Foursquare Secret
VERSION = '20180604'
LIMIT = 100  # number of venues returned by Foursquare API
radius = 10000  # define radius, unit meters

# Echo the configured credentials.
print('Your credentails:')
print(f'CLIENT_ID: {CLIENT_ID}')
print(f'CLIENT_SECRET:{CLIENT_SECRET}')


In [166]:
### Borrow functions from course 9 class for IBM capstone project

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint around each location.

    Args:
        names: labels of the locations (stored in the 'Neighborhood' column).
        latitudes: latitude of each location, aligned with ``names``.
        longitudes: longitude of each location, aligned with ``names``.
        radius: value sent as the ``radius`` query parameter.

    Returns:
        A DataFrame with one row per returned venue and the columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when no venue is
        returned at all.

    Raises:
        requests.HTTPError: if the API answers with an error status.

    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; the timeout keeps a stalled connection from
        # hanging the notebook, and raise_for_status surfaces HTTP errors
        # directly instead of as a KeyError further down
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may come back without any category; record None
            # rather than failing on categories[0]
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing columns= here (instead of assigning them afterwards) also works
    # when no rows were collected.
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return nearby_venues

In [167]:
#Run for each school and create a data frame to store all venues
# NOTE(review): no radius argument is passed, so getNearbyVenues falls back to
# its default of 500 and the module-level `radius` setting is not used here.
# Confirm whether that is intended.
nw_school_venues = getNearbyVenues(names=nw_Raleigh_public_school_df['school_name'],
                                   latitudes=nw_Raleigh_public_school_df['Latitude'],
                                   longitudes=nw_Raleigh_public_school_df['Longitude']
                                  )

Brier Creek Elementary
Sycamore Creek Elementary
Jeffreys Grove Elementary
Leesville Road Elementary
Leesville Road Middle
Leesville Road High
Stough Elementary
York Elementary
Hilburn Drive Academy


##### Check venues around each school

In [168]:
# (rows, columns) of the venue table returned by getNearbyVenues
nw_school_venues.shape

(41, 7)

In [169]:
# One indicator column per venue category (no prefix on the column names)
nw_school_venues_onehot = pd.get_dummies(
    nw_school_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Attach the school each venue belongs to
nw_school_venues_onehot['Neighborhood'] = nw_school_venues['Neighborhood']

# Rotate the last column (the one just added) to the front
fixed_columns = list(nw_school_venues_onehot.columns)
fixed_columns = fixed_columns[-1:] + fixed_columns[:-1]
nw_school_venues_onehot = nw_school_venues_onehot[fixed_columns]

# Per school, add up the indicators to get the number of venues per category
nw_school_venues_grouped = (
    nw_school_venues_onehot
    .groupby('Neighborhood')
    .sum()
    .reset_index()
)
nw_school_venues_grouped

Unnamed: 0,Neighborhood,Business Service,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Farm,Flower Shop,Football Stadium,Gym,Gym / Fitness Center,Kids Store,Lawyer,Men's Store,Mexican Restaurant,Mobile Phone Shop,Movie Theater,Music Venue,Paper / Office Supplies Store,Park,Pharmacy,Pizza Place,Playground,Pool,Rental Car Location,Smoke Shop,Supermarket,Video Store,Wings Joint
0,Brier Creek Elementary,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0
1,Hilburn Drive Academy,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0
2,Jeffreys Grove Elementary,0,1,0,0,1,0,0,0,0,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1
3,Leesville Road Elementary,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0
4,Leesville Road High,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Leesville Road Middle,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Stough Elementary,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
7,Sycamore Creek Elementary,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,2,0,0,0,0,0
8,York Elementary,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


##### Confirm the new size

In [170]:
# (rows, columns) of the per-school category counts
nw_school_venues_grouped.shape

(9, 30)

In [171]:
##### Let's print each school along with the top 10 most common venues

In [172]:
#### Borrow functions from Cap stone project in course 9 
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, largest first.

    The first entry of ``row`` is skipped (it holds the name, not a count).
    At most ``num_top_venues`` labels are returned, as an array of index
    labels.
    """
    venue_counts = row.iloc[1:]
    ranked_categories = venue_counts.sort_values(ascending=False).index
    return ranked_categories.values[:num_top_venues]


# Number of venue categories to list per school
num_top_venues =10

# Ordinal suffixes for the first three ranks; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:` around the list
    # lookup, which would also have hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = nw_school_venues_grouped['Neighborhood']

# Fill each row with that school's most common venue categories
for ind in np.arange(nw_school_venues_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(nw_school_venues_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Brier Creek Elementary,Pool,Playground,Pizza Place,Pharmacy,Gym,Wings Joint,Kids Store,Chinese Restaurant,Convenience Store,Cosmetics Shop
1,Hilburn Drive Academy,Mexican Restaurant,Convenience Store,Cosmetics Shop,Pizza Place,Music Venue,Movie Theater,Kids Store,Lawyer,Chinese Restaurant,Dance Studio
2,Jeffreys Grove Elementary,Lawyer,Gym / Fitness Center,Chinese Restaurant,Video Store,Mobile Phone Shop,Wings Joint,Rental Car Location,Smoke Shop,Supermarket,Dance Studio
3,Leesville Road Elementary,Playground,Park,Football Stadium,Wings Joint,Lawyer,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega
4,Leesville Road High,Football Stadium,Wings Joint,Men's Store,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Farm,Flower Shop
5,Leesville Road Middle,Football Stadium,Wings Joint,Men's Store,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Farm,Flower Shop
6,Stough Elementary,Men's Store,Pool,Deli / Bodega,Lawyer,Wings Joint,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Farm
7,Sycamore Creek Elementary,Pool,Playground,Park,Paper / Office Supplies Store,Farm,Flower Shop,Wings Joint,Kids Store,Chinese Restaurant,Convenience Store
8,York Elementary,Business Service,Men's Store,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Farm,Flower Shop,Football Stadium


#### Clustering Schools as Neighborhoods
##### Run K-means to cluster the schools into 3 clusters 

In [173]:
# Cluster the schools based on the venues around them
kclusters = 3

# Only the category counts go into the clustering, not the school name.
# `columns=` replaces the positional axis argument (`drop('Neighborhood', 1)`),
# which pandas 2.x no longer accepts.
nw_school_venues_grouped_clustering = nw_school_venues_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is given explicitly so the number of restarts does not depend on the
# installed scikit-learn version's default.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(nw_school_venues_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 2, 0, 0, 0, 0, 1, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each school neighborhood.

In [178]:
# Put the k-means label of every school in front of its top-venue columns
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach the label and top venues to the school table (address, latitude,
# longitude), matching school_name against the Neighborhood column
venues_by_school = neighborhoods_venues_sorted.set_index('Neighborhood')
nw_Raleigh_public_school_df_merged = nw_Raleigh_public_school_df.join(venues_by_school, on='school_name')

nw_Raleigh_public_school_df_merged

Unnamed: 0,objectid,school_name,schoo_addr,school_city,school_county,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1279,2280,Brier Creek Elementary,9801 Brier Creek Parkway,Raleigh,Wake,35.901672,-78.805209,0,Pool,Playground,Pizza Place,Pharmacy,Gym,Wings Joint,Kids Store,Chinese Restaurant,Convenience Store,Cosmetics Shop
1389,2390,Sycamore Creek Elementary,10921 Leesville Rd,Raleigh,Wake,35.918568,-78.732958,1,Pool,Playground,Park,Paper / Office Supplies Store,Farm,Flower Shop,Wings Joint,Kids Store,Chinese Restaurant,Convenience Store
2205,1612,Jeffreys Grove Elementary,6119 Creedmoor Rd,Raleigh,Wake,35.868088,-78.681065,2,Lawyer,Gym / Fitness Center,Chinese Restaurant,Video Store,Mobile Phone Shop,Wings Joint,Rental Car Location,Smoke Shop,Supermarket,Dance Studio
2210,1617,Leesville Road Elementary,8401 Leesville Rd,Raleigh,Wake,35.887894,-78.71859,0,Playground,Park,Football Stadium,Wings Joint,Lawyer,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega
2212,1619,Leesville Road Middle,8405 Leesville Rd,Raleigh,Wake,35.889222,-78.718566,0,Football Stadium,Wings Joint,Men's Store,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Farm,Flower Shop
2214,1621,Leesville Road High,8409 Leesville Rd,Raleigh,Wake,35.890837,-78.718507,0,Football Stadium,Wings Joint,Men's Store,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Farm,Flower Shop
2239,1646,Stough Elementary,4210 Edwards Mill Rd,Raleigh,Wake,35.83601,-78.692128,0,Men's Store,Pool,Deli / Bodega,Lawyer,Wings Joint,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Farm
2258,1665,York Elementary,5201 Brookhaven Dr,Raleigh,Wake,35.852519,-78.683395,0,Business Service,Men's Store,Chinese Restaurant,Convenience Store,Cosmetics Shop,Dance Studio,Deli / Bodega,Farm,Flower Shop,Football Stadium
2351,1758,Hilburn Drive Academy,7100 Hilburn Drive,Raleigh,Wake,35.875079,-78.714175,0,Mexican Restaurant,Convenience Store,Cosmetics Shop,Pizza Place,Music Venue,Movie Theater,Kids Store,Lawyer,Chinese Restaurant,Dance Studio


In [179]:
##### Visualize the resulting clusters

In [180]:
# add clusters to map raleigh
# One colour per cluster, evenly spaced over the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(nw_Raleigh_public_school_df_merged['Latitude'], nw_Raleigh_public_school_df_merged['Longitude'], nw_Raleigh_public_school_df_merged['school_name'], nw_Raleigh_public_school_df_merged['Cluster Labels']):
    # A school without any venue gets no label from the join (NaN), which
    # also turns the whole column into floats; skip those rows and index
    # the colour list with a real integer.
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=20,
        popup=label,
        # cluster-1 maps label 0 to the last colour via negative indexing
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_raleigh)

# map_raleigh already carries a layer control from the cell that built it;
# adding another one here would only render a duplicate widget.
map_raleigh

#### Thanks for reading!