# Caroline's Capstone Final 

## Finding the best neighbourhood for a young, social client to live in Halifax

Loading geopy and other packages

In [47]:
!pip install geopy



In [48]:

# install libraries
!pip install beautifulsoup4
!pip install lxml
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

#!conda|install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# displaying images
from IPython.display import Image 
from IPython.core.display import HTML 


from IPython.display import display_html
import pandas as pd
import numpy as np
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Folium installed')
print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Folium installed
Libraries imported.


# Loading Data 

In [49]:
# Postal code and neighbourhood 
data = pd.read_csv (r'data/halifax_postcodesnew.csv')
#define dataframe columns
df = pd.DataFrame(data, columns= ['Postal Code','Neighbourhood', 'Latitude','Longitude'])

df.head(9)

Unnamed: 0,Postal Code,Neighbourhood,Latitude,Longitude
0,B3M,Halifax Bedford Basin,44.6809,-63.6947
1,B3J,Halifax Mid-Harbour,44.645,-63.5747
2,B3N,Halifax South Central,44.6435,-63.6333
3,B3L,Halifax Central,44.6511,-63.614
4,B3P,Halifax North West Arm,44.6178,-63.5997
5,B3K,Halifax Upper Harbour,44.6602,-63.601
6,B3H,Halifax Lower Harbour,44.6349,-63.5788
7,B3R,Halifax South,44.5939,-63.6234
8,B3S,Halifax West,44.6445,-63.6677


# Loading housing data

In [50]:
# load housing data
source = requests.get('https://housepricehub.com/search?q=halifax+house+prices&pricemin=0&pricemax=0&city=halifax').text
soup=BeautifulSoup(source,'lxml')
print(soup.title)
from IPython.display import display_html
tab = str(soup.table)


<title>Search results for: halifax house prices - House Price Hub</title>


In [51]:
#define dataframe columns

dfs = pd.read_html(tab)
housing=dfs[0]
housing.head()

Unnamed: 0,Matching Rank,Property,Area,City,Price,Updated,Detailed History
0,1,"322 Transom Drive, Halifax, Nova Scotia B3M0L6",B3M,Halifax,"$589,000",2/1/2019,
1,2,"6296 Chebucto Road, Halifax, Nova Scotia B3L1K8",B3L,Halifax,"$400,000",2/1/2019,
2,3,"80 Tangmere Crescent, Halifax, Nova Scotia B3M1K1",B3M,Halifax,"$355,000",2/1/2019,
3,4,"5521 Leeds Street, Halifax, Nova Scotia B3K2T3",B3K,Halifax,"$349,900",2/1/2019,
4,5,"26 Maplewood Drive, Halifax, Nova Scotia B3N1C3",B3N,Halifax,"$299,000",2/1/2019,


In [52]:
#rename Area to postal code to match first table
housing.rename(columns={'Area':'Postal Code'},inplace=True)

# calling needed columns 
house= housing[['Property','Postal Code','Price']]

house.head()

Unnamed: 0,Property,Postal Code,Price
0,"322 Transom Drive, Halifax, Nova Scotia B3M0L6",B3M,"$589,000"
1,"6296 Chebucto Road, Halifax, Nova Scotia B3L1K8",B3L,"$400,000"
2,"80 Tangmere Crescent, Halifax, Nova Scotia B3M1K1",B3M,"$355,000"
3,"5521 Leeds Street, Halifax, Nova Scotia B3K2T3",B3K,"$349,900"
4,"26 Maplewood Drive, Halifax, Nova Scotia B3N1C3",B3N,"$299,000"


In [53]:
# merging the tables 

data = pd.merge(df,house,on='Postal Code')
data.head()

Unnamed: 0,Postal Code,Neighbourhood,Latitude,Longitude,Property,Price
0,B3M,Halifax Bedford Basin,44.6809,-63.6947,"322 Transom Drive, Halifax, Nova Scotia B3M0L6","$589,000"
1,B3M,Halifax Bedford Basin,44.6809,-63.6947,"80 Tangmere Crescent, Halifax, Nova Scotia B3M1K1","$355,000"
2,B3M,Halifax Bedford Basin,44.6809,-63.6947,"47 Fleetview Drive, Halifax, Nova Scotia B3M4W1","$284,900"
3,B3M,Halifax Bedford Basin,44.6809,-63.6947,"171 CRESTHAVEN Drive, Halifax, Nova Scotia B3M4S4","$629,900"
4,B3M,Halifax Bedford Basin,44.6809,-63.6947,"Lot SB20D 322 Starboard Drive, Halifax, Nova S...","$599,900"


# Finding the affordable neighbourhoods

Finding houses that are under the $400,000 price limit

In [54]:
# determining data type of 'Price' column 
data.dtypes
# -> Price is listed as object
# convert Price objects to number values 
cost = data.Price.replace('\D', '', regex=True).astype(int)

In [55]:
# replace Price column with new cost column
data['Price']=cost.values

data.head()

Unnamed: 0,Postal Code,Neighbourhood,Latitude,Longitude,Property,Price
0,B3M,Halifax Bedford Basin,44.6809,-63.6947,"322 Transom Drive, Halifax, Nova Scotia B3M0L6",589000
1,B3M,Halifax Bedford Basin,44.6809,-63.6947,"80 Tangmere Crescent, Halifax, Nova Scotia B3M1K1",355000
2,B3M,Halifax Bedford Basin,44.6809,-63.6947,"47 Fleetview Drive, Halifax, Nova Scotia B3M4W1",284900
3,B3M,Halifax Bedford Basin,44.6809,-63.6947,"171 CRESTHAVEN Drive, Halifax, Nova Scotia B3M4S4",629900
4,B3M,Halifax Bedford Basin,44.6809,-63.6947,"Lot SB20D 322 Starboard Drive, Halifax, Nova S...",599900


In [56]:
# Find houses between the values of $0-$400,000
limit = data[(data['Price'] >= 0) & (data['Price'] <= 400000)]
limit.head()


Unnamed: 0,Postal Code,Neighbourhood,Latitude,Longitude,Property,Price
1,B3M,Halifax Bedford Basin,44.6809,-63.6947,"80 Tangmere Crescent, Halifax, Nova Scotia B3M1K1",355000
2,B3M,Halifax Bedford Basin,44.6809,-63.6947,"47 Fleetview Drive, Halifax, Nova Scotia B3M4W1",284900
6,B3M,Halifax Bedford Basin,44.6809,-63.6947,"40 Clayton Park Drive, Halifax, Nova Scotia B3...",359900
7,B3M,Halifax Bedford Basin,44.6809,-63.6947,"138 STONEYBROOK Court, Halifax, Nova Scotia B3...",295000
8,B3M,Halifax Bedford Basin,44.6809,-63.6947,"58 Outrigger Crescent, Halifax, Nova Scotia B3...",294500


mapping affordable neighbourhoods

In [57]:
from folium.plugins import MarkerCluster

map_halifax = folium.Map(location=[44.6388,-63.6552],zoom_start=12)

marker_cluster = MarkerCluster().add_to(map_halifax)

for lat,lng,neighbourhood, in zip(limit['Latitude'],limit['Longitude'],limit['Neighbourhood']):
    label = '{}'.format(neighbourhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(marker_cluster)
    

    
map_halifax

From the map we can see that the three neighbourhoods with the most realestate availble are Halifax Bedford Basin, Halifax Central, and Halifax Upper Harbour. 

# Foursquare API 

In [58]:
CLIENT_ID = 'DY2HIZLRO0VFRUS1TG1ZYA35OH2SXF1V34DDTYTBFOUGVZ15'
CLIENT_SECRET = 'G441KGLHQJQQCBEFI2EKE1E2PEALYMGGUPEA3PAP31Q0AHE5'
VERSION = '20180605' 
LIMIT = 100 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: DY2HIZLRO0VFRUS1TG1ZYA35OH2SXF1V34DDTYTBFOUGVZ15
CLIENT_SECRET:G441KGLHQJQQCBEFI2EKE1E2PEALYMGGUPEA3PAP31Q0AHE5


______________________________________________________________________

 Halifax Bedford Basin

In [59]:
neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Halifax Bedford Basin are 44.6809, -63.6947.


In [60]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=DY2HIZLRO0VFRUS1TG1ZYA35OH2SXF1V34DDTYTBFOUGVZ15&client_secret=G441KGLHQJQQCBEFI2EKE1E2PEALYMGGUPEA3PAP31Q0AHE5&v=20180605&ll=44.6809,-63.6947&radius=500&limit=100'

0 venues retrieved by foursquare

______________________________________________________________________

Halifax Central

In [61]:
neighborhood_latitude1 = df.loc[3, 'Latitude'] 
neighborhood_longitude1 = df.loc[3, 'Longitude'] 

neighborhood_name1 = df.loc[3, 'Neighbourhood']

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name1, 
                                                               neighborhood_latitude1, 
                                                               neighborhood_longitude1))

Latitude and longitude values of Halifax Central are 44.6511, -63.614.


In [62]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

url1 = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude1, 
    neighborhood_longitude1, 
    radius, 
    LIMIT)
url1 # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=DY2HIZLRO0VFRUS1TG1ZYA35OH2SXF1V34DDTYTBFOUGVZ15&client_secret=G441KGLHQJQQCBEFI2EKE1E2PEALYMGGUPEA3PAP31Q0AHE5&v=20180605&ll=44.6511,-63.614&radius=500&limit=100'

In [63]:
results1 = requests.get(url1).json()

In [64]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [65]:
venues1 = results1['response']['groups'][0]['items']
    
nearby_venues1 = json_normalize(venues1) # flatten JSON

# filter columns
filtered_columns1 = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues1 =nearby_venues1.loc[:, filtered_columns1]

# filter the category for each row
nearby_venues1['venue.categories'] = nearby_venues1.apply(get_category_type, axis=1)

# clean columns
nearby_venues1.columns = [col.split(".")[-1] for col in nearby_venues1.columns]

nearby_venues1.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Apple Halifax Shopping Centre,Electronics Store,44.649197,-63.618668
1,Victoria's Secret,Lingerie Store,44.648994,-63.618365
2,Thai Express,Restaurant,44.649249,-63.618682
3,Halifax Shopping Centre,Shopping Mall,44.649104,-63.618127
4,SEPHORA,Cosmetics Shop,44.649288,-63.618325


In [66]:
print('{} venues were returned by Foursquare.'.format(nearby_venues1.shape[0]))

29 venues were returned by Foursquare.


Finding resutrants and cafes

In [67]:
count = nearby_venues1['categories'].value_counts()
print(count)

Coffee Shop             4
Fast Food Restaurant    3
Restaurant              3
Shoe Store              2
Bookstore               1
Video Game Store        1
Jewelry Store           1
Sporting Goods Shop     1
Luggage Store           1
Lingerie Store          1
Japanese Restaurant     1
Food Court              1
Cosmetics Shop          1
Electronics Store       1
Clothing Store          1
Shopping Mall           1
Chinese Restaurant      1
Ice Cream Shop          1
Sandwich Place          1
Diner                   1
Pharmacy                1
Name: categories, dtype: int64


In [71]:

map_central = folium.Map(location=[44.6511, -63.614], zoom_start=15)

# add markers to map
for lat, lng, label in zip(nearby_venues1['lat'], nearby_venues1['lng'], nearby_venues1['name']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_central)  
    
map_central

______________________________________________________________________

Halifax Upper Harbour

In [31]:
neighborhood_latitude2 = df.loc[5, 'Latitude'] 
neighborhood_longitude2 = df.loc[5, 'Longitude'] 

neighborhood_name2 = df.loc[5, 'Neighbourhood'] 

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name2, 
                                                               neighborhood_latitude2, 
                                                               neighborhood_longitude2))

Latitude and longitude values of Halifax Upper Harbour are 44.6602, -63.601000000000006.


In [32]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude2, 
    neighborhood_longitude2, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=DY2HIZLRO0VFRUS1TG1ZYA35OH2SXF1V34DDTYTBFOUGVZ15&client_secret=G441KGLHQJQQCBEFI2EKE1E2PEALYMGGUPEA3PAP31Q0AHE5&v=20180605&ll=44.6602,-63.601000000000006&radius=500&limit=100'

In [33]:
results2 = requests.get(url).json()

In [34]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [35]:
venues2 = results2['response']['groups'][0]['items']
    
nearby_venues2 = json_normalize(venues2) # flatten JSON

# filter columns
filtered_columns2 = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues2 =nearby_venues2.loc[:, filtered_columns2]

# filter the category for each row
nearby_venues2['venue.categories'] = nearby_venues2.apply(get_category_type, axis=1)

# clean columns
nearby_venues2.columns = [col.split(".")[-1] for col in nearby_venues2.columns]

nearby_venues2.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Tarek's Cafe,Middle Eastern Restaurant,44.659183,-63.600927
1,Burrito Jax,Burrito Place,44.659463,-63.603505
2,The Italian Market,Italian Restaurant,44.658999,-63.604049
3,Hamachi Kita Sushi & Asian Flare,Sushi Restaurant,44.661938,-63.599275
4,Humani-T Cafe,Café,44.66065,-63.60144


In [36]:
print('{} venues were returned by Foursquare.'.format(nearby_venues2.shape[0]))

32 venues were returned by Foursquare.


In [37]:
count1 = nearby_venues2['categories'].value_counts()
print(count1)

Pizza Place                  2
Sandwich Place               2
Middle Eastern Restaurant    2
Breakfast Spot               2
Coffee Shop                  2
Chinese Restaurant           1
Bank                         1
Café                         1
Bakery                       1
Toy / Game Store             1
Steakhouse                   1
Gourmet Shop                 1
Sushi Restaurant             1
Grocery Store                1
Asian Restaurant             1
Burrito Place                1
Pub                          1
Fast Food Restaurant         1
Brewery                      1
Park                         1
Caribbean Restaurant         1
Italian Restaurant           1
Seafood Restaurant           1
Pharmacy                     1
Health & Beauty Service      1
Deli / Bodega                1
Discount Store               1
Name: categories, dtype: int64


In [74]:
map_upper = folium.Map(location=[44.6602, -63.601000000000006], zoom_start=16)

# add markers to map
for lat, lng, label in zip(nearby_venues2['lat'], nearby_venues2['lng'], nearby_venues2['name']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_upper)  
    
map_upper