# Data science capstone 

### Using pandas to read the first table from given url

In [2]:
import pandas as pd
tables=pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
print(tables[0])

    Postal code           Borough  \
0           M1A      Not assigned   
1           M2A      Not assigned   
2           M3A        North York   
3           M4A        North York   
4           M5A  Downtown Toronto   
5           M6A        North York   
6           M7A  Downtown Toronto   
7           M8A      Not assigned   
8           M9A         Etobicoke   
9           M1B       Scarborough   
10          M2B      Not assigned   
11          M3B        North York   
12          M4B         East York   
13          M5B  Downtown Toronto   
14          M6B        North York   
15          M7B      Not assigned   
16          M8B      Not assigned   
17          M9B         Etobicoke   
18          M1C       Scarborough   
19          M2C      Not assigned   
20          M3C        North York   
21          M4C         East York   
22          M5C  Downtown Toronto   
23          M6C              York   
24          M7C      Not assigned   
25          M8C      Not assigned   
2

### Converting it to a dataframe using pandas

In [3]:
df=pd.DataFrame(tables[0])
print(df.shape)
df.head()

(180, 3)


Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Replacing not a number(nan) with 0 in neighborhood column

In [4]:
df=df.fillna(0)
df.replace(to_replace='Not assigned',value='0',inplace=True) #replacing not assigned boroughs with 0 
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,0,0
1,M2A,0,0
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Excluding rows which have 0 value as borough

In [5]:
dfn=pd.DataFrame(df[df.Borough !='0']) 
dfn.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


### Cleaning the dataframe

In [6]:
dfn.groupby(['Postal code'])
dfn.rename(columns={'Postal code':'Postalcode'},inplace=True)
dfn['Neighborhood']=dfn['Neighborhood'].str.replace('/',',')  
dfn.reset_index(drop=True)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [7]:
dfn.describe()

Unnamed: 0,Postalcode,Borough,Neighborhood
count,103,103,103
unique,103,10,98
top,M1X,North York,Downsview
freq,1,24,4


In [8]:
dfn['Postalcode']=dfn['Postalcode'].astype(str)

### GETTING COORDINATES OF EACH NEIGHBORHOOD

In [9]:
coords=pd.read_csv('http://cocl.us/Geospatial_data')
coords.head()
coords.rename(columns={'Postal Code':'Postalcode'},inplace=True)

In [10]:
coords.reset_index(drop=True)
coords.head()

Unnamed: 0,Postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
coords.describe()

Unnamed: 0,Latitude,Longitude
count,103.0,103.0
mean,43.704608,-79.397153
std,0.052463,0.097146
min,43.602414,-79.615819
25%,43.660567,-79.464763
50%,43.696948,-79.38879
75%,43.74532,-79.340923
max,43.836125,-79.160497


In [12]:
coords['Postalcode']=coords['Postalcode'].astype(str) #changing the common column datatype to merge using the common column later

In [21]:
mergedf=pd.merge(dfn,coords,on='Postalcode') #merging the tables 

#### prob 3 exploring and clustering

In [14]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1

In [19]:
address='Toronto,Canada'
geolocator=Nominatim()
location=geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print('geographical coords of city of toronta are {},{}'.format(latitude,longitude))

geographical coords of city of toronta are 43.6534817,-79.3839347


  from ipykernel import kernelapp as app


In [22]:
map_toronto=folium.Map(location=[latitude,longitude],zoom_start=9)
#adding markers
for lat,lng,borough,neighborhood in zip(mergedf['Latitude'],mergedf['Longitude'],mergedf['Borough'],mergedf['Neighborhood']):
    label='{},{}'.format(neighborhood,borough)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=6,
        popup=label,
        color='brown',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.2).add_to(map_toronto)
map_toronto

In [25]:
CLIENT_ID='YQWIXLNXHCLQORDT220OEKHDCGFNKHFELR3T3QEMG4WQR341'
CLIENT_SECRET='VBSK5Q1VUDUYAEUH3XZ5BEEPHZODU13MHSUXKKPT0MJ45VIP'
VERSION='20180604'
LIMIT=30
print('YOUR CREDENTIALS')
print('client_id ' + CLIENT_ID)
print('client_secret ' + CLIENT_SECRET)

YOUR CREDENTIALS
client_id YQWIXLNXHCLQORDT220OEKHDCGFNKHFELR3T3QEMG4WQR341
client_secret VBSK5Q1VUDUYAEUH3XZ5BEEPHZODU13MHSUXKKPT0MJ45VIP


In [31]:
dfilter=mergedf[mergedf['Borough'].str.contains('Etobicoke')]
dn=dfilter.reset_index(drop=True)
dn.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M9B,Etobicoke,"West Deane Park , Princess Gardens , Martin Gr...",43.650943,-79.554724
2,M9C,Etobicoke,"Eringate , Bloordale Gardens , Old Burnhamthor...",43.643515,-79.577201
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9R,Etobicoke,"Kingsview Village , St. Phillips , Martin Grov...",43.688905,-79.554724


In [34]:
map_eto=folium.Map(location=[latitude,longitude],zoom_start=12)
for lat,lng,borough,neighborhood in zip(dn['Latitude'],dn['Longitude'],dn['Borough'],dn['Neighborhood']):
    label='{},{}'.format(neighborhood,borough)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat,lng],
        radius=5,
        popup=label,
        color='green',
        fill_color='3199cc',
        fill_opacity=0.3).add_to(map_eto)
map_eto

In [37]:
dn.loc[0,'Neighborhood']

'Islington Avenue'

In [44]:
neigh_lat=dn.loc[0,'Latitude']
neigh_long=dn.loc[0,'Longitude']
neigh_name=dn.loc[0,'Neighborhood']

In [47]:
radius=900
url='https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
CLIENT_ID,
CLIENT_SECRET,
VERSION,
neigh_lat,
neigh_long,
radius,
LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=YQWIXLNXHCLQORDT220OEKHDCGFNKHFELR3T3QEMG4WQR341&client_secret=VBSK5Q1VUDUYAEUH3XZ5BEEPHZODU13MHSUXKKPT0MJ45VIP&v=20180604&ll=43.6678556,-79.53224240000002&radius=900&limit=30'

In [48]:
results=requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e90391260ba08001b03c8db'},
 'response': {'headerLocation': 'Edenbridge - Humber Valley',
  'headerFullLocation': 'Edenbridge - Humber Valley, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 10,
  'suggestedBounds': {'ne': {'lat': 43.67595560810001,
    'lng': -79.52106546953107},
   'sw': {'lat': 43.659755591899994, 'lng': -79.54341933046896}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bfd53764cf820a13849ecf4',
       'name': "Java Joe's Village Cafe",
       'location': {'address': '1500 Islington Ave',
        'crossStreet': 'at Rathburn Rd',
        'lat': 43.662460906352436,
        'lng': -79.53205381416235,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.662460906352436,
   

In [50]:
'these are {} places to explore near "{}" neighborhood'.format(len(results['response']['groups'][0]['items']),neigh_name)

'these are 10 places to explore near "Islington Avenue" neighborhood'

In [53]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [59]:

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Java Joe's Village Cafe,Café,43.662461,-79.532054
1,St Georges Golf and Country Club,Golf Course,43.674395,-79.537142
2,TD Canada Trust,Bank,43.662545,-79.531749
3,Shoppers Drug Mart,Pharmacy,43.663067,-79.531753
4,Thorncrest Drug Store,Pharmacy,43.662988,-79.531817
5,Foodland - Toronto,Grocery Store,43.662724,-79.531984
6,Thorncrest Plaza,Shopping Mall,43.66262,-79.532146
7,Princess Margaret Park,Playground,43.667835,-79.539934
8,Humber Valley Park,Park,43.664825,-79.524999
9,Humber Valley Rink,Skating Rink,43.664826,-79.524873


In [60]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

10 venues were returned by Foursquare.


In [62]:
map_eto=folium.Map(location=[neigh_lat,neigh_long],zoom_start=15)
folium.features.CircleMarker(
[neigh_lat, neigh_long],
    radius=12,
    color='red',
    popup= neigh_name,
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(map_eto)
for lat,lng,name,categories in zip(nearby_venues['lat'],nearby_venues['lng'],nearby_venues['name'],nearby_venues['categories']):
    label='{},{}'.format(name,categories)
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    radius=6,
    popup=label,
    color='green',
    fill=True,
    fill_color='#3199cc',
    fill_opacity=0.3).add_to(map_eto)
map_eto