# Importing Dependancies

In [1]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
%matplotlib inline
import math

In [2]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium


Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be 

In [3]:
from sklearn.cluster import KMeans

### Extracting the table from wikipedia page

In [5]:
wiki=pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
toronto_df=wiki[0]

### Cleaning the data

In [6]:
toronto_df=toronto_df[toronto_df['Borough']!="Not assigned"] #Removing all columns that doesnt have Borough name

toronto_df['Neighbourhood'].replace(to_replace="Not assigned",value=toronto_df["Borough"],inplace=True)# Replacing columns with no neighbour hood with borough name

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


In [28]:
toronto_df=toronto_df.groupby(['Postcode','Borough'],as_index=False).agg(lambda x :','.join(x)) # grouping by postal codes 
toronto_df.head()
toronto_df.shape

(103, 3)

# Importing the latitude-longitude dataset

In [8]:
lat_long=pd.read_csv('http://cocl.us/Geospatial_data')

#### Checking if the rows of both the dataframes are n same order

In [9]:
for i in range(len(toronto_df)):
    if toronto_df['Postcode'].iloc[i]==lat_long['Postal Code'].iloc[i]:
        check=True
    else:
        chcek=False
        break
print(check) 

True


#### Adding longitude and latitue to the toronto dataframe

In [10]:
toronto_df["Latitude"]=lat_long['Latitude']
toronto_df['Longitude']=lat_long['Longitude']

# Plotting the latitude and longitude as markers

In [11]:
toronto_map = folium.Map(location=[toronto_df['Latitude'].mean(),toronto_df['Longitude'].mean()], zoom_start=11)

In [12]:
for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map)  
    
toronto_map

# Clustering Postal Codes

### Cleaning Before Clustering

In [13]:
toronto_clustering=toronto_df.drop(['Postcode','Borough','Neighbourhood'],axis=1)


### Clustering and Mapping

In [14]:
kmeans = KMeans(n_clusters=5, random_state=0).fit(toronto_clustering)


In [15]:
toronto_clusterlabels=toronto_df
toronto_clusterlabels.insert(0, 'Cluster Labels', kmeans.labels_)

### Getting Colours for labels

In [16]:
x = np.arange(5)
ys = [i + x + (i*x)**2 for i in range(5)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

### Mapping the Clusters

In [17]:
map_clusters = folium.Map(location=[toronto_df['Latitude'].mean(), toronto_df['Longitude'].mean()], zoom_start=11)

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_clusterlabels['Latitude'], toronto_clusterlabels['Longitude'], toronto_clusterlabels['Neighbourhood'], toronto_clusterlabels['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Exploring The Clusters

## Let us explore Cluster 1

In [18]:
cluster_1=toronto_clusterlabels[toronto_clusterlabels['Cluster Labels']==1]
cluster_1=cluster_1.reset_index()
cluster_1.drop(['index','Cluster Labels'],axis=1,inplace=True)

### Getting Foursquare Credentials

In [19]:
CLIENT_ID = 'LW20LDSOAKWXJ0FDQI3DGAGHYNU2ZX3WRYSZLPJUU2FQPXB4' 
CLIENT_SECRET = 'OO3INU01SLY3B1QBRIHCI1ZRGGARVSFE2Q1SEWA3IKSRMJJ4' 
VERSION = '20180605'
 

### Getting the radius of the cluster

In [20]:
centre=kmeans.cluster_centers_[1]
lat1=centre[0]
long1=centre[1]
distance=[]
for i in range(len(cluster_1)):
    lat2=cluster_1.loc[i,'Latitude']
    long2=cluster_1.loc[i,'Longitude']
    dLat = math.radians(lat2-lat1)  
    dLon = math.radians(long2-long1) 
    a =  math.sin(dLat/2) * math.sin(dLat/2) + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *  math.sin(dLon/2) * math.sin(dLon/2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    d = 6371 * c; 
    distance.append(d)
radius=max(distance)*1000

### Calling the Foursquare API

In [21]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    lat1, 
    long1, 
    radius, 
    100)

In [22]:
results = requests.get(url).json()
results


{'meta': {'code': 200, 'requestId': '5d5ff1ae9241f80026ebc969'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 237,
  'suggestedBounds': {'ne': {'lat': 43.763782241860646,
    'lng': -79.41858638228211},
   'sw': {'lat': 43.60180007987848, 'lng': -79.64215579163093}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4c719a8a1b11199c04d75313',
       'name': 'Ma Maison, Boulangerie - Patisserie - Traiteur',
       'location': {'address': '4243 Dundas St W',
        'crossStreet': 'East of Royal York Road',
        'lat': 43.65949039354277,
        'lng': -79.51362441836238,
       

## Making DataFrame from the Json File

In [23]:
cols=['id',"name","latitude","longitude","category",'category id','distance']
rows=range(100)
venues_df=pd.DataFrame(columns=cols,index=rows)
for i in range(100):
    venues_df['id'][i]=results['response']['groups'][0]['items'][i]['venue']['id']
    venues_df['name'][i]=results['response']['groups'][0]['items'][i]['venue']['name']
    venues_df['latitude'][i]=results['response']['groups'][0]['items'][i]['venue']['location']['lat']
    venues_df['longitude'][i]=results['response']['groups'][0]['items'][i]['venue']['location']['lng']
    venues_df['category'][i]=results['response']['groups'][0]['items'][i]['venue']['categories'][0]['name']
    venues_df['category id'][i]=results['response']['groups'][0]['items'][i]['venue']['categories'][0]['id']
    venues_df['distance'][i]=results['response']['groups'][0]['items'][i]['venue']['location']['distance']



### Plotting The Venues

In [24]:
venue_map = folium.Map(location=[lat1, long1], zoom_start=11)
folium.Circle(
        [lat1,long1],
        radius=radius).add_to(venue_map)
folium.CircleMarker(
        [lat1,long1],
        radius=5,
        popup="Centre of The Cluster",
        color='#FFFFFF',
        fill_color='#000000').add_to(venue_map)
for lat, lon, cat, name in zip(venues_df['latitude'], venues_df['longitude'],venues_df['category'],venues_df['name']):
    label = folium.Popup(str(cat) +' / '+ str(name), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=2,
        fill=True,
        popup=label,
        color='#000000',
        fill_opacity=0.7).add_to(venue_map)

venue_map

### Exploring Individual venues


In [25]:
venue_id=venues_df['id'][0]
venue_lat=venues_df['latitude'][0]
venue_lon=venues_df['longitude'][0]

venue1_url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
    venue_id,
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    venue_lat, 
    venue_lon)

In [26]:
venue1_json=requests.get(venue1_url).json()
venue1_json

{'meta': {'code': 200, 'requestId': '5d5ff1b49ba3e5002c914ef5'},
 'response': {'venue': {'id': '4c719a8a1b11199c04d75313',
   'name': 'Ma Maison, Boulangerie - Patisserie - Traiteur',
   'contact': {'phone': '4162362234', 'formattedPhone': '(416) 236-2234'},
   'location': {'address': '4243 Dundas St W',
    'crossStreet': 'East of Royal York Road',
    'lat': 43.65949039354277,
    'lng': -79.51362441836238,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.65949039354277,
      'lng': -79.51362441836238}],
    'cc': 'CA',
    'city': 'Etobicoke',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['4243 Dundas St W (East of Royal York Road)',
     'Etobicoke ON',
     'Canada']},
   'canonicalUrl': 'https://foursquare.com/v/ma-maison-boulangerie--patisserie--traiteur/4c719a8a1b11199c04d75313',
   'categories': [{'id': '4bf58dd8d48988d10c941735',
     'name': 'French Restaurant',
     'pluralName': 'French Restaurants',
     'shortName': 'French',
     'icon

# Similarly Others can be explore and stored into a dataFrame