# Segmentation of Lost Animals by Venues
---

## 1. Importing Required Packages

In [140]:
import folium 
import folium.plugins
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy  as np
import pandas as pd
import requests
import geocoder

from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json  import json_normalize 
from sklearn.cluster import KMeans

## 2. Getting Lost Pets Data from Animal Service of Toronto

In [1]:
def get_missed_pets(url):
    pets = []
    categories = ['Cat', 'Dog']
    
    response = requests.get(url)
    soup     = BeautifulSoup(response.text, 'lxml')
    tables   = soup.find_all('table')
    
    for index, table in enumerate(tables):
        rows = table.find_all('tr')
        for row in rows:
            cols = row.find_all('td')
            if len(cols) == 8:
                # cleaning crossing intersections data.
                cross_intersecs = cols[7].text.strip()
                cross_intersecs = cross_intersecs.replace(' AND ', '/')
                
                # if crossing intersections was not informed, the lost pet data
                # will be exclude from dataset.
                if cross_intersecs != '':
                    streets = cross_intersecs.split('/')
                    if (len(streets) < 2):
                        streets = cross_intersecs.split(' ')
                    
                    street_1 = streets[0]
                    street_2 = streets[1]
                    
                    pets.append((cols[0].text.strip(), cols[1].text.strip(), cols[2].text.strip()
                                      , cols[3].text.strip(), cols[4].text.strip(), cols[5].text.strip()
                                      , cols[6].text.strip(), cross_intersecs, street_1
                                      , street_2, categories[index]))
        
    pets = pd.DataFrame(pets)
    pets.columns = ['date', 'breed', 'age'
                   , 'sex', 'colour', 'receiving_shelter'
                   , 'id', 'crossing_intersections', 'cross_intersec_st1'
                   , 'cross_intersec_st2', 'category']
    
    return pets

In [142]:
# getting lost pet information from animal service of Toronto.
url     = 'https://www.toronto.ca/data/mls/animals/strayanimals.html'
missed_pets = get_missed_pets(url)
missed_pets.head(10)

Unnamed: 0,date,breed,age,sex,colour,receiving_shelter,id,crossing_intersections,cross_intersec_st1,cross_intersec_st2,category
0,2019-01-09,DOMESTIC SH,,Male,ORANGE,West Region,A824678,THE QUEENSWAY/KIPLING,THE QUEENSWAY,KIPLING,Cat
1,2019-01-09,DOMESTIC SH,,Unknown,WHITE,West Region,A824739,ISLINGTON BIRMINGHAM,ISLINGTON,BIRMINGHAM,Cat
2,2019-01-09,DOMESTIC SH,5M,Male,BLACK,North Region,A824752,REXDALE/ISLINGTON,REXDALE,ISLINGTON,Cat
3,2019-01-09,DOMESTIC SH,2Y,Female,BLACK,North Region,A824753,REXDALE/ISLINGTON,REXDALE,ISLINGTON,Cat
4,2019-01-12,DOMESTIC SH,3Y,Male,BRN TABBY,North Region,A824906,DANFORTH/VICTORIA PARK,DANFORTH,VICTORIA PARK,Cat
5,2019-01-12,DOMESTIC SH,,Unknown,BRN TABBY,West Region,A824912,KIPLING/401,KIPLING,401,Cat
6,2019-01-12,DOMESTIC SH,,Male,BRN TABBY,Found Animal Report,A824932,DUFFERIN/GLENCAIRN,DUFFERIN,GLENCAIRN,Cat
7,2019-01-13,DOMESTIC SH,,Female,BRN TABBY,North Region,A824940,DUFFERIN/ROGERS,DUFFERIN,ROGERS,Cat
8,2019-01-09,SHIH TZU,5Y,Male,WHITE,West Region,A824737,SPADINA DUNDAS,SPADINA,DUNDAS,Dog
9,2019-01-09,SIBERIAN HUSKY,,Neutered Male,WHITE,North Region,A824755,LANSDOWNE AVE,LANSDOWNE,AVE,Dog


## 3. Getting Crossing Intersections Geographical Coordinates

In [143]:
def get_cross_intersec_localization(pets):
    url = 'https://geocoder.api.here.com/6.2/geocode.json?city={}&street={}@{}&app_id={}&app_code={}&gen=9'
    api_id   = '79foQR1GPJRvsWDGB0Ul'
    api_code = 'E5YKLSl_O29hf-ipUlPFfQ'
   
    for row in pets.itertuples():
        address = url.format('Toronto'
                         , row.cross_intersec_st1
                         , row.cross_intersec_st2
                         , '79foQR1GPJRvsWDGB0Ul'
                         , 'E5YKLSl_O29hf-ipUlPFfQ')
        response = requests.get(address).json()
        try:
            
            localization = json_normalize(response['Response']['View'][0]['Result'][0]['Location'])
            pets.loc[row.Index,'cross_intersec_latitude']   = localization.loc[0, 'DisplayPosition.Latitude']
            pets.loc[row.Index,'cross_intersec_longitude']  = localization.loc[0, 'DisplayPosition.Longitude']
        except Exception as e:
            print('Crossing intersection {}/{} was not found in geocode database: {}! '.format(
                  row.cross_intersec_st1
                , row.cross_intersec_st2
                , str(e)))
        
    return(pets)


In [144]:
missed_pets = get_cross_intersec_localization(missed_pets)
missed_pets[['id','crossing_intersections', 'cross_intersec_st1', 'cross_intersec_st2'
             , 'cross_intersec_latitude', 'cross_intersec_longitude' ]].head(10)

Unnamed: 0,id,crossing_intersections,cross_intersec_st1,cross_intersec_st2,cross_intersec_latitude,cross_intersec_longitude
0,A824678,THE QUEENSWAY/KIPLING,THE QUEENSWAY,KIPLING,43.62092,-79.52685
1,A824739,ISLINGTON BIRMINGHAM,ISLINGTON,BIRMINGHAM,43.60326,-79.50628
2,A824752,REXDALE/ISLINGTON,REXDALE,ISLINGTON,43.71242,-79.55384
3,A824753,REXDALE/ISLINGTON,REXDALE,ISLINGTON,43.71242,-79.55384
4,A824906,DANFORTH/VICTORIA PARK,DANFORTH,VICTORIA PARK,43.69125,-79.28834
5,A824912,KIPLING/401,KIPLING,401,43.60257,-79.51854
6,A824932,DUFFERIN/GLENCAIRN,DUFFERIN,GLENCAIRN,43.707,-79.45316
7,A824940,DUFFERIN/ROGERS,DUFFERIN,ROGERS,43.68557,-79.44611
8,A824737,SPADINA DUNDAS,SPADINA,DUNDAS,43.65293,-79.39816
9,A824755,LANSDOWNE AVE,LANSDOWNE,AVE,43.66483,-79.44569


In [145]:
def get_shelters(url):
    shelters = []
    response = requests.get(url)
    soup     = BeautifulSoup(response.text, 'lxml')

    table = soup.find('table', {'id' : 'gmaptable'})
    rows  = table.find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        if len(cols) == 2:
            name    = cols[0].text.strip()
            address = cols[1].text.strip()
            street, borough, province, country = address.split(',')
            shelters.append((name, street, borough, province, country))
        
    shelters = pd.DataFrame(shelters)
    shelters.columns = ['name', 'street', 'borough', 'province', 'country']

    shelters.loc[shelters.name == 'West Shelter','borough'] = 'Etobicoke'
    return shelters

url      = 'https://www.toronto.ca/community-people/animals-pets/animal-shelters/'
shelters = get_shelters(url)
shelters

Unnamed: 0,name,street,borough,province,country
0,West Shelter,146 The East Mall,Etobicoke,ON,Canada
1,North Shelter,1300 Sheppard Avenue West,North York,ON,Canada
2,East Shelter,821 Progress Avenue,Scarborough,ON,Canada


In [146]:
def get_postal_code(shelters):
    shelters.loc[shelters.name == 'West Shelter','postal_code'] = 'M8Z'
    shelters.loc[shelters.name == 'North Shelter','postal_code'] = 'M3K'
    shelters.loc[shelters.name == 'East Shelter','postal_code'] = 'M1H'
    return shelters

shelters = get_postal_code(shelters)
shelters

Unnamed: 0,name,street,borough,province,country,postal_code
0,West Shelter,146 The East Mall,Etobicoke,ON,Canada,M8Z
1,North Shelter,1300 Sheppard Avenue West,North York,ON,Canada,M3K
2,East Shelter,821 Progress Avenue,Scarborough,ON,Canada,M1H


In [147]:
coords = pd.read_csv('w3_01_list_of_postal_codes_and_coordinates.csv')
coords.rename(columns={'PostalCode':'postal_code', 'Borough':'borough', 
                              'Neighborhood':'neighborhood', 'Latitude':'latitude',
                              'Longitude':'longitude'}, inplace=True)
coords.head()

Unnamed: 0,postal_code,borough,neighborhood,latitude,longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [148]:
shelters_and_coords = pd.merge(shelters, coords, on=['postal_code'])
shelters_and_coords.drop('borough_x', axis=1, inplace=True)
shelters_and_coords.rename(columns={'borough_y':'borough'}, inplace=True)
shelters_and_coords = pd.DataFrame(shelters_and_coords)
shelters_and_coords

Unnamed: 0,name,street,province,country,postal_code,borough,neighborhood,latitude,longitude
0,West Shelter,146 The East Mall,ON,Canada,M8Z,Etobicoke,"Kingsway Park South West, Mimico NW, The Queen...",43.628841,-79.520999
1,North Shelter,1300 Sheppard Avenue West,ON,Canada,M3K,North York,"CFB Toronto, Downsview East",43.737473,-79.464763
2,East Shelter,821 Progress Avenue,ON,Canada,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [149]:
city       = 'Toronto, CA'
geolocator = Nominatim(user_agent="luiz_alberto_capstone_project")
location   = geolocator.geocode(city)
latitude   = location.latitude
longitude  = location.longitude
print('The geographical coordinate of Toronto city are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Toronto city are 43.653963, -79.387207.


In [150]:
map_of_toronto = folium.Map(location=[latitude, longitude], zoom_start=12, control_scale=False)

for lat, lng, name, street in zip(shelters_and_coords['latitude'], shelters_and_coords['longitude']
                              , shelters_and_coords['name'], shelters_and_coords['street']):
    label = '{}:{}'.format(name, street)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [lat, lng],
        #radius=10,
        popup=label
        #color='orange',
        #fill=True,
        #fill_color='orange',
        #fill_opacity=1
    ).add_to(map_of_toronto)

missed_pets_colors = {'Cat':'red', 'Dog':'blue'}
map_of_toronto_cluster = folium.plugins.MarkerCluster().add_to(map_of_toronto)
for lat, lng, category, breed in zip(missed_pets['cross_intersec_latitude'], missed_pets['cross_intersec_longitude']
                              , missed_pets['category'], missed_pets['breed']):
    label = '{}:{}'.format(category, breed)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=10,
        popup=label,
        color=missed_pets_colors[category],
        fill=True,
        fill_color=missed_pets_colors[category],
        fill_opacity=1
    ).add_to(map_of_toronto_cluster)

map_of_toronto    

In [151]:
CLIENT_ID = 'RP1P2BKPRKXDHQZIRAFU50GOPAFWLCQDFTK4NJSKIFVQND0J' # your Foursquare ID
CLIENT_SECRET = 'JYR34RL02SLO3CS25WUNAZE1KJC0BAXLSF5AGSZEJGVVONAL' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RP1P2BKPRKXDHQZIRAFU50GOPAFWLCQDFTK4NJSKIFVQND0J
CLIENT_SECRET:JYR34RL02SLO3CS25WUNAZE1KJC0BAXLSF5AGSZEJGVVONAL


In [152]:
missed_pet_cross_intersec   = missed_pets.loc[0, 'crossing_intersections'] 
missed_pet_latitude  = missed_pets.loc[0, 'cross_intersec_latitude'] 
missed_pet_longitude = missed_pets.loc[0, 'cross_intersec_longitude'] 

print('Latitude and longitude values of {} are {}, {}.'.format(missed_pet_cross_intersec, 
                                                               missed_pet_latitude, 
                                                               missed_pet_longitude))

Latitude and longitude values of THE QUEENSWAY/KIPLING are 43.62092, -79.52685.


In [153]:
# type your answer here
LIMIT  = 100
radius = 500
url    = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    missed_pet_latitude, 
    missed_pet_longitude, 
    radius, 
    LIMIT)
url


'https://api.foursquare.com/v2/venues/explore?&client_id=RP1P2BKPRKXDHQZIRAFU50GOPAFWLCQDFTK4NJSKIFVQND0J&client_secret=JYR34RL02SLO3CS25WUNAZE1KJC0BAXLSF5AGSZEJGVVONAL&v=20180605&ll=43.62092,-79.52685&radius=500&limit=100'

In [154]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c3bef08db04f57d555b0825'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Islington - City Centre West',
  'headerFullLocation': 'Islington - City Centre West, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 17,
  'suggestedBounds': {'ne': {'lat': 43.6254200045, 'lng': -79.52064544373776},
   'sw': {'lat': 43.616419995499996, 'lng': -79.53305455626223}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d1b6b5add363704bb09641a',
       'name': 'Spoon and Fork',
       'location': {'address': '1233 The Queensway, Unit 24',
        'crossStreet': 'E of Kipling Ave',
        'lat': 43.62067853539908,
        'lng': -79.5

In [155]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [156]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues) 
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Spoon and Fork,Asian Restaurant,43.620679,-79.52444
1,Fit4Less,Gym,43.619745,-79.524681
2,Burrito Boyz,Burrito Place,43.621791,-79.522601
3,Fat Bastard Burrito Co.,Burrito Place,43.622099,-79.52188
4,K & B Sushi,Sushi Restaurant,43.622269,-79.522485
5,Shawarma Grill Express,Middle Eastern Restaurant,43.62176,-79.522539
6,Mangosteens Thai Cuisine,Thai Restaurant,43.621419,-79.524771
7,TD Canada Trust,Bank,43.620459,-79.526961
8,Swiss Chalet,Restaurant,43.619926,-79.525042
9,Sobeys,Supermarket,43.61952,-79.52419


In [157]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


In [158]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['crossing_intersections', 
                  'cross_intersec_latitude', 
                  'cross_intersec_longitude', 
                  'venue', 
                  'venue_latitude', 
                  'venue_longitude', 
                  'venue_category']
    
    return(nearby_venues)

In [159]:
missed_pets_venues = getNearbyVenues(names=missed_pets['crossing_intersections'],
                                   latitudes=missed_pets['cross_intersec_latitude'],
                                   longitudes=missed_pets['cross_intersec_longitude']
                                  )

THE QUEENSWAY/KIPLING
ISLINGTON BIRMINGHAM
REXDALE/ISLINGTON
REXDALE/ISLINGTON
DANFORTH/VICTORIA PARK
KIPLING/401
DUFFERIN/GLENCAIRN
DUFFERIN/ROGERS
SPADINA DUNDAS
LANSDOWNE AVE
KEELE/SHEPPARD
KEELE/SHEPPARD
KEELE/SHEPPARD
GREENWOOD/DANFORTH
ROGERS/KEELE
JANE/WILSON


In [160]:
print(missed_pets_venues.shape)
missed_pets_venues.head(10)

(342, 7)


Unnamed: 0,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,venue,venue_latitude,venue_longitude,venue_category
0,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Spoon and Fork,43.620679,-79.52444,Asian Restaurant
1,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Fit4Less,43.619745,-79.524681,Gym
2,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Burrito Boyz,43.621791,-79.522601,Burrito Place
3,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Fat Bastard Burrito Co.,43.622099,-79.52188,Burrito Place
4,THE QUEENSWAY/KIPLING,43.62092,-79.52685,K & B Sushi,43.622269,-79.522485,Sushi Restaurant
5,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Shawarma Grill Express,43.62176,-79.522539,Middle Eastern Restaurant
6,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Mangosteens Thai Cuisine,43.621419,-79.524771,Thai Restaurant
7,THE QUEENSWAY/KIPLING,43.62092,-79.52685,TD Canada Trust,43.620459,-79.526961,Bank
8,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Swiss Chalet,43.619926,-79.525042,Restaurant
9,THE QUEENSWAY/KIPLING,43.62092,-79.52685,Sobeys,43.61952,-79.52419,Supermarket


In [161]:
missed_pets_venues.groupby('crossing_intersections').count().head(20)

Unnamed: 0_level_0,cross_intersec_latitude,cross_intersec_longitude,venue,venue_latitude,venue_longitude,venue_category
crossing_intersections,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DANFORTH/VICTORIA PARK,27,27,27,27,27,27
DUFFERIN/GLENCAIRN,10,10,10,10,10,10
DUFFERIN/ROGERS,5,5,5,5,5,5
GREENWOOD/DANFORTH,33,33,33,33,33,33
ISLINGTON BIRMINGHAM,15,15,15,15,15,15
JANE/WILSON,18,18,18,18,18,18
KEELE/SHEPPARD,36,36,36,36,36,36
KIPLING/401,5,5,5,5,5,5
LANSDOWNE AVE,28,28,28,28,28,28
REXDALE/ISLINGTON,34,34,34,34,34,34


In [162]:
print('There are {} uniques categories.'.format(len(missed_pets_venues['venue_category'].unique())))

There are 115 uniques categories.


In [163]:
missed_pets_onehot = pd.get_dummies(missed_pets_venues[['venue_category']], prefix="", prefix_sep="")
missed_pets_onehot['crossing_intersections'] = missed_pets_venues['crossing_intersections'] 
fixed_columns = [missed_pets_onehot.columns[-1]] + list(missed_pets_onehot.columns[:-1])
missed_pets_onehot = missed_pets_onehot[fixed_columns]
missed_pets_onehot.head()

Unnamed: 0,crossing_intersections,African Restaurant,American Restaurant,Arepa Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,...,Taco Place,Tea Room,Thai Restaurant,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,THE QUEENSWAY/KIPLING,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,THE QUEENSWAY/KIPLING,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,THE QUEENSWAY/KIPLING,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,THE QUEENSWAY/KIPLING,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,THE QUEENSWAY/KIPLING,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [164]:
missed_pets_onehot.shape

(342, 116)

In [165]:
missed_pets_grouped = missed_pets_onehot.groupby('crossing_intersections').mean().reset_index()
missed_pets_grouped.head(15)

Unnamed: 0,crossing_intersections,African Restaurant,American Restaurant,Arepa Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,...,Taco Place,Tea Room,Thai Restaurant,Thrift / Vintage Store,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,DANFORTH/VICTORIA PARK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.037037,0.037037,0.0,0.0,0.037037
1,DUFFERIN/GLENCAIRN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,DUFFERIN/ROGERS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,GREENWOOD/DANFORTH,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ISLINGTON BIRMINGHAM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,JANE/WILSON,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0
6,KEELE/SHEPPARD,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0
7,KIPLING/401,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,LANSDOWNE AVE,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,REXDALE/ISLINGTON,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0


In [166]:
missed_pets_grouped.shape

(13, 116)

In [167]:
num_top_venues = 5

for hood in missed_pets_grouped['crossing_intersections']:
    print("----"+hood+"----")
    temp = missed_pets_grouped[missed_pets_grouped['crossing_intersections'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----DANFORTH/VICTORIA PARK----
           venue  freq
0  Grocery Store  0.19
1    Coffee Shop  0.15
2  Women's Store  0.04
3   Liquor Store  0.04
4       Pharmacy  0.04


----DUFFERIN/GLENCAIRN----
                       venue  freq
0       Fast Food Restaurant   0.2
1  Latin American Restaurant   0.1
2            Photography Lab   0.1
3                Pizza Place   0.1
4        Rental Car Location   0.1


----DUFFERIN/ROGERS----
                 venue  freq
0                  Gym   0.2
1          Pizza Place   0.2
2   Mexican Restaurant   0.2
3           Beer Store   0.2
4  Sporting Goods Shop   0.2


----GREENWOOD/DANFORTH----
                  venue  freq
0                  Café  0.18
1  Ethiopian Restaurant  0.09
2           Coffee Shop  0.09
3              Beer Bar  0.06
4          Dessert Shop  0.03


----ISLINGTON BIRMINGHAM----
                venue  freq
0                Café  0.20
1  Mexican Restaurant  0.13
2                 Gym  0.07
3  Italian Restaurant  0.07
4          R

In [168]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [169]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['crossing_intersections']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
missed_pets_venues_sorted = pd.DataFrame(columns=columns)
missed_pets_venues_sorted['crossing_intersections'] = missed_pets_grouped['crossing_intersections']

for ind in np.arange(missed_pets_grouped.shape[0]):
    missed_pets_venues_sorted.iloc[ind, 1:] = return_most_common_venues(missed_pets_grouped.iloc[ind, :], num_top_venues)

missed_pets_venues_sorted.head(15)

Unnamed: 0,crossing_intersections,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,DANFORTH/VICTORIA PARK,Grocery Store,Coffee Shop,Women's Store,Beer Store,Gym / Fitness Center,Food & Drink Shop,Fast Food Restaurant,Discount Store,Liquor Store,Clothing Store
1,DUFFERIN/GLENCAIRN,Fast Food Restaurant,Paintball Field,Photography Lab,Rental Car Location,Mediterranean Restaurant,Pizza Place,Gym / Fitness Center,Grocery Store,Latin American Restaurant,Coffee Shop
2,DUFFERIN/ROGERS,Gym,Sporting Goods Shop,Pizza Place,Mexican Restaurant,Beer Store,Fried Chicken Joint,French Restaurant,Cocktail Bar,Coffee Shop,Comfort Food Restaurant
3,GREENWOOD/DANFORTH,Café,Ethiopian Restaurant,Coffee Shop,Beer Bar,Hostel,Metro Station,Convenience Store,Liquor Store,Park,Karaoke Bar
4,ISLINGTON BIRMINGHAM,Café,Mexican Restaurant,Bakery,Indian Restaurant,Italian Restaurant,Pharmacy,Pizza Place,Pub,Restaurant,Seafood Restaurant
5,JANE/WILSON,Vietnamese Restaurant,Coffee Shop,Pharmacy,Clothing Store,Grocery Store,Fried Chicken Joint,Hockey Arena,Convenience Store,Sandwich Place,Discount Store
6,KEELE/SHEPPARD,Grocery Store,Pizza Place,Music Venue,Athletics & Sports,Cosmetics Shop,Spa,Fast Food Restaurant,Sandwich Place,Park,Vietnamese Restaurant
7,KIPLING/401,Skating Rink,Coffee Shop,Pizza Place,Bakery,Breakfast Spot,Ethiopian Restaurant,Cocktail Bar,Comfort Food Restaurant,Convenience Store,Cosmetics Shop
8,LANSDOWNE AVE,Coffee Shop,Café,Grocery Store,Park,Portuguese Restaurant,Skating Rink,Gym / Fitness Center,Discount Store,Liquor Store,Music Venue
9,REXDALE/ISLINGTON,African Restaurant,Fish & Chips Shop,Café,Pet Store,Discount Store,Bookstore,Restaurant,Sandwich Place,Intersection,Fast Food Restaurant


In [170]:
kclusters = 5
missed_pets_grouped_clustering = missed_pets_grouped.drop('crossing_intersections', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(missed_pets_grouped_clustering)
kmeans.labels_

array([0, 2, 4, 3, 3, 0, 2, 1, 3, 0, 3, 3, 0], dtype=int32)

In [183]:
missed_pets_merged = missed_pets_grouped
missed_pets_merged['cluster_labels'] = kmeans.labels_
missed_pets_merged = missed_pets_merged[['crossing_intersections', 'cluster_labels']]
missed_pets_merged = missed_pets_merged.join(missed_pets_venues_sorted.set_index('crossing_intersections'), on='crossing_intersections')
missed_pets_merged = missed_pets.join(missed_pets_merged.reset_index().set_index('crossing_intersections'), on='crossing_intersections')
missed_pets_merged[['breed', 'sex', 'colour', 'crossing_intersections', 'cluster_labels', '1st Most Common Venue', '2nd Most Common Venue', '3rd Most Common Venue']].head(20) 


Unnamed: 0,breed,sex,colour,crossing_intersections,cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,DOMESTIC SH,Male,ORANGE,THE QUEENSWAY/KIPLING,0,Burrito Place,Gym,Bakery
1,DOMESTIC SH,Unknown,WHITE,ISLINGTON BIRMINGHAM,3,Café,Mexican Restaurant,Bakery
2,DOMESTIC SH,Male,BLACK,REXDALE/ISLINGTON,0,African Restaurant,Fish & Chips Shop,Café
3,DOMESTIC SH,Female,BLACK,REXDALE/ISLINGTON,0,African Restaurant,Fish & Chips Shop,Café
4,DOMESTIC SH,Male,BRN TABBY,DANFORTH/VICTORIA PARK,0,Grocery Store,Coffee Shop,Women's Store
5,DOMESTIC SH,Unknown,BRN TABBY,KIPLING/401,1,Skating Rink,Coffee Shop,Pizza Place
6,DOMESTIC SH,Male,BRN TABBY,DUFFERIN/GLENCAIRN,2,Fast Food Restaurant,Paintball Field,Photography Lab
7,DOMESTIC SH,Female,BRN TABBY,DUFFERIN/ROGERS,4,Gym,Sporting Goods Shop,Pizza Place
8,SHIH TZU,Male,WHITE,SPADINA DUNDAS,3,Café,Bar,Vietnamese Restaurant
9,SIBERIAN HUSKY,Neutered Male,WHITE,LANSDOWNE AVE,3,Coffee Shop,Café,Grocery Store


In [172]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
missed_pets_merged.reset_index(inplace=True)
map_of_toronto_cluster = folium.plugins.MarkerCluster().add_to(map_clusters)
for lat, lon, poi, cluster in zip(missed_pets_merged['cross_intersec_latitude']
                                  , missed_pets_merged['cross_intersec_longitude']
                                  , missed_pets_merged['crossing_intersections']
                                  , missed_pets_merged['cluster_labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_of_toronto_cluster)
       
map_clusters

In [184]:
missed_pets_merged.loc[missed_pets_merged['cluster_labels'] == 0, missed_pets_merged.columns[[1] + list(range(5, missed_pets_merged.shape[1]))]]

Unnamed: 0,breed,receiving_shelter,id,crossing_intersections,cross_intersec_st1,cross_intersec_st2,category,cross_intersec_latitude,cross_intersec_longitude,index,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,DOMESTIC SH,West Region,A824678,THE QUEENSWAY/KIPLING,THE QUEENSWAY,KIPLING,Cat,43.62092,-79.52685,12,...,Burrito Place,Gym,Bakery,Gym / Fitness Center,Mattress Store,Middle Eastern Restaurant,Optical Shop,Buffet,Restaurant,Sandwich Place
2,DOMESTIC SH,North Region,A824752,REXDALE/ISLINGTON,REXDALE,ISLINGTON,Cat,43.71242,-79.55384,9,...,African Restaurant,Fish & Chips Shop,Café,Pet Store,Discount Store,Bookstore,Restaurant,Sandwich Place,Intersection,Fast Food Restaurant
3,DOMESTIC SH,North Region,A824753,REXDALE/ISLINGTON,REXDALE,ISLINGTON,Cat,43.71242,-79.55384,9,...,African Restaurant,Fish & Chips Shop,Café,Pet Store,Discount Store,Bookstore,Restaurant,Sandwich Place,Intersection,Fast Food Restaurant
4,DOMESTIC SH,North Region,A824906,DANFORTH/VICTORIA PARK,DANFORTH,VICTORIA PARK,Cat,43.69125,-79.28834,0,...,Grocery Store,Coffee Shop,Women's Store,Beer Store,Gym / Fitness Center,Food & Drink Shop,Fast Food Restaurant,Discount Store,Liquor Store,Clothing Store
15,MIN PINSCHER,North Region,A824909,JANE/WILSON,JANE,WILSON,Dog,43.72093,-79.50863,5,...,Vietnamese Restaurant,Coffee Shop,Pharmacy,Clothing Store,Grocery Store,Fried Chicken Joint,Hockey Arena,Convenience Store,Sandwich Place,Discount Store


In [185]:
missed_pets_merged.loc[missed_pets_merged['cluster_labels'] == 1, missed_pets_merged.columns[[1] + list(range(5, missed_pets_merged.shape[1]))]]

Unnamed: 0,breed,receiving_shelter,id,crossing_intersections,cross_intersec_st1,cross_intersec_st2,category,cross_intersec_latitude,cross_intersec_longitude,index,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,DOMESTIC SH,West Region,A824912,KIPLING/401,KIPLING,401,Cat,43.60257,-79.51854,7,...,Skating Rink,Coffee Shop,Pizza Place,Bakery,Breakfast Spot,Ethiopian Restaurant,Cocktail Bar,Comfort Food Restaurant,Convenience Store,Cosmetics Shop


In [186]:
missed_pets_merged.loc[missed_pets_merged['cluster_labels'] == 2, missed_pets_merged.columns[[1] + list(range(5, missed_pets_merged.shape[1]))]]

Unnamed: 0,breed,receiving_shelter,id,crossing_intersections,cross_intersec_st1,cross_intersec_st2,category,cross_intersec_latitude,cross_intersec_longitude,index,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,DOMESTIC SH,Found Animal Report,A824932,DUFFERIN/GLENCAIRN,DUFFERIN,GLENCAIRN,Cat,43.707,-79.45316,1,...,Fast Food Restaurant,Paintball Field,Photography Lab,Rental Car Location,Mediterranean Restaurant,Pizza Place,Gym / Fitness Center,Grocery Store,Latin American Restaurant,Coffee Shop
10,BICHON FRISE,North Region,A824812,KEELE/SHEPPARD,KEELE,SHEPPARD,Dog,43.74481,-79.48639,6,...,Grocery Store,Pizza Place,Music Venue,Athletics & Sports,Cosmetics Shop,Spa,Fast Food Restaurant,Sandwich Place,Park,Vietnamese Restaurant
11,YORKSHIRE TERR,North Region,A824813,KEELE/SHEPPARD,KEELE,SHEPPARD,Dog,43.74481,-79.48639,6,...,Grocery Store,Pizza Place,Music Venue,Athletics & Sports,Cosmetics Shop,Spa,Fast Food Restaurant,Sandwich Place,Park,Vietnamese Restaurant
12,YORKSHIRE TERR,North Region,A824814,KEELE/SHEPPARD,KEELE,SHEPPARD,Dog,43.74481,-79.48639,6,...,Grocery Store,Pizza Place,Music Venue,Athletics & Sports,Cosmetics Shop,Spa,Fast Food Restaurant,Sandwich Place,Park,Vietnamese Restaurant


In [187]:
missed_pets_merged.loc[missed_pets_merged['cluster_labels'] == 3, missed_pets_merged.columns[[1] + list(range(5, missed_pets_merged.shape[1]))]]

Unnamed: 0,breed,receiving_shelter,id,crossing_intersections,cross_intersec_st1,cross_intersec_st2,category,cross_intersec_latitude,cross_intersec_longitude,index,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,DOMESTIC SH,West Region,A824739,ISLINGTON BIRMINGHAM,ISLINGTON,BIRMINGHAM,Cat,43.60326,-79.50628,4,...,Café,Mexican Restaurant,Bakery,Indian Restaurant,Italian Restaurant,Pharmacy,Pizza Place,Pub,Restaurant,Seafood Restaurant
8,SHIH TZU,West Region,A824737,SPADINA DUNDAS,SPADINA,DUNDAS,Dog,43.65293,-79.39816,11,...,Café,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Dumpling Restaurant,Dessert Shop,Chinese Restaurant,Coffee Shop,Ramen Restaurant,Bakery
9,SIBERIAN HUSKY,North Region,A824755,LANSDOWNE AVE,LANSDOWNE,AVE,Dog,43.66483,-79.44569,8,...,Coffee Shop,Café,Grocery Store,Park,Portuguese Restaurant,Skating Rink,Gym / Fitness Center,Discount Store,Liquor Store,Music Venue
13,AMER BULLDOG,Found Animal Report,A824891,GREENWOOD/DANFORTH,GREENWOOD,DANFORTH,Dog,43.68148,-79.33234,3,...,Café,Ethiopian Restaurant,Coffee Shop,Beer Bar,Hostel,Metro Station,Convenience Store,Liquor Store,Park,Karaoke Bar
14,TERRIER,North Region,A824903,ROGERS/KEELE,ROGERS,KEELE,Dog,43.68213,-79.47159,10,...,BBQ Joint,Bakery,Pizza Place,Department Store,Latin American Restaurant,Beer Store,Discount Store,Sandwich Place,Thrift / Vintage Store,Café


In [188]:
missed_pets_merged.loc[missed_pets_merged['cluster_labels'] == 4, missed_pets_merged.columns[[1] + list(range(5, missed_pets_merged.shape[1]))]]

Unnamed: 0,breed,receiving_shelter,id,crossing_intersections,cross_intersec_st1,cross_intersec_st2,category,cross_intersec_latitude,cross_intersec_longitude,index,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,DOMESTIC SH,North Region,A824940,DUFFERIN/ROGERS,DUFFERIN,ROGERS,Cat,43.68557,-79.44611,2,...,Gym,Sporting Goods Shop,Pizza Place,Mexican Restaurant,Beer Store,Fried Chicken Joint,French Restaurant,Cocktail Bar,Coffee Shop,Comfort Food Restaurant
