# Segmentation of Lost Animals by Venues
---

## 1. Importing Required Packages

In [92]:
import os

import folium
import folium.plugins
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

## 2. Getting Lost Pets Data

In [93]:
def get_coordinates(city, street1, street2=''):
    """Look up the coordinates of a street (or a street pair) with the HERE geocoder.

    Parameters
    ----------
    city : str
        City sent as the ``city`` query parameter.
    street1 : str
        Street sent as the ``street`` query parameter.
    street2 : str, optional
        Second street. When non-empty, ``street`` is sent as
        ``"<street1>@<street2>"``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` read from the first result's
        ``DisplayPosition``. ``(0.0, 0.0)`` is returned when the request fails
        or the response does not have the expected structure; the failure is
        printed, not raised.
    """
    # NOTE(security): the literals below are only fallbacks so the notebook
    # still runs unchanged. Prefer the environment variables, and rotate these
    # keys since they are committed with the notebook.
    api_id   = os.environ.get('HERE_APP_ID', '79foQR1GPJRvsWDGB0Ul')
    api_code = os.environ.get('HERE_APP_CODE', 'E5YKLSl_O29hf-ipUlPFfQ')
    latitude = longitude = 0.0

    street = street1 if street2 == '' else '{}@{}'.format(street1, street2)
    # Passing `params` lets requests URL-encode spaces, '&' and other reserved
    # characters that appear in street names.
    params = {
        'city': city,
        'street': street,
        'app_id': api_id,
        'app_code': api_code,
        'gen': 9,
    }

    try:
        response = requests.get('https://geocoder.api.here.com/6.2/geocode.json',
                                params=params, timeout=10)
        position = response.json()['Response']['View'][0]['Result'][0]['Location']['DisplayPosition']
        # Assign both values together so a half-parsed position is never returned.
        latitude, longitude = position['Latitude'], position['Longitude']
    except Exception as e:
        # Deliberate best effort: one failed lookup must not abort a whole scrape.
        print('Adress {}/{} was not found in geocoder database: {}! '.format(street1, city, str(e)))

    return (latitude, longitude)

In [94]:
def get_lost_pets(city, url):
    """Scrape a stray-animal listing page and geocode each record.

    Every ``<table>`` on the page is scanned; only rows with exactly 8 ``<td>``
    cells are treated as records. The animal category is assigned by table
    position (first table 'Cat', second 'Dog'); any further table is labelled
    'Unknown'. Records whose last cell (crossing intersections) is empty are
    left out.

    Parameters
    ----------
    city : str
        City passed on to ``get_coordinates`` for every record.
    url : str
        Address of the HTML page to scrape.

    Returns
    -------
    pandas.DataFrame
        One row per kept record with the columns listed in ``columns`` below.
        The frame is empty but still has those columns when nothing is found.
    """
    columns = ['date', 'breed', 'age'
              , 'sex', 'colour', 'receiving_shelter'
              , 'id', 'crossing_intersections', 'cross_intersec_latitude'
              , 'cross_intersec_longitude', 'category']
    categories = ['Cat', 'Dog']
    pets = []

    response = requests.get(url)
    soup     = BeautifulSoup(response.text, 'lxml')

    for index, table in enumerate(soup.find_all('table')):
        # A page with more tables than known categories must not raise IndexError.
        category = categories[index] if index < len(categories) else 'Unknown'

        for row in table.find_all('tr'):
            cols = row.find_all('td')
            if len(cols) != 8:
                continue

            cells = [col.text.strip() for col in cols]

            # Normalise "A AND B" to the "A/B" form used by the other records.
            cross_intersecs = cells[7].replace(' AND ', '/')

            # Records without crossing intersections are excluded from the dataset.
            if cross_intersecs == '':
                continue

            streets = cross_intersecs.split('/')
            if len(streets) < 2:
                streets = cross_intersecs.split(' ')

            # A single-word location has no second street; fall back to a
            # one-street lookup instead of failing on streets[1].
            street2 = streets[1] if len(streets) > 1 else ''

            latitude, longitude = get_coordinates(city, streets[0], street2)
            pets.append(tuple(cells[:7]) + (cross_intersecs, latitude, longitude, category))

    # Passing `columns` here keeps the schema even when no record was collected.
    return pd.DataFrame(pets, columns=columns)

In [95]:
# Scrape the stray-animal listing of Toronto's animal services and preview it.
url = 'https://www.toronto.ca/data/mls/animals/strayanimals.html'
lost_pets = get_lost_pets(city='Toronto', url=url)
lost_pets.head(15)

Unnamed: 0,date,breed,age,sex,colour,receiving_shelter,id,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,category
0,2019-01-12,DOMESTIC SH,3Y,Male,BRN TABBY,North Region,A824906,DANFORTH/VICTORIA PARK,43.69125,-79.28834,Cat
1,2019-01-12,DOMESTIC SH,,Unknown,BRN TABBY,West Region,A824912,KIPLING/401,43.60257,-79.51854,Cat
2,2019-01-12,DOMESTIC SH,,Male,BRN TABBY,Found Animal Report,A824932,DUFFERIN/GLENCAIRN,43.707,-79.45316,Cat
3,2019-01-13,DOMESTIC SH,,Female,BRN TABBY,North Region,A824940,DUFFERIN/ROGERS,43.68557,-79.44611,Cat
4,2019-01-13,DOMESTIC SH,1Y,Female,ORG TABBY,Found Animal Report,A824950,DUFFERIN/EGLINTON,43.69568,-79.4503,Cat
5,2019-01-10,BICHON FRISE,,Female,WHITE,North Region,A824812,KEELE/SHEPPARD,43.74481,-79.48639,Dog
6,2019-01-10,YORKSHIRE TERR,,Neutered Male,BLACK,North Region,A824813,KEELE/SHEPPARD,43.74481,-79.48639,Dog
7,2019-01-10,YORKSHIRE TERR,,Spayed Female,BLACK,North Region,A824814,KEELE/SHEPPARD,43.74481,-79.48639,Dog
8,2019-01-11,AMER BULLDOG,3Y,Female,GREY,Found Animal Report,A824891,GREENWOOD/DANFORTH,43.68148,-79.33234,Dog
9,2019-01-12,MIN PINSCHER,,Male,BLACK,North Region,A824909,JANE/WILSON,43.72093,-79.50863,Dog


## 3. Getting Receiving Shelters Data

In [96]:
def get_shelters(city, url):
    """Scrape the shelter table (id ``gmaptable``) from a page and geocode each shelter.

    Only rows with exactly 2 ``<td>`` cells are used: the first cell is the
    shelter name, the second its address. The address must consist of four
    comma-separated parts (street, borough, province, country); rows that do
    not match are reported with ``print`` and skipped.

    Parameters
    ----------
    city : str
        City stored with every shelter and passed to ``get_coordinates``.
    url : str
        Address of the HTML page to scrape.

    Returns
    -------
    pandas.DataFrame
        Columns: name, street, borough, city, province, country, latitude,
        longitude.

    Raises
    ------
    ValueError
        If the page contains no table with id ``gmaptable``.
    """
    columns = ['name', 'street', 'borough', 'city', 'province', 'country', 'latitude', 'longitude']

    response = requests.get(url)
    soup     = BeautifulSoup(response.text, 'lxml')

    table = soup.find('table', {'id' : 'gmaptable'})
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("No table with id 'gmaptable' was found at {}".format(url))

    shelters = []
    for row in table.find_all('tr'):
        cols = row.find_all('td')
        if len(cols) != 2:
            continue

        name    = cols[0].text.strip()
        address = cols[1].text.strip()

        # Strip each part so values do not keep the space that follows a comma.
        parts = [part.strip() for part in address.split(',')]
        if len(parts) != 4:
            print('Address "{}" of shelter {} does not have 4 comma-separated parts; skipped.'.format(address, name))
            continue

        street, borough, province, country = parts
        latitude, longitude = get_coordinates(city, street)
        shelters.append((name, street, borough, city, province, country, latitude, longitude))

    # Passing `columns` here keeps the schema even when no shelter was collected.
    shelters = pd.DataFrame(shelters, columns=columns)

    # Manual override kept from the original notebook for the West Shelter row.
    shelters.loc[shelters['name'] == 'West Shelter', 'borough'] = 'Etobicoke'
    return shelters

In [97]:
# Scrape and geocode the city's animal shelters.
url = 'https://www.toronto.ca/community-people/animals-pets/animal-shelters/'
shelters = get_shelters(city='Toronto', url=url)
shelters

Unnamed: 0,name,street,borough,city,province,country,latitude,longitude
0,West Shelter,146 The East Mall,Etobicoke,Toronto,ON,Canada,43.62367,-79.55002
1,North Shelter,1300 Sheppard Avenue West,North York,Toronto,ON,Canada,43.7532,-79.48277
2,East Shelter,821 Progress Avenue,Scarborough,Toronto,ON,Canada,43.7802,-79.24161


## 4. Plotting Shelters and Lost Pets Geographical Localisation.

In [98]:
# Geocode the city itself with Nominatim; `latitude` / `longitude` are reused
# as the centre of the folium maps.
city = 'Toronto, CA'
geolocator = Nominatim(user_agent="luiz_alberto_capstone_project")
location = geolocator.geocode(city)
latitude, longitude = location.latitude, location.longitude
print('The geographical coordinate of Toronto city are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Toronto city are 43.653963, -79.387207.


In [99]:
# Base map centred on the geocoded city coordinates.
map_of_toronto = folium.Map(location=[latitude, longitude], zoom_start=12, control_scale=False)

# One standard marker per shelter, labelled "name:street".
for shelter in shelters[['latitude', 'longitude', 'name', 'street']].itertuples(index=False):
    shelter_popup = folium.Popup('{}:{}'.format(shelter.name, shelter.street), parse_html=True)
    folium.Marker(
        location=[shelter.latitude, shelter.longitude],
        popup=shelter_popup
    ).add_to(map_of_toronto)

# Lost pets go into a marker cluster layer, coloured by category and labelled "category:breed".
lost_pets_colors = {'Cat':'red', 'Dog':'blue'}
map_of_toronto_cluster = folium.plugins.MarkerCluster().add_to(map_of_toronto)
pet_fields = ['cross_intersec_latitude', 'cross_intersec_longitude', 'category', 'breed']
for pet in lost_pets[pet_fields].itertuples(index=False):
    pet_colour = lost_pets_colors[pet.category]
    pet_popup = folium.Popup('{}:{}'.format(pet.category, pet.breed), parse_html=True)
    folium.CircleMarker(
        location=[pet.cross_intersec_latitude, pet.cross_intersec_longitude],
        radius=10,
        popup=pet_popup,
        color=pet_colour,
        fill=True,
        fill_color=pet_colour,
        fill_opacity=1
    ).add_to(map_of_toronto_cluster)

map_of_toronto

## 5. Segmenting Lost Pets

In [100]:
# Foursquare API credentials.
# NOTE(security): set FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET in the
# environment. The literals are only fallbacks so the notebook still runs
# unchanged; they are committed with the notebook and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'RP1P2BKPRKXDHQZIRAFU50GOPAFWLCQDFTK4NJSKIFVQND0J') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'JYR34RL02SLO3CS25WUNAZE1KJC0BAXLSF5AGSZEJGVVONAL') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Do not echo the secret into the saved cell output; show a mask of the same length.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: RP1P2BKPRKXDHQZIRAFU50GOPAFWLCQDFTK4NJSKIFVQND0J
CLIENT_SECRET:JYR34RL02SLO3CS25WUNAZE1KJC0BAXLSF5AGSZEJGVVONAL


In [101]:
# Take the record labelled 0 as the sample location for a single Foursquare query.
lost_pet_cross_intersec = lost_pets.at[0, 'crossing_intersections']
lost_pet_latitude = lost_pets.at[0, 'cross_intersec_latitude']
lost_pet_longitude = lost_pets.at[0, 'cross_intersec_longitude']

message = 'Latitude and longitude values of {} are {}, {}.'
print(message.format(lost_pet_cross_intersec, lost_pet_latitude, lost_pet_longitude))

Latitude and longitude values of DANFORTH/VICTORIA PARK are 43.69125, -79.28834.


In [102]:
# Build the Foursquare "explore" request around the sample lost-pet location.
LIMIT  = 100  # sent as the `limit` query parameter
radius = 500  # sent as the `radius` query parameter
explore_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = explore_template.format(CLIENT_ID, CLIENT_SECRET, VERSION,
                              lost_pet_latitude, lost_pet_longitude,
                              radius, LIMIT)
url


'https://api.foursquare.com/v2/venues/explore?&client_id=RP1P2BKPRKXDHQZIRAFU50GOPAFWLCQDFTK4NJSKIFVQND0J&client_secret=JYR34RL02SLO3CS25WUNAZE1KJC0BAXLSF5AGSZEJGVVONAL&v=20180605&ll=43.69125,-79.28834&radius=500&limit=100'

In [103]:
# Send the explore request and decode the JSON body of the reply.
response = requests.get(url)
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5c3cc5724c1f671d2ae3e941'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Birch Cliff',
  'headerFullLocation': 'Birch Cliff, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 27,
  'suggestedBounds': {'ne': {'lat': 43.6957500045, 'lng': -79.28212817260136},
   'sw': {'lat': 43.686749995499994, 'lng': -79.29455182739865}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '52ec60c1498e0c441582be5f',
       'name': 'LA Fitness',
       'location': {'address': '3003 Danforth Avenue',
        'lat': 43.69035951555018,
        'lng': -79.29133908116464,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.69035

In [104]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas.Series)
        Must provide either a ``'categories'`` or a ``'venue.categories'``
        entry holding a list of dicts with a ``'name'`` key.

    Returns
    -------
    str or None
        ``categories[0]['name']``, or ``None`` when the list is empty or None.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors must surface.
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [105]:
# Flatten the venue items of the first result group and keep name, category and position.
venues = results['response']['groups'][0]['items']
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each category list by the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column name ("venue.location.lat" -> "lat").
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,LA Fitness,Gym / Fitness Center,43.69036,-79.291339
1,TD Canada Trust,Bank,43.691527,-79.288868
2,LCBO,Liquor Store,43.691295,-79.285971
3,Staples,Paper / Office Supplies Store,43.68966,-79.289451
4,Tim Hortons,Coffee Shop,43.687232,-79.286015


In [106]:
# Report how many venue rows the sample query produced.
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

27 venues were returned by Foursquare.


In [107]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of every location; iterated in parallel.
    radius : int, optional
        Value sent as the ``radius`` query parameter.
    limit : int, optional
        Value sent as the ``limit`` query parameter. Defaults to the
        notebook-level ``LIMIT`` constant.

    Returns
    -------
    pandas.DataFrame
        One row per (location, venue) pair with the columns
        crossing_intersections, cross_intersec_latitude,
        cross_intersec_longitude, venue, venue_latitude, venue_longitude,
        venue_category. ``venue_category`` is None for venues that carry no
        category. The frame is empty (but has these columns) when no venue is
        returned.

    Notes
    -----
    Reads the notebook-level ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION``.
    Prints each location name as a progress trace.
    """
    if limit is None:
        limit = LIMIT

    columns = ['crossing_intersections',
               'cross_intersec_latitude',
               'cross_intersec_longitude',
               'venue',
               'venue_latitude',
               'venue_longitude',
               'venue_category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests assemble and encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }

        # make the GET request
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=10)
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # Some venues come back with an empty category list; record None
            # instead of failing on categories[0].
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((name,
                         lat,
                         lng,
                         venue['name'],
                         venue['location']['lat'],
                         venue['location']['lng'],
                         category))

    # Passing `columns` here keeps the schema even when no venue was found.
    return pd.DataFrame(rows, columns=columns)

In [108]:
# Collect the venues around every lost-pet crossing intersection (default radius).
lost_pets_venues = getNearbyVenues(
    lost_pets['crossing_intersections'],
    lost_pets['cross_intersec_latitude'],
    lost_pets['cross_intersec_longitude'],
)

DANFORTH/VICTORIA PARK
KIPLING/401
DUFFERIN/GLENCAIRN
DUFFERIN/ROGERS
DUFFERIN/EGLINTON
KEELE/SHEPPARD
KEELE/SHEPPARD
KEELE/SHEPPARD
GREENWOOD/DANFORTH
JANE/WILSON
DOWNSVIEW  PARK
DUFFERIN/ST CLAIR


In [109]:
# Show the (rows, columns) size of the venue table, then preview its first rows.
print(lost_pets_venues.shape)
lost_pets_venues.head()

(172, 7)


Unnamed: 0,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,venue,venue_latitude,venue_longitude,venue_category
0,DANFORTH/VICTORIA PARK,43.69125,-79.28834,LA Fitness,43.69036,-79.291339,Gym / Fitness Center
1,DANFORTH/VICTORIA PARK,43.69125,-79.28834,TD Canada Trust,43.691527,-79.288868,Bank
2,DANFORTH/VICTORIA PARK,43.69125,-79.28834,LCBO,43.691295,-79.285971,Liquor Store
3,DANFORTH/VICTORIA PARK,43.69125,-79.28834,Staples,43.68966,-79.289451,Paper / Office Supplies Store
4,DANFORTH/VICTORIA PARK,43.69125,-79.28834,Tim Hortons,43.687232,-79.286015,Coffee Shop


In [110]:
# Count the non-null entries of every column per crossing intersection (first 20 groups).
lost_pets_venues.groupby('crossing_intersections').count().head(20)

Unnamed: 0_level_0,cross_intersec_latitude,cross_intersec_longitude,venue,venue_latitude,venue_longitude,venue_category
crossing_intersections,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DANFORTH/VICTORIA PARK,27,27,27,27,27,27
DOWNSVIEW PARK,6,6,6,6,6,6
DUFFERIN/EGLINTON,10,10,10,10,10,10
DUFFERIN/GLENCAIRN,10,10,10,10,10,10
DUFFERIN/ROGERS,5,5,5,5,5,5
DUFFERIN/ST CLAIR,22,22,22,22,22,22
GREENWOOD/DANFORTH,33,33,33,33,33,33
JANE/WILSON,18,18,18,18,18,18
KEELE/SHEPPARD,36,36,36,36,36,36
KIPLING/401,5,5,5,5,5,5


In [111]:
# Number of distinct venue categories (missing values, if any, count as one category).
category_count = lost_pets_venues['venue_category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(category_count))

There are 64 uniques categories.


In [112]:
# One-hot encode the venue category, one indicator column per category name.
lost_pets_onehot = pd.get_dummies(lost_pets_venues[['venue_category']], prefix="", prefix_sep="")

# Put the crossing intersection in front so every row stays attributable.
lost_pets_onehot.insert(0, 'crossing_intersections', lost_pets_venues['crossing_intersections'])
lost_pets_onehot.head()

Unnamed: 0,crossing_intersections,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Beer Bar,Beer Store,Big Box Store,Brazilian Restaurant,...,Skating Rink,Spa,Sporting Goods Shop,Sushi Restaurant,Thai Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,DANFORTH/VICTORIA PARK,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,DANFORTH/VICTORIA PARK,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,DANFORTH/VICTORIA PARK,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,DANFORTH/VICTORIA PARK,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,DANFORTH/VICTORIA PARK,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [113]:
# (rows, columns) of the one-hot encoded venue table.
lost_pets_onehot.shape

(172, 65)

In [114]:
# Average the indicators per crossing intersection: each value becomes the
# share of that intersection's venue rows falling in the category.
lost_pets_grouped = lost_pets_onehot.groupby('crossing_intersections', as_index=False).mean()
lost_pets_grouped.head()

Unnamed: 0,crossing_intersections,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Beer Bar,Beer Store,Big Box Store,Brazilian Restaurant,...,Skating Rink,Spa,Sporting Goods Shop,Sushi Restaurant,Thai Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,DANFORTH/VICTORIA PARK,0.0,0.0,0.037037,0.037037,0.0,0.0,0.037037,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.037037,0.037037,0.0,0.0,0.037037
1,DOWNSVIEW PARK,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0
2,DUFFERIN/EGLINTON,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0
3,DUFFERIN/GLENCAIRN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,DUFFERIN/ROGERS,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [115]:
# (rows, columns) of the per-intersection frequency table.
lost_pets_grouped.shape

(10, 65)

In [116]:
num_top_venues = 5

# Print, for every crossing intersection, its most frequent venue categories.
for _, profile in lost_pets_grouped.iterrows():
    hood = profile['crossing_intersections']
    print("----"+hood+"----")

    # Everything after the first entry (the intersection label) is a frequency.
    frequencies = profile.iloc[1:].astype(float).round(2)
    temp = pd.DataFrame({'venue': frequencies.index, 'freq': frequencies.values})

    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----DANFORTH/VICTORIA PARK----
            venue  freq
0   Grocery Store  0.19
1     Coffee Shop  0.15
2   Women's Store  0.04
3  Clothing Store  0.04
4    Liquor Store  0.04


----DOWNSVIEW  PARK----
                   venue  freq
0            Pizza Place  0.17
1                   Café  0.17
2                 Bakery  0.17
3  Vietnamese Restaurant  0.17
4   Outdoor Supply Store  0.17


----DUFFERIN/EGLINTON----
                 venue  freq
0  Japanese Restaurant   0.1
1      Thai Restaurant   0.1
2                 Park   0.1
3          Coffee Shop   0.1
4        Grocery Store   0.1


----DUFFERIN/GLENCAIRN----
                       venue  freq
0       Fast Food Restaurant   0.2
1            Paintball Field   0.1
2  Latin American Restaurant   0.1
3            Photography Lab   0.1
4   Mediterranean Restaurant   0.1


----DUFFERIN/ROGERS----
                 venue  freq
0  Sporting Goods Shop   0.2
1   Mexican Restaurant   0.2
2          Pizza Place   0.2
3           Beer Store   0.2
4

In [117]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, ignoring its first entry.

    The first element of ``row`` is skipped, the remaining values are sorted
    in descending order, and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [118]:
num_top_venues = 10

# English ordinal suffixes keyed by the last digit; every other digit uses 'th'.
ordinal_suffixes = {1: 'st', 2: 'nd', 3: 'rd'}

# create columns according to number of top venues
columns = ['crossing_intersections']
for rank in range(1, num_top_venues + 1):
    # 11th-13th (and 111th, ...) are exceptions to the last-digit rule.
    if 10 <= rank % 100 <= 20:
        suffix = 'th'
    else:
        suffix = ordinal_suffixes.get(rank % 10, 'th')
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe
lost_pets_venues_sorted = pd.DataFrame(columns=columns)
lost_pets_venues_sorted['crossing_intersections'] = lost_pets_grouped['crossing_intersections']

# fill each row with that intersection's top venue categories, most frequent first
for ind in range(lost_pets_grouped.shape[0]):
    lost_pets_venues_sorted.iloc[ind, 1:] = return_most_common_venues(lost_pets_grouped.iloc[ind, :], num_top_venues)

lost_pets_venues_sorted.head(15)

Unnamed: 0,crossing_intersections,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,DANFORTH/VICTORIA PARK,Grocery Store,Coffee Shop,Women's Store,Burger Joint,Gas Station,Food & Drink Shop,Liquor Store,Fast Food Restaurant,Discount Store,Clothing Store
1,DOWNSVIEW PARK,Vietnamese Restaurant,Bakery,Pharmacy,Café,Pizza Place,Outdoor Supply Store,Women's Store,Ethiopian Restaurant,Dessert Shop,Dim Sum Restaurant
2,DUFFERIN/EGLINTON,Park,Japanese Restaurant,Pharmacy,Discount Store,Grocery Store,Coffee Shop,Thai Restaurant,Bank,Bakery,Wings Joint
3,DUFFERIN/GLENCAIRN,Fast Food Restaurant,Mediterranean Restaurant,Gym / Fitness Center,Photography Lab,Pizza Place,Rental Car Location,Paintball Field,Latin American Restaurant,Grocery Store,Food & Drink Shop
4,DUFFERIN/ROGERS,Gym,Pizza Place,Sporting Goods Shop,Beer Store,Mexican Restaurant,Women's Store,Ethiopian Restaurant,Dessert Shop,Dim Sum Restaurant,Discount Store
5,DUFFERIN/ST CLAIR,Italian Restaurant,Breakfast Spot,Coffee Shop,Lounge,Vietnamese Restaurant,Convenience Store,Thai Restaurant,Sushi Restaurant,Mediterranean Restaurant,Martial Arts Dojo
6,GREENWOOD/DANFORTH,Café,Coffee Shop,Ethiopian Restaurant,Beer Bar,Park,Gym / Fitness Center,Hostel,Karaoke Bar,Liquor Store,Dim Sum Restaurant
7,JANE/WILSON,Vietnamese Restaurant,Coffee Shop,Fried Chicken Joint,Sandwich Place,Convenience Store,Clothing Store,Hockey Arena,Pharmacy,Fast Food Restaurant,Pizza Place
8,KEELE/SHEPPARD,Pizza Place,Grocery Store,Park,Spa,Cosmetics Shop,Fast Food Restaurant,Sandwich Place,Athletics & Sports,Vietnamese Restaurant,Music Venue
9,KIPLING/401,Skating Rink,Coffee Shop,Bakery,Pizza Place,Breakfast Spot,Food & Drink Shop,Dim Sum Restaurant,Discount Store,Ethiopian Restaurant,Fast Food Restaurant


## 6. Clustering Crossing Intersections

In [119]:
kclusters = 5

# Cluster on the venue frequencies only. `columns=` replaces the positional
# axis argument, which pandas 2.0 no longer accepts. `cluster_labels` is
# dropped when present so that re-running this cell never feeds labels from a
# previous run back in as a feature.
lost_pets_grouped_clustering = (
    lost_pets_grouped
    .drop(columns='crossing_intersections')
    .drop(columns='cluster_labels', errors='ignore')
)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(lost_pets_grouped_clustering)
kmeans.labels_

array([1, 3, 1, 2, 4, 1, 1, 1, 2, 0], dtype=int32)

In [127]:
# Pair every crossing intersection with its cluster label. `assign` works on a
# new frame, so `lost_pets_grouped` is left untouched and the cells that read
# it stay re-runnable.
intersection_clusters = lost_pets_grouped[['crossing_intersections']].assign(cluster_labels=kmeans.labels_)

# Add the ranked venue categories of each intersection.
intersection_profiles = intersection_clusters.join(
    lost_pets_venues_sorted.set_index('crossing_intersections'), on='crossing_intersections')

# Attach cluster and venue profile to every lost-pet record (left join on the
# intersection). `reset_index()` carries the profile's row number along as an
# 'index' column.
lost_pets_merged = lost_pets.join(
    intersection_profiles.reset_index().set_index('crossing_intersections'), on='crossing_intersections')

preview_columns = ['breed', 'sex', 'colour', 'crossing_intersections', 'cluster_labels',
                   '1st Most Common Venue', '2nd Most Common Venue', '3rd Most Common Venue']
lost_pets_merged[preview_columns].head(15)


Unnamed: 0,breed,sex,colour,crossing_intersections,cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,DOMESTIC SH,Male,BRN TABBY,DANFORTH/VICTORIA PARK,1,Grocery Store,Coffee Shop,Women's Store
1,DOMESTIC SH,Unknown,BRN TABBY,KIPLING/401,0,Skating Rink,Coffee Shop,Bakery
2,DOMESTIC SH,Male,BRN TABBY,DUFFERIN/GLENCAIRN,2,Fast Food Restaurant,Mediterranean Restaurant,Gym / Fitness Center
3,DOMESTIC SH,Female,BRN TABBY,DUFFERIN/ROGERS,4,Gym,Pizza Place,Sporting Goods Shop
4,DOMESTIC SH,Female,ORG TABBY,DUFFERIN/EGLINTON,1,Park,Japanese Restaurant,Pharmacy
5,BICHON FRISE,Female,WHITE,KEELE/SHEPPARD,2,Pizza Place,Grocery Store,Park
6,YORKSHIRE TERR,Neutered Male,BLACK,KEELE/SHEPPARD,2,Pizza Place,Grocery Store,Park
7,YORKSHIRE TERR,Spayed Female,BLACK,KEELE/SHEPPARD,2,Pizza Place,Grocery Store,Park
8,AMER BULLDOG,Female,GREY,GREENWOOD/DANFORTH,1,Café,Coffee Shop,Ethiopian Restaurant
9,MIN PINSCHER,Male,BLACK,JANE/WILSON,1,Vietnamese Restaurant,Coffee Shop,Fried Chicken Joint


In [121]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Kept as in the original: the per-cluster cells select columns by position,
# so they rely on the extra leading column this inserts. Run once per merge.
lost_pets_merged.reset_index(inplace=True)
map_of_toronto_cluster = folium.plugins.MarkerCluster().add_to(map_clusters)

# The merge is a left join, so a pet whose intersection has no venue profile
# carries a missing label (and the column becomes float). Skip those rows and
# cast the label to int before using it as a list index.
clustered_pets = lost_pets_merged.dropna(subset=['cluster_labels'])
for lat, lon, poi, cluster in zip(clustered_pets['cross_intersec_latitude']
                                  , clustered_pets['cross_intersec_longitude']
                                  , clustered_pets['crossing_intersections']
                                  , clustered_pets['cluster_labels']):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # cluster 0 wraps around to the last colour; every cluster stays distinct
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_of_toronto_cluster)

map_clusters

In [122]:
# Pets in cluster 0: the column at position 1 plus every column from position 5 onwards.
cluster_0_rows = lost_pets_merged['cluster_labels'] == 0
cluster_0_positions = [1] + list(range(5, lost_pets_merged.shape[1]))
lost_pets_merged[cluster_0_rows].iloc[:, cluster_0_positions]

Unnamed: 0,date,colour,receiving_shelter,id,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,category,index,cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,2019-01-12,BRN TABBY,West Region,A824912,KIPLING/401,43.60257,-79.51854,Cat,9,0,Skating Rink,Coffee Shop,Bakery,Pizza Place,Breakfast Spot,Food & Drink Shop,Dim Sum Restaurant,Discount Store,Ethiopian Restaurant,Fast Food Restaurant


In [123]:
# Pets in cluster 1: the column at position 1 plus every column from position 5 onwards.
cluster_1_rows = lost_pets_merged['cluster_labels'] == 1
cluster_1_positions = [1] + list(range(5, lost_pets_merged.shape[1]))
lost_pets_merged[cluster_1_rows].iloc[:, cluster_1_positions]

Unnamed: 0,date,colour,receiving_shelter,id,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,category,index,cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2019-01-12,BRN TABBY,North Region,A824906,DANFORTH/VICTORIA PARK,43.69125,-79.28834,Cat,0,1,Grocery Store,Coffee Shop,Women's Store,Burger Joint,Gas Station,Food & Drink Shop,Liquor Store,Fast Food Restaurant,Discount Store,Clothing Store
4,2019-01-13,ORG TABBY,Found Animal Report,A824950,DUFFERIN/EGLINTON,43.69568,-79.4503,Cat,2,1,Park,Japanese Restaurant,Pharmacy,Discount Store,Grocery Store,Coffee Shop,Thai Restaurant,Bank,Bakery,Wings Joint
8,2019-01-11,GREY,Found Animal Report,A824891,GREENWOOD/DANFORTH,43.68148,-79.33234,Dog,6,1,Café,Coffee Shop,Ethiopian Restaurant,Beer Bar,Park,Gym / Fitness Center,Hostel,Karaoke Bar,Liquor Store,Dim Sum Restaurant
9,2019-01-12,BLACK,North Region,A824909,JANE/WILSON,43.72093,-79.50863,Dog,7,1,Vietnamese Restaurant,Coffee Shop,Fried Chicken Joint,Sandwich Place,Convenience Store,Clothing Store,Hockey Arena,Pharmacy,Fast Food Restaurant,Pizza Place
11,2019-01-13,BLACK,West Region,A824946,DUFFERIN/ST CLAIR,43.67788,-79.44304,Dog,5,1,Italian Restaurant,Breakfast Spot,Coffee Shop,Lounge,Vietnamese Restaurant,Convenience Store,Thai Restaurant,Sushi Restaurant,Mediterranean Restaurant,Martial Arts Dojo


In [124]:
# Pets in cluster 2: the column at position 1 plus every column from position 5 onwards.
cluster_2_rows = lost_pets_merged['cluster_labels'] == 2
cluster_2_positions = [1] + list(range(5, lost_pets_merged.shape[1]))
lost_pets_merged[cluster_2_rows].iloc[:, cluster_2_positions]

Unnamed: 0,date,colour,receiving_shelter,id,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,category,index,cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,2019-01-12,BRN TABBY,Found Animal Report,A824932,DUFFERIN/GLENCAIRN,43.707,-79.45316,Cat,3,2,Fast Food Restaurant,Mediterranean Restaurant,Gym / Fitness Center,Photography Lab,Pizza Place,Rental Car Location,Paintball Field,Latin American Restaurant,Grocery Store,Food & Drink Shop
5,2019-01-10,WHITE,North Region,A824812,KEELE/SHEPPARD,43.74481,-79.48639,Dog,8,2,Pizza Place,Grocery Store,Park,Spa,Cosmetics Shop,Fast Food Restaurant,Sandwich Place,Athletics & Sports,Vietnamese Restaurant,Music Venue
6,2019-01-10,BLACK,North Region,A824813,KEELE/SHEPPARD,43.74481,-79.48639,Dog,8,2,Pizza Place,Grocery Store,Park,Spa,Cosmetics Shop,Fast Food Restaurant,Sandwich Place,Athletics & Sports,Vietnamese Restaurant,Music Venue
7,2019-01-10,BLACK,North Region,A824814,KEELE/SHEPPARD,43.74481,-79.48639,Dog,8,2,Pizza Place,Grocery Store,Park,Spa,Cosmetics Shop,Fast Food Restaurant,Sandwich Place,Athletics & Sports,Vietnamese Restaurant,Music Venue


In [125]:
# Pets in cluster 3: the column at position 1 plus every column from position 5 onwards.
cluster_3_rows = lost_pets_merged['cluster_labels'] == 3
cluster_3_positions = [1] + list(range(5, lost_pets_merged.shape[1]))
lost_pets_merged[cluster_3_rows].iloc[:, cluster_3_positions]

Unnamed: 0,date,colour,receiving_shelter,id,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,category,index,cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,2019-01-13,WHITE,North Region,A817080,DOWNSVIEW PARK,43.72005,-79.50057,Dog,1,3,Vietnamese Restaurant,Bakery,Pharmacy,Café,Pizza Place,Outdoor Supply Store,Women's Store,Ethiopian Restaurant,Dessert Shop,Dim Sum Restaurant


In [126]:
# Pets in cluster 4: the column at position 1 plus every column from position 5 onwards.
cluster_4_rows = lost_pets_merged['cluster_labels'] == 4
cluster_4_positions = [1] + list(range(5, lost_pets_merged.shape[1]))
lost_pets_merged[cluster_4_rows].iloc[:, cluster_4_positions]

Unnamed: 0,date,colour,receiving_shelter,id,crossing_intersections,cross_intersec_latitude,cross_intersec_longitude,category,index,cluster_labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,2019-01-13,BRN TABBY,North Region,A824940,DUFFERIN/ROGERS,43.68557,-79.44611,Cat,4,4,Gym,Pizza Place,Sporting Goods Shop,Beer Store,Mexican Restaurant,Women's Store,Ethiopian Restaurant,Dessert Shop,Dim Sum Restaurant,Discount Store
