# Let's explore Chennai using FourSquare 

In [1]:
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import json
import requests
import missingno

import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim
import folium

from sklearn.cluster import KMeans
print('Libraries Installed!')

Libraries Installed!


In [2]:
#let's try to see where we're dealing
chennai_lat, chennai_long = 13.0827, 80.2707

chennai_map = folium.Map(location=[chennai_lat, chennai_long], zoom_start=12)
chennai_map

In [3]:
#great, now let's look at the Areas in Chennai from the csv file
data = pd.read_csv("Chennai_geo.csv")
data.drop(columns = ['Unnamed: 0'], inplace=True)
print('Shape: ',data.shape)
data.head()

Shape:  (475, 3)


Unnamed: 0,Segment,Latitude,Longitude
0,Broadway,13.087441,80.283837
1,CLS,13.083392,80.278913
2,Central Railway Station,13.082007,80.275598
3,Pallavan Illam,13.075026,80.276241
4,Chief Secretariat,13.070005,80.273071


In [72]:
#let's spot a few segments on the map
my_segments = ['Vani Mahal','Alwarpet','Egmore','Anna Nagar Rountana','Nerkundram']

my_segments_df = data[data['Segment'].isin(my_segments)]
for lat, lng, label in zip(my_segments_df.Latitude, my_segments_df.Longitude, my_segments_df.Segment):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='red',
        popup=label,
        fill = True,
        fill_color='pink',
        fill_opacity=0.6
    ).add_to(chennai_map)
chennai_map

# Let's load FourSquare data for thes 475 segments in Chennai

In [37]:
#loading FourSquare credentials
with open('credentials.json') as f:
    cred = json.load(f)
    
print('Loaded Credentials!')

Loaded Credentials!


In [None]:
#testing out for chennai's lat and long to explore the json from FourSquare
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
             cred['CLIENT_ID'], 
            cred['CLIENT_SECRET'], 
            cred['VERSION'], 
            chennai_lat,chennai_long, 
            500, 
            100)

results = requests.get(url).json()
#results

In [5]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [41]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,National Durbar Hotel,Indian Restaurant,13.081301,80.270601
1,Pasha,Nightclub,13.080774,80.272894
2,Hotel Buhari,Indian Restaurant,13.081526,80.274286
3,Higginbothams,Bookstore,13.08319,80.275168
4,Hotel Saravana Bhavan,Vegetarian / Vegan Restaurant,13.082935,80.275032


In [42]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

16 venues were returned by Foursquare.


In [6]:
#now let's gather ino for all the 475 segments in chennai
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    no_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        try:
            print(name)
            url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                 cred['CLIENT_ID'], 
                cred['CLIENT_SECRET'], 
                cred['VERSION'],
                lat,
                lng,
                radius, 
                LIMIT)

            # make the GET request
            results = requests.get(url).json()["response"]['groups'][0]['items']

            # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])
        
        except :
            no_list.append(name)
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                          'Neighborhood Latitude', 
                          'Neighborhood Longitude', 
                          'Venue', 
                          'Venue Latitude', 
                      'Venue Longitude', 
                      'Venue Category']
    print('Not enough info for {} places in Toronto'.format(len(no_list)))
    return(nearby_venues)

In [45]:
chennai_venues = getNearbyVenues(names=data['Segment'],
                                   latitudes=data['Latitude'],
                                   longitudes=data['Longitude']
                                  )
chennai_venues.head()

Broadway
CLS
Central Railway Station
Pallavan Illam
Chief Secretariat
TVS Mount Road
Gemini
Sun Theatre
Vani Mahal
Panagal Park
Hindi Prachar Sabha
Venkat Narayana Road
Thiyagaraya Nagar
Pondy Bazaar
Teynampet Signal
Alwarpet
Isaballa Hospital
Vivekanandha College
V.M. Street
Kalyani Hospital
Vivekanandha House
Thousand lights mosque
Royapettai Police Station
Royapettai Hospital
Kasturi Bai Hospital
Pookadai / Pal Hospital
Moore Market
Dasaprakash
Neyveli House
Pachayappan College
Aminjikarai
Anna Nagar Police Station
Albert Theatre
Sterling Road
Loyola College
Liberty
Trustpuram
Vadapalani
Vadapalani Police Station
DMS
Anna Arivalayam
Muniyapillai Chathiram
Saidapet Bus Terminus
Saidapettai Bridge
Saidapettai Court / Chinnamalai
Raj Bhavan / Concord
Chellammal College
SPIC
Guindy Industrial estate
Pattullas Road
Wesley High School
Lloyds Road (PFO)
Swagat Hotel
Samskirutha College
Luz
Mandaiveli Post Office
Mandaiveli
Anna Square
Chepauk
Kannagi Silai
Thiruvallikeni
Kalaivanar Arangam

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Broadway,13.087441,80.283837,Gujarati Mandal,13.089637,80.28577,Indian Restaurant
1,Broadway,13.087441,80.283837,Parrys,13.088422,80.283283,Market
2,Broadway,13.087441,80.283837,Hotel Saravana Bhavan,13.088697,80.284796,Indian Restaurant
3,Broadway,13.087441,80.283837,gopal dairy,13.088502,80.285101,Snack Place
4,Broadway,13.087441,80.283837,Murugan Idli Shop,13.088824,80.287842,Asian Restaurant


In [46]:
chennai_venues.shape

(3673, 7)

In [7]:
#so that I don't have to use FourSquare APIs again
#chennai_venues.to_csv('Chennai_venues.csv')
chennai_venues = pd.read_csv('Chennai_venues.csv')
chennai_venues.drop(columns=['Unnamed: 0'],inplace= True)

In [8]:
chennai_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Broadway,13.087441,80.283837,Gujarati Mandal,13.089637,80.28577,Indian Restaurant
1,Broadway,13.087441,80.283837,Parrys,13.088422,80.283283,Market
2,Broadway,13.087441,80.283837,Hotel Saravana Bhavan,13.088697,80.284796,Indian Restaurant
3,Broadway,13.087441,80.283837,gopal dairy,13.088502,80.285101,Snack Place
4,Broadway,13.087441,80.283837,Murugan Idli Shop,13.088824,80.287842,Asian Restaurant


In [9]:
#let's explore the venues 

#isolating restaurants
res = chennai_venues[chennai_venues['Venue Category'].str.contains('Restaurant*')==True]

len(res['Venue Category'].unique())

35

In [10]:
#let's explore the rating file
ratings = pd.read_csv("Zomato_Chennai_2020.csv")
print(ratings.shape)
ratings.head()

(12032, 12)


Unnamed: 0,Zomato URL,Name of Restaurant,Address,Location,Cuisine,Top Dishes,Price for 2,Dining Rating,Dining Rating Count,Delivery Rating,Delivery Rating Count,Features
0,https://www.zomato.com/chennai/yaa-mohaideen-b...,Yaa Mohaideen Briyani,"336 & 338, Main Road, Pallavaram, Chennai",Pallavaram,['Biryani'],"['Bread Halwa', ' Chicken 65', ' Mutton Biryan...",500.0,4.3,1500,4.3,9306,"['Home Delivery', 'Indoor Seating']"
1,https://www.zomato.com/chennai/sukkubhai-biriy...,Sukkubhai Biriyani,"New 14, Old 11/3Q, Railway Station Road, MKN ...",Alandur,"['Biryani', ' North Indian', ' Mughlai', ' Des...","['Beef Biryani', ' Beef Fry', ' Paratha', ' Pa...",1000.0,4.4,3059,4.1,39200,"['Home Delivery', 'Free Parking', 'Table booki..."
2,https://www.zomato.com/chennai/ss-hyderabad-bi...,SS Hyderabad Biryani,"98/339, Arcot Road, Opposite Gokulam Chit Fun...",Kodambakkam,"['Biryani', ' North Indian', ' Chinese', ' Ara...","['Brinjal Curry', ' Tandoori Chicken', ' Chick...",500.0,4.3,1361,4.4,10500,"['Home Delivery', 'Indoor Seating']"
3,https://www.zomato.com/chennai/kfc-perambur,KFC,"10, Periyar Nagar, 70 Feet Road, Near Sheeba ...",Perambur,"['Burger', ' Fast Food', ' Finger Food', ' Bev...",['Zinger Burger'],500.0,4.0,1101,4.0,11200,"['Home Delivery', 'Free Parking', 'Card Upon D..."
4,https://www.zomato.com/chennai/tasty-kitchen-p...,Tasty Kitchen,"135B, SRP Colony, Peravallur, Near Perambur, ...",Perambur,"['Chinese', ' Biryani', ' North Indian', ' Che...","['Mutton Biryani', ' Chicken Rice', ' Tomato R...",450.0,4.2,617,4.1,22400,"['Home Delivery', 'Indoor Seating']"


In [69]:
drop_columns = ['Zomato URL', 'Address', 'Cuisine', 'Top Dishes', 'Price for 2', 'Delivery Rating', 'Delivery Rating Count', 'Features']
ratings.drop(columns= drop_columns, inplace=True)
ratings.shape

(12032, 4)

In [11]:
#let's see how many venues info we have
res[res['Venue'].isin(ratings['Name of Restaurant'])]

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
2,Broadway,13.087441,80.283837,Hotel Saravana Bhavan,13.088697,80.284796,Indian Restaurant
4,Broadway,13.087441,80.283837,Murugan Idli Shop,13.088824,80.287842,Asian Restaurant
8,CLS,13.083392,80.278913,Hotel Saravana Bhavan,13.082935,80.275032,Vegetarian / Vegan Restaurant
17,CLS,13.083392,80.278913,Dindigul Thalappakatti,13.082800,80.274558,Indian Restaurant
22,Central Railway Station,13.082007,80.275598,Hotel Saravana Bhavan,13.082935,80.275032,Vegetarian / Vegan Restaurant
...,...,...,...,...,...,...,...
3579,TVS Mount road,13.059068,80.259451,Wangs Kitchen,13.054777,80.258069,Chinese Restaurant
3616,ITI,13.100650,80.165219,Meat And Eat,13.101032,80.162470,Fast Food Restaurant
3625,Kallarai,13.049630,80.104666,McDonald's,13.051154,80.107865,Fast Food Restaurant
3627,Kallarai,13.049630,80.104666,Transit,13.048302,80.104837,Fast Food Restaurant


In [14]:
chennai_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
11th Main Road thirumangalam,19,19,19,19,19,19
12th Main Road thirumangalam,15,15,15,15,15,15
70 Feet Road,7,7,7,7,7,7
AIR,4,4,4,4,4,4
AMS hospital/Sathya Studios,4,4,4,4,4,4
...,...,...,...,...,...,...
War Memorial,7,7,7,7,7,7
Welcome Colony,6,6,6,6,6,6
Wesley High School,45,45,45,45,45,45
Wimco,1,1,1,1,1,1


In [15]:
print('There are {} unique categories'.format(len(chennai_venues['Venue Category'].unique())))

There are 228 unique categories


In [16]:
#encode Venue Categories
cv = pd.get_dummies(chennai_venues[['Venue Category']],prefix="",prefix_sep="")

#toronto_venues.reset_index(inplace=True)
cv['Neighborhood']=chennai_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [cv.columns[-1]] + list(cv.columns[:-1])
cv = cv[fixed_columns]
print(cv.shape)
cv.head()

(3673, 228)


Unnamed: 0,Zoo Exhibit,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,Airport Terminal,American Restaurant,Amphitheater,Arcade,...,Vacation Rental,Vegetarian / Vegan Restaurant,Video Store,Watch Shop,Water Park,Whisky Bar,Wine Shop,Women's Store,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
cv_grouped = cv.groupby('Neighborhood').mean().reset_index()
print(cv_grouped.shape)
cv_grouped.head()

(422, 228)


Unnamed: 0,Neighborhood,Zoo Exhibit,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,Airport Terminal,American Restaurant,Amphitheater,...,Vacation Rental,Vegetarian / Vegan Restaurant,Video Store,Watch Shop,Water Park,Whisky Bar,Wine Shop,Women's Store,Yoga Studio,Zoo
0,11th Main Road thirumangalam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,...,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,12th Main Road thirumangalam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,70 Feet Road,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,AIR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,AMS hospital/Sathya Studios,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
#merge FourSquare data into this tv_grouped
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [113]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = cv_grouped['Neighborhood']

for ind in np.arange(cv_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(cv_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,11th Main Road thirumangalam,Indian Restaurant,Pizza Place,Fast Food Restaurant,Indian Sweet Shop,Metro Station
1,12th Main Road thirumangalam,Indian Restaurant,Pizza Place,Mobile Phone Shop,Café,Print Shop
2,70 Feet Road,Ice Cream Shop,Chettinad Restaurant,ATM,Snack Place,Fast Food Restaurant
3,AIR,Beach,Sculpture Garden,Bar,Lighthouse,Zoo
4,AMS hospital/Sathya Studios,Hotel,Motel,Performing Arts Venue,Burger Joint,Electronics Store


In [114]:
#Cluseting 
# set number of clusters
kclusters = 3

cv_cluster = cv_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cv_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[4:18] 

array([0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0])

In [115]:
neighborhoods_venues_sorted.columns

Index(['Neighborhood', '1st Most Common Venue', '2nd Most Common Venue',
       '3rd Most Common Venue', '4th Most Common Venue',
       '5th Most Common Venue'],
      dtype='object')

In [116]:
# add clustering labels
neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

df_merged = data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
df_merged = df_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Segment')

df_merged.head() # check the last columns!

Unnamed: 0,Segment,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Cluster Labels
0,Broadway,13.087441,80.283837,Indian Restaurant,Market,Snack Place,Asian Restaurant,Train Station,2.0
1,CLS,13.083392,80.278913,Indian Restaurant,Train Station,Bus Station,Hotel,Bookstore,0.0
2,Central Railway Station,13.082007,80.275598,Indian Restaurant,Train Station,Bookstore,Hotel,Nightclub,0.0
3,Pallavan Illam,13.075026,80.276241,Golf Course,Train,Train Station,Park,General Travel,0.0
4,Chief Secretariat,13.070005,80.273071,Multiplex,Hotel,Electronics Store,Flea Market,Indian Restaurant,0.0


In [117]:
df_merged.shape

(475, 9)

In [118]:
df_merged['Cluster Labels'].unique()

array([ 2.,  0., nan,  1.])

In [119]:
df_merged = df_merged[df_merged['Cluster Labels'].isin([0.0,1.0,2.0])]

In [120]:
df_merged['Latitude'] = df_merged['Latitude'][df_merged['Latitude']!='`']
df_merged.dropna(inplace = True)

In [122]:
# create map
map_clusters = folium.Map(location=[chennai_lat, chennai_long], zoom_start=11)

# set color scheme for the clusters
rainbow=['red', 'green', 'purple']
print(rainbow)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Segment'], df_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    #print(int(cluster),rainbow[int(cluster)])
    folium.CircleMarker( [lat, lon],
        radius=5,
        popup=label,
       color=rainbow[int(cluster)],
        fill=True,
       fill_color=rainbow[int(cluster)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

['red', 'green', 'purple']
