### Predicting Coffee shop location in Delhi, India

#### Import Libraries

In [135]:
import numpy as np  
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt

# import k-means from clustering stage
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# library to handle requests
import requests 

# library for random number generation
import random 

# !conda install -c conda-forge geopy --yes 
# !conda install -c conda-forge ipywidgets --yes
# !conda install -c conda-forge folium=0.5.0 --yes

# module to convert an address into latitude and longitude values
from geopy.geocoders import Nominatim 

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# library for transforming a JSON file into a pandas dataframe
from pandas.io.json import json_normalize
import folium
import ipywidgets as widgets


print('Folium installed and imported!')

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

Folium installed and imported!


#### Reading the .csv and GeoJSON data files from local paths

In [136]:
# Input files, resolved relative to the notebook's working directory.
delhi_district_file_path='./delhi_all.csv'
delhi_population_path='./delhi9_population.csv'
delhi_geo_path='./Delhi.geojson'

# District table (place_name / latitude / longitude are used below) and the
# per-district population table (District / Percentage feed the choropleth).
delhi_district = pd.read_csv(delhi_district_file_path)
delhi_population = pd.read_csv(delhi_population_path)

import json

# Open the GeoJSON explicitly as UTF-8 so parsing does not depend on the
# platform's default encoding, and let json.load read straight from the file
# handle instead of materialising the text first.
with open(delhi_geo_path, 'r', encoding='utf-8') as geo_file:
    delhi_geo = json.load(geo_file)

In [137]:
# `district` is read through `.value` (here) and `.index` (in the geocoding
# cell) but is never created anywhere in this notebook, so a fresh
# "Restart & Run All" used to stop here with a NameError.
# NOTE(review): presumably it was an ipywidgets selection widget built in a
# since-deleted cell -- confirm. The fallback below is only used when no such
# object already exists in the kernel.
if 'district' not in globals():
    district = widgets.Dropdown(
        # Keep the options in row order so the widget's positional `.index`
        # lines up with the DataFrame's default integer labels.
        options=list(delhi_district['place_name']),
        description='District:',
    )

# Name of the currently selected district and its matching row(s).
state = district.value
selectedRow = delhi_district.loc[delhi_district['place_name'] == district.value, :]

In [138]:
# Inspect the district table loaded from delhi_all.csv.
delhi_district

Unnamed: 0,key,place_name,admin_name1,latitude,longitude,accuracy
0,IN/110001,Connaught Place,New Delhi,28.633300,77.216700,
1,IN/110002,Darya Ganj,New Delhi,28.633300,77.250000,
2,IN/110003,Aliganj,New Delhi,28.575964,77.223188,
3,IN/110004,Rashtrapati Bhawan,New Delhi,28.614299,77.199458,
4,IN/110005,Lower Camp Anand Parbat,New Delhi,28.664941,77.174777,
...,...,...,...,...,...,...
74,IN/110091,Himmatpuri,New Delhi,28.603680,77.304441,
75,IN/110092,Shakarpur,New Delhi,28.627949,77.278621,
76,IN/110093,Nand Nagri A Block,New Delhi,28.693616,77.313834,
77,IN/110094,Gokulpuri,New Delhi,28.704770,77.282997,


In [139]:
# Nominatim client used for the reverse lookup below.
# The former `format_string` option is omitted: it only ever affected forward
# geocoding queries (never `reverse`) and geopy 2.x rejects it outright.
geolocator = Nominatim(user_agent="foursquare_agent")

# Coordinates of the selected district, taken from the first row that matched
# the selected place name. Reading the row positionally avoids relying on the
# widget's option index coinciding with a DataFrame label; float() yields
# plain Python numbers for folium.
latitude = float(selectedRow['latitude'].iloc[0])
longitude = float(selectedRow['longitude'].iloc[0])

# Reverse-geocode the centre point (network call to the Nominatim service).
location = geolocator.reverse((latitude, longitude))

#### Generating overlay folium map

In [140]:
# Delhi latitude and longitude values
# Base map centred on the selected district's latitude / longitude.
delhi_map = folium.Map(location=[latitude, longitude], zoom_start=10.47)

# Choropleth layer: shade each district polygon by its population percentage.
choropleth_options = dict(
    geo_data=delhi_geo,
    name='choropleth',
    data=delhi_population,
    columns=['District', 'Percentage'],
    key_on='feature.properties.Dist_Name',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Population (%)',
)
if hasattr(folium, 'Choropleth'):
    # Newer folium releases expose the layer as a class and no longer provide
    # the Map.choropleth method.
    folium.Choropleth(**choropleth_options).add_to(delhi_map)
else:
    # Older folium releases only offer the Map.choropleth method.
    delhi_map.choropleth(**choropleth_options)

# One marker per district row, with the place name as its popup.
for place_lat, place_lon, place_name in zip(
        delhi_district['latitude'],
        delhi_district['longitude'],
        delhi_district['place_name']):
    folium.Marker(
        [place_lat, place_lon],
        popup=folium.Popup(place_name, parse_html=True, max_width=100),
    ).add_to(delhi_map)

delhi_map

In [141]:
# @hidden_cell
import os

# Foursquare credentials are read from the environment so they never have to
# be typed into (and accidentally committed with) the notebook. With the
# variables unset, both fall back to the empty string, exactly as before.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')

VERSION = '20200501'     # value sent as the `v` parameter of each request
LIMIT = 100              # value sent as the `limit` parameter (venues requested per location)
radius = 2000            # search radius around each location; 2 km per the section heading below
search_query = 'market'  # NOTE(review): not referenced by any visible cell

#### Loop to download all the venues within a 2 km radius

In [142]:
# Collected rows: (neighborhood, lat, long, venue name, venue lat, venue lng, category).
venues = []

EXPLORE_URL = "https://api.foursquare.com/v2/venues/explore"

for lat, long, neighborhood in zip(delhi_district['latitude'], delhi_district['longitude'], delhi_district['place_name']):
    # Let requests build and escape the query string instead of formatting the
    # credentials into the URL by hand; the timeout keeps one stalled
    # connection from hanging the whole loop.
    response = requests.get(
        EXPLORE_URL,
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, long),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30,
    )
    # Surface HTTP-level failures (bad credentials, quota, ...) immediately
    # rather than as an opaque KeyError while digging through the payload.
    response.raise_for_status()

    groups = response.json().get("response", {}).get("groups", [])
    results = groups[0]["items"] if groups else []

    # Keep only the fields needed for the analysis.
    for venue in results:
        details = venue['venue']
        # A venue without any category used to raise IndexError; record it
        # with a missing category instead.
        categories = details.get('categories') or []
        category_name = categories[0]['name'] if categories else None
        venues.append((
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            category_name))


In [143]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(2990, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Connaught Place,28.6333,77.2167,Wenger's,28.633412,77.218292,Bakery
1,Connaught Place,28.6333,77.2167,Connaught Place | कनॉट प्लेस (Connaught Place),28.632731,77.220018,Plaza
2,Connaught Place,28.6333,77.2167,HOTEL SARAVANA BHAVAN,28.632319,77.216445,South Indian Restaurant
3,Connaught Place,28.6333,77.2167,Naturals Ice Cream,28.634455,77.222139,Ice Cream Shop
4,Connaught Place,28.6333,77.2167,Fabindia,28.632012,77.217729,Clothing Store


In [144]:
# Number of non-null entries in every column, per neighborhood.
venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A F Rajokari,61,61,61,61,61,61
Adarsh Nagar,5,5,5,5,5,5
Aliganj,81,81,81,81,81,81
Alipur,4,4,4,4,4,4
Andrews Ganj,100,100,100,100,100,100
...,...,...,...,...,...,...
Vasant Kunj,62,62,62,62,62,62
Vishnu Garden,31,31,31,31,31,31
Wazirpur Phase Iii,37,37,37,37,37,37
Zafrabad,5,5,5,5,5,5


In [145]:
# Report how many distinct venue categories were downloaded
# (dropna=False so a missing category is counted like any other value).
print('There are {} uniques categories.'.format(venues_df['VenueCategory'].nunique(dropna=False)))

There are 203 uniques categories.


In [146]:
# Preview the first 50 distinct venue category names.
venues_df['VenueCategory'].unique()[:50]

array(['Bakery', 'Plaza', 'South Indian Restaurant', 'Ice Cream Shop',
       'Clothing Store', 'Food Truck', 'Coffee Shop', 'Deli / Bodega',
       'Café', 'Indian Restaurant', 'Hotel', 'Bistro', 'Lounge',
       'Molecular Gastronomy Restaurant', 'Bar',
       'North Indian Restaurant', 'Chinese Restaurant',
       'Tibetan Restaurant', 'Restaurant', 'Portuguese Restaurant',
       'Gastropub', 'Food & Drink Shop', 'BBQ Joint', 'Asian Restaurant',
       'Italian Restaurant', 'Beer Garden', 'Donut Shop', 'Spa',
       'Tea Room', 'Korean Restaurant', 'Smoke Shop',
       'Fast Food Restaurant', 'Food', 'Mediterranean Restaurant',
       'Arcade', 'Theater', 'Art Gallery', 'Indian Chinese Restaurant',
       'Pub', 'Historic Site', 'Concert Hall', 'Snack Place',
       'Breakfast Spot', 'Performing Arts Venue', 'Pizza Place', 'Motel',
       'Japanese Restaurant', 'Stadium', 'Cricket Ground',
       'History Museum'], dtype=object)

In [147]:
# one hot encoding
kl_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kl_onehot.columns[-1]] + list(kl_onehot.columns[:-1])
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head()

(2990, 204)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Udupi Restaurant,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Water Park,Wings Joint,Women's Store
0,Connaught Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Connaught Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Connaught Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Connaught Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Connaught Place,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [148]:
# Average the indicator columns per neighborhood, i.e. the share of each
# neighborhood's venues that falls into every category.
kl_grouped = kl_onehot.groupby("Neighborhoods", as_index=False).mean()

print(kl_grouped.shape)
kl_grouped

(77, 204)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Udupi Restaurant,University,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Water Park,Wings Joint,Women's Store
0,A F Rajokari,0.00,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.032787,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.016393,0.0,0.0
1,Adarsh Nagar,0.60,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0
2,Aliganj,0.00,0.0,0.012346,0.012346,0.0,0.0,0.0,0.0,0.000000,...,0.012346,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0
3,Alipur,0.25,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0
4,Andrews Ganj,0.00,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,Vasant Kunj,0.00,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.016129,0.0,0.0,0.000000,0.0,0.0
73,Vishnu Garden,0.00,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0
74,Wazirpur Phase Iii,0.00,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.027027,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0
75,Zafrabad,0.00,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,...,0.200000,0.0,0.0,0.0,0.200000,0.0,0.0,0.000000,0.0,0.0


In [149]:
# Count the neighborhoods that have at least one shopping mall.
int((kl_grouped["Shopping Mall"] > 0).sum())

19

In [150]:
# Keep only the neighborhood name and its shopping-mall share.
kl_mall = kl_grouped.loc[:, ["Neighborhoods", "Shopping Mall"]]
kl_mall.head()

Unnamed: 0,Neighborhoods,Shopping Mall
0,A F Rajokari,0.032787
1,Adarsh Nagar,0.0
2,Aliganj,0.012346
3,Alipur,0.0
4,Andrews Ganj,0.0


#### Cluster Neighborhoods

In [151]:
# set number of clusters
kclusters = 3

kl_clustering = kl_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [152]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kl_merged = kl_mall.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [153]:
# Use the singular column name so it matches the coordinate table's key.
kl_merged = kl_merged.rename(columns={"Neighborhoods": "Neighborhood"})
kl_merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,A F Rajokari,0.032787,0
1,Adarsh Nagar,0.0,2
2,Aliganj,0.012346,2
3,Alipur,0.0,2
4,Andrews Ganj,0.0,2


In [154]:
# Coordinate lookup table: one row per district with its name and position.
kl_df = (
    delhi_district[["place_name", "latitude", "longitude"]]
    .rename(columns={
        "place_name": "Neighborhood",
        "latitude": 'Latitude',
        "longitude": 'Longitude',
    })
)

kl_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Connaught Place,28.6333,77.2167
1,Darya Ganj,28.6333,77.25
2,Aliganj,28.575964,77.223188
3,Rashtrapati Bhawan,28.614299,77.199458
4,Lower Camp Anand Parbat,28.664941,77.174777


In [155]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
kl_merged = kl_merged.join(kl_df.set_index("Neighborhood"), on="Neighborhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!

(77, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,A F Rajokari,0.032787,0,28.513004,77.096195
1,Adarsh Nagar,0.0,2,28.7556,77.1667
2,Aliganj,0.012346,2,28.575964,77.223188
3,Alipur,0.0,2,28.8,77.15
4,Andrews Ganj,0.0,2,28.56268,77.227582


In [156]:
# sort the results by Cluster Labels
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged

(77, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,A F Rajokari,0.032787,0,28.513004,77.096195
17,D D A Munirka,0.040541,0,28.556133,77.167771
62,Punjabi Bagh,0.035714,0,28.661975,77.124156
30,Himmatpuri,0.041667,0,28.603680,77.304441
45,Malviya Nagar,0.030000,0,28.534233,77.209447
...,...,...,...,...,...
23,Gokulpuri,0.000000,2,28.704770,77.282997
22,Gandhi Nagar,0.000000,2,28.659608,77.267493
21,Flatted Factories Complex,0.000000,2,28.648132,77.203925
19,Delhi Cantt,0.000000,2,28.596128,77.158738


In [157]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], kl_merged['Longitude'], kl_merged['Neighborhood'], kl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [158]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

## Clusters

Cluster 0

In [159]:
# Neighborhoods assigned to cluster 0.
kl_merged[kl_merged['Cluster Labels'].eq(0)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,A F Rajokari,0.032787,0,28.513004,77.096195
17,D D A Munirka,0.040541,0,28.556133,77.167771
62,Punjabi Bagh,0.035714,0,28.661975,77.124156
30,Himmatpuri,0.041667,0,28.60368,77.304441
45,Malviya Nagar,0.03,0,28.534233,77.209447
33,J 6block Rajouri Garden,0.051724,0,28.64198,77.114766
41,Kidwai Nagar,0.024096,0,28.571861,77.211055
59,Patel Nagar,0.043478,0,28.655418,77.16462
72,Vasant Kunj,0.048387,0,28.529312,77.148444
73,Vishnu Garden,0.064516,0,28.651935,77.094072


Cluster 1

In [160]:
# Neighborhoods assigned to cluster 1.
kl_merged[kl_merged['Cluster Labels'].eq(1)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
60,Pitampura,0.1,1,28.69897,77.138723
74,Wazirpur Phase Iii,0.081081,1,28.697544,77.16044
69,Shalimar Bagh,0.117647,1,28.716253,77.156259
67,Shakarpur,0.1,1,28.627949,77.278621


Cluster 2

In [161]:
# Neighborhoods assigned to cluster 2.
kl_merged[kl_merged['Cluster Labels'].eq(2)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
49,Najafgarh,0.0,2,28.612500,76.984700
50,Nand Nagri A Block,0.0,2,28.693616,77.313834
48,Nagloi,0.0,2,28.682044,77.067561
46,Mangolpuri Block A,0.0,2,28.692632,77.094526
51,Nangal Rava,0.0,2,28.608748,77.109683
...,...,...,...,...,...
23,Gokulpuri,0.0,2,28.704770,77.282997
22,Gandhi Nagar,0.0,2,28.659608,77.267493
21,Flatted Factories Complex,0.0,2,28.648132,77.203925
19,Delhi Cantt,0.0,2,28.596128,77.158738


### Observations:

Most of the shopping malls are in cluster 1 areas and cluster 0 areas; we are considering shopping malls because of their daily footfall.
Opening new coffee shops in cluster 1 areas (Shalimar Bagh, Shakarpur, Pitampura, Wazirpur Phase Iii), near or inside these shopping malls, would be the most economically beneficial.

### Future:

In this prediction, I have ***only*** considered shopping malls. For a more accurate prediction, we may also consider the coffee shops inside shopping malls, as well as other business entities around the coffee shops, such as IT parks, stations, airports, educational institutions, etc.