### Predicting Coffee shop location in Delhi, India

#### Import Libraries

In [119]:
import numpy as np  
import pandas as pd 
import matplotlib as mpl
import matplotlib.pyplot as plt

# import k-means from clustering stage
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# library to handle requests
import requests 

# library for random number generation
import random 

# !conda install -c conda-forge geopy --yes 
# !conda install -c conda-forge ipywidgets --yes
# !conda install -c conda-forge folium=0.5.0 --yes

# module to convert an address into latitude and longitude values
from geopy.geocoders import Nominatim 

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize
import folium
import ipywidgets as widgets


print('Folium installed and imported!')

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

Folium installed and imported!


#### Reading data .csv files, from local path

In [120]:
delhi_district_file_path='./delhi_all.csv'
delhi_population_path='./delhi9_population.csv'
delhi_geo_path='./Delhi.geojson'

delhi_district=pd.read_csv(delhi_district_file_path)
delhi_population=pd.read_csv(delhi_population_path)

import json 
with open(delhi_geo_path, 'r') as j:
     delhi_geo = json.loads(j.read()) 

In [121]:
vars(district); 
state=district.value
selectedRow=delhi_district.loc[delhi_district['place_name']==district.value, :]

vars(selectedRow);

In [122]:
delhi_district

Unnamed: 0,key,place_name,admin_name1,latitude,longitude,accuracy
0,IN/110001,Connaught Place,New Delhi,28.633300,77.216700,
1,IN/110002,Darya Ganj,New Delhi,28.633300,77.250000,
2,IN/110003,Aliganj,New Delhi,28.575964,77.223188,
3,IN/110004,Rashtrapati Bhawan,New Delhi,28.614299,77.199458,
4,IN/110005,Lower Camp Anand Parbat,New Delhi,28.664941,77.174777,
...,...,...,...,...,...,...
74,IN/110091,Himmatpuri,New Delhi,28.603680,77.304441,
75,IN/110092,Shakarpur,New Delhi,28.627949,77.278621,
76,IN/110093,Nand Nagri A Block,New Delhi,28.693616,77.313834,
77,IN/110094,Gokulpuri,New Delhi,28.704770,77.282997,


In [123]:
geolocator = Nominatim(user_agent="foursquare_agent", format_string="%s, New delhi,IN")

latitude = selectedRow.loc[district.index,'latitude']
longitude =selectedRow.loc[district.index,'longitude']
location = geolocator.reverse("{}, {}".format(latitude, longitude))

#### Generating overlay folium map

In [124]:
# Delhi latitude and longitude values
delhi_map=folium.Map(location=[latitude, longitude], zoom_start=10.47)

# Add the color for the chloropleth:
delhi_map.choropleth(
 geo_data=delhi_geo,
 name='choropleth',
 data=delhi_population,
 columns=['District', 'Percentage'],
 key_on='feature.properties.Dist_Name',
 fill_color='YlGn',
 fill_opacity=0.7,
 line_opacity=0.2,
 legend_name='Population (%)'
)
# folium.LayerControl().add_to(delhi_map)

# I can add marker one by one on the map
for i in range(0,len(delhi_district)):
    folium.Marker([delhi_district.iloc[i]['latitude'], delhi_district.iloc[i]['longitude']], popup=folium.Popup(delhi_district.iloc[i]['place_name'], parse_html=True, max_width=100) ).add_to(delhi_map)

delhi_map

In [125]:
# @hidden_cell
CLIENT_ID = '' 
CLIENT_SECRET = '' 

VERSION = '20200501'
LIMIT = 100 # max only 50 result 
radius = 2000 # diameter to scann the neighbourhood.
search_query = 'market'

#### Loop to download all the venues with radius 2km

In [126]:
venues = []

for lat, long, neighborhood in zip(delhi_district['latitude'], delhi_district['longitude'], delhi_district['place_name']):
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)    
    
     # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))


KeyError: 'groups'

In [None]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

In [None]:
venues_df.groupby(["Neighborhood"]).count()

In [None]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

In [None]:
venues_df['VenueCategory'].unique()[:50]

In [None]:
# one hot encoding
kl_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kl_onehot.columns[-1]] + list(kl_onehot.columns[:-1])
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head()

In [None]:
kl_grouped = kl_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(kl_grouped.shape)
kl_grouped

In [None]:
len(kl_grouped[kl_grouped["Shopping Mall"] > 0])

In [None]:
kl_mall = kl_grouped[["Neighborhoods","Shopping Mall"]]
kl_mall.head()

#### Cluster Neighborhoods

In [None]:
# set number of clusters
kclusters = 3

kl_clustering = kl_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

In [None]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kl_merged = kl_mall.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [None]:
kl_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
kl_merged.head()

In [None]:
kl_df = pd.DataFrame({"Neighborhood": delhi_district.place_name,
                     'Latitude':delhi_district.latitude,
                     'Longitude':delhi_district.longitude})

kl_df.head()

In [None]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
kl_merged = kl_merged.join(kl_df.set_index("Neighborhood"), on="Neighborhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!

In [None]:
# sort the results by Cluster Labels
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged

In [None]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], kl_merged['Longitude'], kl_merged['Neighborhood'], kl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

## Clusters

Clustor 0

In [None]:
kl_merged.loc[kl_merged['Cluster Labels'] == 0]

Clustor 1

In [None]:
kl_merged.loc[kl_merged['Cluster Labels'] == 1]

Clustor 2

In [None]:
kl_merged.loc[kl_merged['Cluster Labels'] == 2]

### Observations:

Most of the shopping malls are in clustor 1 areas, and and clustor 0 areas, we are considering shopping malls, due to footfall counts per day.
Opening new coffee shops, in clustor 1 areas(Shalimar Bagh, Shakarpur, Pitampura, Wazirpur Phase Iii) near or inside this shopping malls, would be most economical beneficial. 

### Future:

In this prediction, i have ***only*** considered shopping malls, for next accurate prediction, we may consider the coffee shops inside shopping mall. And also other business entities around the coffee shops like IT parks, Stations, Airport, Educational Institutions etc.