# Feasibility Study & Recommendations for Opening a Restaurant in Pune
## IBM Applied Data Science Capstone Project
### The Battle of Neighborhoods


Import the required modules

In [1]:
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import numpy as np
import pandas as pd
import folium
import requests
from pandas.io.json import json_normalize
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

Get coordinates of all pincodes (zip code) of Pune city

In [2]:
pin_lat_long = []
for pin in range(411001,411062):
    print('Fetching geo coordinates for {}'.format(pin))
    address = str(pin) + ', Pune, India'
    geolocator = Nominatim(user_agent="pune_explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    pin_lat_long.append([pin, latitude, longitude])
pin_lat_long

Fetching geo coordinates for 411001
Fetching geo coordinates for 411002
Fetching geo coordinates for 411003
Fetching geo coordinates for 411004
Fetching geo coordinates for 411005
Fetching geo coordinates for 411006
Fetching geo coordinates for 411007
Fetching geo coordinates for 411008
Fetching geo coordinates for 411009
Fetching geo coordinates for 411010
Fetching geo coordinates for 411011
Fetching geo coordinates for 411012
Fetching geo coordinates for 411013
Fetching geo coordinates for 411014
Fetching geo coordinates for 411015
Fetching geo coordinates for 411016
Fetching geo coordinates for 411017
Fetching geo coordinates for 411018
Fetching geo coordinates for 411019
Fetching geo coordinates for 411020
Fetching geo coordinates for 411021
Fetching geo coordinates for 411022
Fetching geo coordinates for 411023
Fetching geo coordinates for 411024
Fetching geo coordinates for 411025
Fetching geo coordinates for 411026
Fetching geo coordinates for 411027
Fetching geo coordinates for

[[411001, 18.5298629, 73.8760869],
 [411002, 18.5095844, 73.8585026],
 [411003, 18.5710074, 73.8383273],
 [411004, 18.5087265, 73.83086905279255],
 [411005, 18.4580909700108, 73.9293125124155],
 [411006, 18.5721375, 73.8788439],
 [411007, 18.521428, 73.8544541],
 [411008, 18.542318334707847, 73.80930474604361],
 [411009, 18.4853019, 73.8511287],
 [411010, 18.1157975, 75.0269762],
 [411011, 18.59592547652776, 73.93761890238277],
 [411012, 18.585276666666665, 73.83030593333334],
 [411013, 18.511166, 73.928155],
 [411014, 18.5627909, 73.91371135],
 [411015, 18.579194888898932, 73.88636319619278],
 [411016, 18.5291854, 73.8334444],
 [411017, 18.5741788, 73.7731743],
 [411018, 18.6332662, 73.81618749769493],
 [411019, 18.6582, 73.8046793],
 [411020, 18.55593608255426, 73.83683021403759],
 [411021, 18.55298685, 73.75500526891248],
 [411022, 18.521428, 73.8544541],
 [411023, 18.8250074, 74.3745183],
 [411024, 18.4402041, 73.7753538],
 [411025, 18.3993407, 73.7333619],
 [411026, 18.6349668, 73

Convert the geospatial coordinated for center of each pincode of Pune city into a dataframe

In [3]:
pune_pincodes = pd.DataFrame(pin_lat_long, columns=['pincode', 'lat', 'lng'])
pune_pincodes.head()

Unnamed: 0,pincode,lat,lng
0,411001,18.529863,73.876087
1,411002,18.509584,73.858503
2,411003,18.571007,73.838327
3,411004,18.508727,73.830869
4,411005,18.458091,73.929313


Visualize Pune pincodes on map

In [4]:
address = 'Pune, India'
geolocator = Nominatim(user_agent="pune_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
map_pune = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for ind in pune_pincodes.index:
    pin = pune_pincodes['pincode'][ind]
    lat = pune_pincodes['lat'][ind]
    lng = pune_pincodes['lng'][ind]
    label = '{}'.format(pin)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_pune)
    
map_pune

Set credentials for Foursquare to fetch data using Foursquare API

In [5]:
CLIENT_ID = 'E1FHC5NSP5KEE14G2CB0420DOCZEB3LMM2BYT0HCZWEJOUGU' # your Foursquare ID
CLIENT_SECRET = 'L2TXKCQNW5JPBSYYP0SH3BR4EHFPRASFHJQLNQ5PC5FEWXJD' # your Foursquare Secret
VERSION = '20200101' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: E1FHC5NSP5KEE14G2CB0420DOCZEB3LMM2BYT0HCZWEJOUGU
CLIENT_SECRET:L2TXKCQNW5JPBSYYP0SH3BR4EHFPRASFHJQLNQ5PC5FEWXJD


Part 1: Fetch venues/restaurants data using Foursquare API
<br>A) Construct URL's with geospatial cooridnates of each pincode and fetch upto 100 data points/venues from each pincode within the city
<br>B) Use requests to use API and fetch data
<br>C) Parse json and prune required features
<br>D) Convert the required data into data frame
<br>E) Convert dataframe into CSV and save the file

In [8]:
# Construct URLs. One URL for each pincode
LIMIT = 1000 # limit of number of venues returned by Foursquare API
radius = 2000 # define radius
fqs_urls = []
for ind in pune_pincodes.index:
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        pune_pincodes['lat'][ind], 
        pune_pincodes['lng'][ind], 
        radius, 
        LIMIT)
    fqs_urls.append(url)
    
fqs_urls[0:5]

['https://api.foursquare.com/v2/venues/explore?&client_id=E1FHC5NSP5KEE14G2CB0420DOCZEB3LMM2BYT0HCZWEJOUGU&client_secret=L2TXKCQNW5JPBSYYP0SH3BR4EHFPRASFHJQLNQ5PC5FEWXJD&v=20200101&ll=18.5298629,73.8760869&radius=2000&limit=1000',
 'https://api.foursquare.com/v2/venues/explore?&client_id=E1FHC5NSP5KEE14G2CB0420DOCZEB3LMM2BYT0HCZWEJOUGU&client_secret=L2TXKCQNW5JPBSYYP0SH3BR4EHFPRASFHJQLNQ5PC5FEWXJD&v=20200101&ll=18.5095844,73.8585026&radius=2000&limit=1000',
 'https://api.foursquare.com/v2/venues/explore?&client_id=E1FHC5NSP5KEE14G2CB0420DOCZEB3LMM2BYT0HCZWEJOUGU&client_secret=L2TXKCQNW5JPBSYYP0SH3BR4EHFPRASFHJQLNQ5PC5FEWXJD&v=20200101&ll=18.5710074,73.8383273&radius=2000&limit=1000',
 'https://api.foursquare.com/v2/venues/explore?&client_id=E1FHC5NSP5KEE14G2CB0420DOCZEB3LMM2BYT0HCZWEJOUGU&client_secret=L2TXKCQNW5JPBSYYP0SH3BR4EHFPRASFHJQLNQ5PC5FEWXJD&v=20200101&ll=18.5087265,73.83086905279255&radius=2000&limit=1000',
 'https://api.foursquare.com/v2/venues/explore?&client_id=E1FHC5NSP5K

In [None]:
# Actualy fetch data using Foursqauare API
success = 0
fqs_results_jsons = []

for ind,url in enumerate(fqs_urls):
    result = requests.get(url).json()
    if results['meta']['code'] == 200:
        success += 1
    print("Fetching ... {} ... success {}/{}".format(ind+1, success, ind+1))
    fqs_results_jsons.append(result)

In [None]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [None]:
# Convert fetched data from json to dataframe consisting of chosen features

pune_venues = []
for ind,jsons in enumerate(fqs_results_jsons):
    print('Processing .... {}'.format(ind))
    for res in range(min(jsons['response']['totalResults'],100)):
        venue = []
        venue.append(jsons['response']['groups'][0]['items'][res]['venue']['name'])
        venue.append(jsons['response']['groups'][0]['items'][res]['venue']['location']['lat'])
        venue.append(jsons['response']['groups'][0]['items'][res]['venue']['location']['lng'])
        try:
            venue.append(jsons['response']['groups'][0]['items'][res]['venue']['location']['postalCode'])
        except:
            venue.append(None)
        venue.append(jsons['response']['groups'][0]['items'][res]['venue']['categories'][0]['shortName'])
        pune_venues.append(venue)

Convert Foursquare venues into dataframe and save it to disk in CSV format 

In [None]:
fqs_venues = pd.DataFrame(pune_venues, columns=['name', 'lat', 'lng', 'pincode', 'type'])

In [None]:
fqs_venues.to_csv('fqs_venues.csv', index=False)

## Section 2: Get data from Zomato

1. Set API credential key for Zomato
2. Prepare search query based on coordinates of restaurants found in FourSquare API
3. Fetch data using requests and Zomato API
4. Convert the response JSON into dataframe
5. As zomato has limit of 1000 API calls, call multiple times (either on different days or using seperate credentials)
6. Save it to disk in CSV format so it can be fetched whenever needed for further analysis

In [10]:
# Prepare URLs
zomato_urls = []

for ind in fqs_venues.index:
    name = fqs_venues['name'][ind]
    lat = fqs_venues['lat'][ind]
    lng = fqs_venues['lng'][ind]
    url = ('https://developers.zomato.com/api/v2.1/search?q{}' + '&start=0&count=20&lat={}&lon={}&sort=real_distance').format(name,lat,lng)
    zomato_urls.append(url)

NameError: name 'fqs_venues' is not defined

Use zomato developer API to fetch restaurant data

In [9]:
zomato_results = []

headers = {'user-key': 'e2ef4f947cde32306783626bff7c4817'}
count = 0
for url in zomato_urls[0:999]:
    result = requests.get(url, headers = headers).json()
    zomato_results.append(result)
    print('Finished processing ... {}'.format(count))
    count += 1

headers = {'user-key': '7fd1f06ce6563edc630620a7b37ea406'}
count = 0
for url in zomato_urls[1000:1999]:
    result = requests.get(url, headers = headers).json()
    zomato_results.append(result)
    print('Finished processing ... {}'.format(count))
    count += 1

headers = {'user-key': 'a04982a1d6e7d52e614479972e27cd9c'}
count = 0
for url in zomato_urls[2000:2999]:
    result = requests.get(url, headers = headers).json()
    zomato_results.append(result)
    print('Finished processing ... {}'.format(count))
    count += 1

Extract required data fields from the resultant JSON

In [None]:
zomato_restaurants = []
for rest in zomato_results1:
    for r in range(0,20):
        venue = []
        try:
            venue.append(rest['restaurants'][r]['restaurant']['name'])
        except:
            continue
        venue.append(rest['restaurants'][r]['restaurant']['location']['latitude'])
        venue.append(rest['restaurants'][r]['restaurant']['location']['longitude'])
        venue.append(rest['restaurants'][r]['restaurant']['average_cost_for_two'])
        venue.append(rest['restaurants'][r]['restaurant']['price_range'])
        venue.append(rest['restaurants'][r]['restaurant']['user_rating']['aggregate_rating'])
        venue.append(rest['restaurants'][r]['restaurant']['user_rating']['votes'])
        venue.append(rest['restaurants'][r]['restaurant']['cuisines'])
        venue.append(rest['restaurants'][r]['restaurant']['highlights'])
        venue.append(rest['restaurants'][r]['restaurant']['all_reviews_count'])
        venue.append(rest['restaurants'][r]['restaurant']['photo_count'])
        venue.append(zomato_results1[0]['restaurants'][0]['restaurant']['has_table_booking'])
        venue.append(zomato_results1[0]['restaurants'][0]['restaurant']['has_online_delivery'])
        zomato_restaurants.append(venue)

Convert it into dataframe, remove duplicates and store it in CSV format

In [None]:
zomato = pd.DataFrame(zomato_restaurants, columns = ['name', 'latitude', 'longitude', 'average_cost_for_two', 'price_range', 'aggregate_rating', 'votes', 'cuisines', 'highlights', 'all_reviews_count', 'photo_count', 'has_table_booking', 'has_online_delivery'])
zomato.drop_duplicates(subset ="name", keep = 'first', inplace=True)
zomato.to_csv('zomato.csv', index=False)