## Dependencies

In [None]:
# For collecting data from Zomato API
import requests
import urllib.parse as up
import json

# For Threading
import threading

# For writing data into .csv file
import pandas as pd

## Functions

In [None]:
def tryJson(site):
    '''
    Decodes the body of a response as UTF-8 and parses it as JSON

    param: response object exposing the raw body as bytes in `.content`
    return: parsed JSON object, or None if the body cannot be decoded /
            parsed or `site` has no usable `.content`
    '''
    try:
        return json.loads(site.content.decode('utf-8'))
    except (AttributeError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        # AttributeError keeps the best-effort result for a missing response
        # object. Anything else (e.g. KeyboardInterrupt) now propagates
        # instead of being silently swallowed.
        return None


def getDetailsFromLocation(loc_name):
    '''
    param: String containing location

    Creates URL with location name as parameter and retrieves response object
    Converts response object to JSON object and collects data from the
    first entry of 'location_suggestions'

    return: Entity ID, Entity Type, Latitude, Longitude of location
    raises: ValueError if the response is not JSON or has no suggestions
    '''
    loc_query = up.urlencode({'query': loc_name})

    url = base_url + services['loc'] + '?'
    url += api_header + '&' + loc_query

    site = requests.get(url)
    js = tryJson(site)

    # tryJson returns None for an unparsable body, and the suggestion list
    # may be empty; fail with a readable message instead of an opaque
    # TypeError / IndexError further down.
    suggestions = js.get('location_suggestions') if isinstance(js, dict) else None
    if not suggestions:
        raise ValueError('No location found for {!r}'.format(loc_name))

    top = suggestions[0]
    return top['entity_id'], top['entity_type'], top['latitude'], top['longitude']


def getRestaurantData(res_url):
    '''
    param: URL containing restaurant ID

    Passes URL and retrieves response object
    Converts response object to JSON object and collects data

    return: Dictionary containing restaurant data
    raises: ValueError if the response body cannot be parsed as JSON
    '''
    site = requests.get(res_url)
    js = tryJson(site)

    if js is None:
        # Without this guard the function reached its return statement with
        # the result dictionary never assigned (UnboundLocalError).
        # The URL is kept out of the message because it embeds the API key.
        raise ValueError(
            'Restaurant response is not valid JSON (HTTP status {})'.format(
                site.status_code))

    return {
        'name': js['name'],
        'id': js['R']['res_id'],
        'url': js['url'],
        'location': js['location']['address'],
        'latitude': js['location']['latitude'],
        'longitude': js['location']['longitude'],
        'cuisines': js['cuisines'],
        'average_cost_for_two': js['average_cost_for_two'],
        'price_range': js['price_range'],
        'user_rating': js['user_rating']['aggregate_rating'],
    }


# removed search_query as parameter
def requestData(entity_id, entity_type, start, sort, order, printData=True, count=20):
    '''
    params:
        entity_id   - location id
        entity_type - location type
        start       - fetch results after this offset
        sort        - sort restaurants by rating / cost / real_distance
        order       - asc / desc, used together with sort
        printData   - print ID and name of every newly collected restaurant
        count       - max number of results to request (1 - 20)

    Creates the search URL from the parameters
    Retrieves response object and converts it into JSON object
    Retrieves restaurant ID from JSON object
    Appends restaurant IDs that are neither in df_id_arr nor already in
    res_id_arr to res_id_arr

    return: None
    '''
    search_query = up.urlencode({
        'entity_id': entity_id,
        'entity_type': entity_type,
        'start': start,
        # Was read from an undefined global `count`; now a parameter.
        'count': count,
        'sort': sort,
        'order': order,
    })

    search_url = base_url + services['search'] + '?'
    search_url += api_header + '&' + search_query

    site = requests.get(search_url)
    js = tryJson(site)

    if js is None:
        # tryJson could not parse the body; report it and let the caller
        # carry on with the next page instead of crashing on js[...].
        print("Search response could not be parsed as JSON")
        return

    if js['results_found'] > 0:

        for x in js['restaurants']:
            res_name = x['restaurant']['name']
            res_id = x['restaurant']['R']['res_id']

            # Skip IDs already stored in the .csv data and IDs collected
            # earlier in this session (e.g. when the cell is run again).
            if res_id not in df_id_arr and res_id not in res_id_arr:
                res_id_arr.append(res_id)
                if printData:
                    print(res_id, '\t', res_name)
    else:
        print("No restaurants found")
        

In [None]:
'''
Your API Key goes here. You can request one at https://developers.zomato.com/api 
'''
API_KEY = 'abcde12345'

# Pre-encoded 'apikey=...' fragment that is pasted into every request URL
api_header = up.urlencode({'apikey': API_KEY})

# Root of the API; an endpoint name from `services` is appended to it
base_url = 'https://developers.zomato.com/api/v2.1/'

# Short alias -> endpoint name
services = {
    'loc': 'locations',
    'loc_det': 'location_details',
    'res': 'restaurant',
    'cuis': 'cuisines',
    'search': 'search',
}

# Collecting data

### Create DataFrame from existing .csv file

In [None]:
# Path of the .csv file that is read for existing data and appended to later
filename = 'Data/tomato_data.csv'

In [None]:
# Load the existing data; its 'id' column is what requestData checks
# new restaurant IDs against
csv_df = pd.read_csv(filename, encoding='utf-8')
df_id_arr = [*csv_df['id']]
print("No of restuarants in .csv file:", len(df_id_arr))

# Last expression of the cell: shows the final rows of the loaded data
csv_df.tail()

In [None]:
# Filled by requestData; read by the collectData* functions
res_id_arr = []    # Holds restaurant IDs

### Collect Restaurant ID

In [None]:
# Read the location name and resolve it into the identifiers used for searching
loc_name = input()
entity_id, entity_type, lat, long = getDetailsFromLocation(loc_name)

# Sorting preferences passed to every search request
sort = input('Sort: rating/cost/real_distance: ')
order = input('Order: asc/desc: ')

# Option to print restaurant ID, name
printData = True

# One request per offset: 0, 20, 40, 60, 80
for x in range(0, 100, 20):
    print("*****", x, "*****")
    requestData(entity_id, entity_type, start=x, sort=sort, order=order,
                printData=printData)

In [None]:
# Number of IDs currently held in res_id_arr
print('Number of restaurants:', len(res_id_arr))

### Collect Restaurant data from ID

In [None]:
res_url = base_url + services['res'] + '?'
res_data_arr = []    # Filled by the collectData* functions

# Split res_id_arr into three contiguous slices, one per worker:
#   [:first], [second:third], [third:]
# `second` is the START of the middle slice, so it must equal `first`.
# Deriving it as a length, (len - first) // 2, left a gap between the first
# two slices for some sizes (e.g. 11 IDs -> [:3], [4:7], [7:], index 3 lost).
first = len(res_id_arr) // 3                       # end of the first slice
second = first                                     # start of the middle slice
third = first + (len(res_id_arr) - first) // 2     # start of the last slice

def collectDataFirst(first):
    '''
    param: end index (exclusive) of the slice of res_id_arr to process

    Retrieves restaurant data for every ID in res_id_arr[: first]
    Appends each result to res_data_arr

    return: None
    '''
    # Everything in the URL except the restaurant ID is the same for all IDs
    url_prefix = base_url + services['res'] + '?' + api_header + '&'

    for j, res_id in enumerate(res_id_arr[: first]):
        print(j, 'Collecting data for res_id: ', res_id)
        # Build a fresh URL per restaurant. Extending one string with `+=`
        # on every pass glued the query of each restaurant onto the previous
        # ones, so only the first request used a well-formed URL.
        res_url = url_prefix + up.urlencode({'res_id': res_id})
        res_data_arr.append(getRestaurantData(res_url))

        
def collectDataSecond(second):
    '''
    param: start index of the slice of res_id_arr to process
           (the slice ends at the module-level index `third`)

    Retrieves restaurant data for every ID in res_id_arr[second: third]
    Appends each result to res_data_arr

    return: None
    '''
    # Everything in the URL except the restaurant ID is the same for all IDs
    url_prefix = base_url + services['res'] + '?' + api_header + '&'

    # Numbering starts at `second` so the printed index is the position
    # of the ID in res_id_arr
    for j, res_id in enumerate(res_id_arr[second: third], start=second):
        print(j, 'Collecting data for res_id: ', res_id)
        # Build a fresh URL per restaurant. Extending one string with `+=`
        # on every pass glued the query of each restaurant onto the previous
        # ones, so only the first request used a well-formed URL.
        res_url = url_prefix + up.urlencode({'res_id': res_id})
        res_data_arr.append(getRestaurantData(res_url))

        
def collectDataThird(third):
    '''
    param: start index of the slice of res_id_arr to process

    Retrieves restaurant data for every ID in res_id_arr[third: ]
    Appends each result to res_data_arr

    return: None
    '''
    # Everything in the URL except the restaurant ID is the same for all IDs
    url_prefix = base_url + services['res'] + '?' + api_header + '&'

    # Numbering starts at `third` so the printed index is the position
    # of the ID in res_id_arr
    for j, res_id in enumerate(res_id_arr[third: ], start=third):
        print(j, 'Collecting data for res_id: ', res_id)
        # Build a fresh URL per restaurant. Extending one string with `+=`
        # on every pass glued the query of each restaurant onto the previous
        # ones, so only the first request used a well-formed URL.
        res_url = url_prefix + up.urlencode({'res_id': res_id})
        res_data_arr.append(getRestaurantData(res_url))


### Threading to improve performance

In [None]:
%%time

# One worker thread per slice of res_id_arr
t1 = threading.Thread(target=collectDataFirst, args=(first,))
t2 = threading.Thread(target=collectDataSecond, args=(second,))
t3 = threading.Thread(target=collectDataThird, args=(third,))

# Start all three before waiting on any of them
for worker in (t1, t2, t3):
    worker.start()

# Wait until every worker has finished
for worker in (t1, t2, t3):
    worker.join()

### Create DataFrame to be added to .csv file

In [None]:
# One separate, initially empty list per column of the DataFrame built below
(df_name,
 df_id,
 df_location,
 df_cuisines,
 df_avg_cost,
 df_price_range,
 df_rating) = ([] for _ in range(7))

In [None]:
# (column list, key in the restaurant dictionary) pairs, in append order
column_sources = (
    (df_name, 'name'),
    (df_id, 'id'),
    (df_location, 'location'),
    (df_cuisines, 'cuisines'),
    (df_avg_cost, 'average_cost_for_two'),
    (df_price_range, 'price_range'),
    (df_rating, 'user_rating'),
)

for restaurant in res_data_arr:

    # Copy each field of this restaurant into its column list
    for column, key in column_sources:
        column.append(restaurant[key])

    '''
    for detail in restaurant:
        print('{}: {}'.format(detail, restaurant[detail]))
    print('\n' * 2)
    '''

In [None]:
# Column name -> collected values; insertion order sets the column order
column_data = {
    'id': df_id,
    'name': df_name,
    'location': df_location,
    'cuisines': df_cuisines,
    'avg_cost': df_avg_cost,
    'price_range': df_price_range,
    'rating': df_rating,
}
df = pd.DataFrame(column_data)

In [None]:
# Show the last rows of the newly built DataFrame
df.tail()

### Write DataFrame to .csv file

In [None]:
# Reset to False here; the write cell sets it to True after it has run
prevent_from_rewriting = False    # Flag to prevent accidentally rewriting data into .csv file

In [None]:
# Append the new rows to the .csv file once; running this cell again without
# resetting the flag only prints a notice.
if not prevent_from_rewriting:
    # Let pandas open the file itself: the rows are written as utf-8 (the
    # encoding the file is read with) and pandas takes care of newlines.
    # A text-mode handle opened without newline='' and without an explicit
    # encoding uses the platform default and yields blank rows on Windows.
    df.to_csv(filename, mode='a', header=False, index=False, encoding='utf-8')
else:
    print("Data has already been written into .csv file")
prevent_from_rewriting = True