### Import required libraries

In [39]:
import json
import csv
import pandas as pd

from datetime import datetime

### 1) Extract Restaurant Data
Extract the required fields from the JSON file and store them in `restaurant_data.csv`.

In [2]:
# read the raw restaurant dump; the encoding is given explicitly so the raw data file is decoded as utf8
with open('restaurant_data.json', mode='r', encoding="utf8") as raw_file:
    data = json.loads(raw_file.read())

In [3]:
# Write one row per restaurant to restaurant_data.csv, using the same utf8
# encoding that was used to read the json file.
with open('restaurant_data.csv', 'w', newline='', encoding="utf8") as csv_file:
    # columns of the output file, in header order
    fieldnames = ['id', 'name', 'country', 'city', 'user_votes', 'user_agr_rating', 'cuisines']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()  # write header to csv file

    # each top-level object carries a list of restaurants under 'restaurants'
    for d in data:
        for entry in d['restaurants']:
            # the fields of interest sit one level down, under 'restaurant'
            rest = entry['restaurant']
            location = rest['location']
            user_rating = rest['user_rating']

            # Build the row directly rather than binding one global per field:
            # a global named `id` would shadow the builtin id() in every later cell.
            writer.writerow({
                'id': rest['id'],
                'name': rest['name'],
                # note: the 'country' column holds the value of 'country_id'
                'country': location['country_id'],
                'city': location['city'],
                'user_votes': user_rating['votes'],
                # converted to float before being written
                'user_agr_rating': float(user_rating['aggregate_rating']),
                'cuisines': rest['cuisines'],
            })


### 2) Extract past events

In [62]:
# Write one row per event that started in April 2019 to restaurant_events.csv,
# using the same utf8 encoding that was used to read the json file.
with open('restaurant_events.csv', 'w', newline='', encoding="utf8") as csv_file:
    # columns of the output file, in header order
    fieldnames = ['event_id', 'rest_id', 'rest_name', 'photo_url', 'title', 'start_date', 'end_date']

    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()  # write header to csv file

    # each top-level object carries a list of restaurants under 'restaurants'
    for d in data:
        for entry in d['restaurants']:
            rest = entry['restaurant']

            # restaurants without a 'zomato_events' key contribute no rows
            for event_entry in rest.get('zomato_events', []):
                event = event_entry['event']
                start_date = datetime.strptime(event['start_date'], "%Y-%m-%d")

                # assume that "past event in the month of April 2019" starts in the month of April
                if not (start_date.year == 2019 and start_date.month == 4):
                    continue

                # every column defaults to 'NA' and is overwritten when a value exists
                row = dict.fromkeys(fieldnames, "NA")
                row['event_id'] = event['event_id']
                row['rest_id'] = rest['R']['res_id']
                row['rest_name'] = rest['name']
                row['title'] = event['title']
                row['start_date'] = event['start_date']
                row['end_date'] = event['end_date']

                # Join all photo urls with a single space. str.join leaves no
                # trailing separator, and a missing/empty 'photos' entry keeps
                # the 'NA' default instead of raising KeyError.
                photo_urls = [photo['photo']['url'] for photo in (event.get('photos') or [])]
                if photo_urls:
                    row['photo_url'] = " ".join(photo_urls)

                # write the event as one row of the csv file
                writer.writerow(row)


### 3) Extract aggregate ratings

In [69]:
# Collect every aggregate rating observed for each rating text of interest,
# then print the lowest and highest value per rating text to find its threshold.
ratings = {'Excellent': [], 'Very Good': [], 'Good': [], 'Average': [], 'Poor': []}

# each top-level object carries a list of restaurants under 'restaurants'
for d in data:
    # loop through each restaurant to obtain its rating
    for rest in d['restaurants']:
        user_rating = rest['restaurant']['user_rating']
        rating_text = user_rating['rating_text']

        # keep only the values for those ratings we are concerned about
        if rating_text in ratings:
            # convert to float so that min/max below compare numerically
            # rather than depending on the raw type of the field
            ratings[rating_text].append(float(user_rating['aggregate_rating']))

# calculate the lowest and highest value for each rating to find its threshold
for k, v in ratings.items():
    if not v:
        # min()/max() raise ValueError on an empty list; report it instead
        print(f'{k} Rating: no restaurants found')
        continue
    lowest_val = min(v)
    highest_val = max(v)
    print(f'{k} Rating: {lowest_val} - {highest_val}')

Excellent Rating: 4.5 - 4.9
Very Good Rating: 4.0 - 4.4
Good Rating: 3.5 - 3.9
Average Rating: 2.5 - 3.4
Poor Rating: 2.2 - 2.2
