In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
# Both raw inputs sit in <parent dir>/data/raw, resolved relative to the
# current working directory.
raw_data_dir = os.path.join(os.pardir, 'data', 'raw')
zomato_file_path = os.path.join(raw_data_dir, 'zomato.csv.zip')
country_code_file_path = os.path.join(raw_data_dir, 'Country-Code.xlsx')

### 1. Load the datasets into pandas DataFrames

In [None]:
# Restaurant records: read straight from the zipped CSV and decoded as
# ISO-8859-1 (presumably because the file is not valid UTF-8 -- TODO confirm).
zomato_df = pd.read_csv(zomato_file_path, encoding="ISO-8859-1")
# Country lookup sheet; its "Country Code" / "Country" columns are used later
# to attach a country name to every restaurant.
country_df = pd.read_excel(country_code_file_path)

In [None]:
# Summary statistics of the restaurant data (pandas describe() defaults)
zomato_df.describe()

In [None]:
# Column labels available in the restaurant data
zomato_df.columns

In [None]:
# Summary statistics of the country lookup table (pandas describe() defaults)
country_df.describe()

In [None]:
# Preview the first rows of the restaurant data
zomato_df.head()

In [None]:
# Preview the first rows of the country lookup table
country_df.head()

### 2. Determine which country has the best restaurants

In [None]:
# add country column to zomato_df

# Build the "Country Code" -> "Country" lookup once instead of filtering
# country_df again for every restaurant row. drop_duplicates keeps the first
# row per code, which is the row the previous `.iloc[0]` lookup selected.
country_by_code = (
    country_df.drop_duplicates("Country Code")
    .set_index("Country Code")["Country"]
)


def add_country(country_code):
    """Return the country name registered for ``country_code``.

    Parameters
    ----------
    country_code : value comparable to the entries of country_df["Country Code"]

    Returns
    -------
    The matching value of country_df["Country"], or NaN when the code is not
    listed (previously an unlisted code raised a bare IndexError).
    """
    return country_by_code.get(country_code, np.nan)


# Series.map with a Series argument performs the lookup for the whole column
# at once; codes missing from the lookup become NaN.
zomato_df["Country"] = zomato_df["Country Code"].map(country_by_code)

#### - The country with the best restaurants by mean aggregate rating

In [None]:
# Which country has the best restaurants, judged by the mean 'Aggregate rating'?
ratings_by_country = zomato_df.groupby('Country')['Aggregate rating']

# mean rating per country (Series indexed by country)
country_group_df = ratings_by_country.mean()
print(country_group_df)

# highest mean first; reset_index turns the country index into a regular column
country_group_df = country_group_df.sort_values(ascending=False)
country_group_df = country_group_df.reset_index()
print(country_group_df)

# after sorting, the first row holds the top-ranked country
top_country = country_group_df["Country"].iloc[0]
print('The country with the best Resturant is {}'.format(top_country))

#### - The country with the best restaurants by cumulative aggregate rating

In [None]:
# Which country has the best restaurants, judged by the cumulative 'Aggregate rating'?
# Caveat: the totals can be skewed because countries do not have the same
# number of restaurants.
ratings_by_country = zomato_df.groupby('Country')['Aggregate rating']

# total rating per country (Series indexed by country)
country_group_df = ratings_by_country.sum()
print(country_group_df)

# largest total first; reset_index turns the country index into a regular column
country_group_df = country_group_df.sort_values(ascending=False)
country_group_df = country_group_df.reset_index()
print(country_group_df)

# after sorting, the first row holds the top-ranked country
top_country = country_group_df["Country"].iloc[0]
print('The country with the best Resturant is {}'.format(top_country))

### 3. Determine the Top 10 most expensive restaurants and their cuisines.

In [None]:
# Distinct currency labels used in the data; costs are quoted in these
# currencies, so they are needed to compare prices across countries
zomato_df['Currency'].unique()

In [None]:
# currency_converter package did not have all the currencies so the rates are hard coded
# these are the rates as of 8 August 2018; each value is the price of one unit
# of the currency in dollars ('Dollar($)' is the base, rate 1)

rate = {
    'Botswana Pula(P)': 0.097 ,
    'Brazilian Real(R$)': 0.27,
    'Dollar($)': 1,
    'Emirati Diram(AED)': 0.27,
    'Indian Rupees(Rs.)': 0.015,
    'Indonesian Rupiah(IDR)': 0.000069,
    'NewZealand($)': 0.67,
    'Pounds(\x8c£)': 1.29,
    'Qatari Rial(QR)': 0.27,
    'Rand(R)': 0.074,
    'Sri Lankan Rupee(LKR)': 0.0063,
    'Turkish Lira(TL)': 0.19
}


def add_average_cost_for_2_in_dollars(row):
    """Convert one row's 'Average Cost for two' to dollars.

    Parameters
    ----------
    row : mapping or Series with the keys 'Currency' and 'Average Cost for two'

    Returns
    -------
    The cost multiplied by the hard-coded rate of the row's currency.

    Raises
    ------
    KeyError
        If the row's currency has no entry in ``rate``.
    """
    return rate[row['Currency']] * row['Average Cost for two']


# Fail up front, naming every currency without a rate, instead of hitting an
# opaque KeyError somewhere in the middle of the conversion.
missing_currencies = set(zomato_df['Currency'].unique()) - set(rate)
if missing_currencies:
    raise KeyError(
        'No exchange rate defined for: {}'.format(sorted(missing_currencies, key=str))
    )

# add average_cost_for_2_$ column to zomato_df
# Vectorised: look up each row's rate with map() and multiply whole columns,
# rather than calling a Python function once per row via apply(axis=1).
zomato_df["average_cost_for_2_$"] = (
    zomato_df['Currency'].map(rate) * zomato_df['Average Cost for two']
)
    

In [None]:
# The ten most expensive restaurants (by dollar cost for two) with their
# cuisines: keep only the columns of interest, order by cost from highest to
# lowest, and take the first ten rows
report_columns = ['Restaurant Name', 'Cuisines', 'Country', "average_cost_for_2_$"]
(
    zomato_df[report_columns]
    .sort_values(by="average_cost_for_2_$", ascending=False)
    .head(10)
)

### 4. Some visualizations

In [None]:
# Histogram of the 'Price range' column.
# The plot is given a title and an x-label so the figure can be read on its
# own; the trailing ';' hides the text repr of the last call.
ax = zomato_df['Price range'].plot.hist()
ax.set_title('Distribution of restaurants by price range')
ax.set_xlabel('Price range');

In [None]:
# Box plot of 'Aggregate rating', one box per 'Price range' value
zomato_df.boxplot(column='Aggregate rating', by='Price range')

In [None]:
# Pie chart of how many restaurants each country contributes to the data.
ax = zomato_df['Country'].value_counts().plot.pie(figsize=(10, 10))
ax.set_title('Share of restaurants by country')
# The default y-label is only the Series name; blank it so it does not
# compete with the title. The trailing ';' hides the text repr.
ax.set_ylabel('');

In [None]:
# Scatter plot: aggregate rating on the x-axis against the dollar cost for
# two on the y-axis, one point per restaurant row
zomato_df.plot(kind='scatter', x='Aggregate rating', y='average_cost_for_2_$')