In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import linregress
import numpy as np
import pprint
import folium
from folium.plugins import HeatMap
import matplotlib.pyplot as plt
import plotly.express as px
# Path to the restaurant rankings CSV (resolved relative to the notebook's working directory)
Top_Res_Path = "WorldsBestRestaurants.csv"

# Load the rankings into a DataFrame and preview it as the cell output
Top_Res_df = pd.read_csv(filepath_or_buffer=Top_Res_Path)
Top_Res_df

Unnamed: 0,year,rank,restaurant,location,country,lat,lng
0,2002,1,El Bulli,Roses,Spain,42.263949,3.179553
1,2002,2,Restaurant Gordon Ramsay,London,United Kingdom,51.507218,-0.127586
2,2002,3,The French Laundry,Yountville,United States,38.401578,-122.360810
3,2002,4,Rockpool,Sydney,Australia,-33.868820,151.209295
4,2002,5,Spoon des Iles,Ile Maurice,Mauritius,-20.348404,57.552152
...,...,...,...,...,...,...,...
1045,2023,46,Orfali Bros Bistro,Dubai,United Arab Emirates,25.204849,55.270783
1046,2023,47,Mayta,Lima,Peru,-12.046689,-77.043088
1047,2023,48,La Grenouillère,La Madelaine-sous-Montreuil,France,50.466809,1.748056
1048,2023,49,Rosetta,Mexico City,Mexico,19.432608,-99.133208


In [2]:
# Count the distinct restaurant names in the rankings (missing values are not counted).
Restaurant_count = Top_Res_df["restaurant"].nunique(dropna=True)
Restaurant_count

257

In [3]:
# Count the distinct countries in the rankings (missing values are not counted).
Country_count = Top_Res_df["country"].nunique(dropna=True)
Country_count

42

In [4]:
# Count the distinct values of the 'location' column (missing values are not counted).
Location_count = Top_Res_df["location"].nunique(dropna=True)
Location_count

135

In [5]:
# Distinct country names, in order of first appearance in the rankings.
unique_country = pd.unique(Top_Res_df["country"])

# Print them as a single comma-separated line.
country_line = ', '.join(unique_country)
print(country_line)


Spain, United Kingdom, United States, Australia, Mauritius, France, Argentina, Canada, India, United Arab Emirates, Hong Kong, Sweden, Japan, Italy, Barbados, Switzerland, Thailand, Monaco, Türkiye, South Africa, Ireland, Netherlands, Iceland, Hungary, Kenya, Norway, Germany, Belgium, Estonia, Denmark, Finland, Brazil, Austria, Singapore, Mexico, Peru, Russia, Portugal, China, Chile, Slovenia, Colombia


In [6]:
# Countries present in the dataset (42 unique values):
#Spain, United Kingdom, United States, Australia, Mauritius, France, Argentina, Canada, India, 
#United Arab Emirates, Hong Kong, Sweden, Japan, Italy, Barbados, Switzerland, Thailand, Monaco, 
#Türkiye, South Africa, Ireland, Netherlands, Iceland, Hungary, Kenya, Norway, Germany, Belgium, 
#Estonia, Denmark, Finland, Brazil, Austria, Singapore, Mexico, Peru, 
#Russia, Portugal, China, Chile, Slovenia, Colombia

In [7]:
# Region -> member countries. Every country that occurs in the rankings must
# appear in exactly one list; a country missing from this map ends up with an
# empty Region when the map is applied.
# NOTE(review): Türkiye and Russia are transcontinental; they are grouped
# under Asia here - confirm this matches the intended reporting.
region_map = {
    'Africa': ['Kenya', 'Mauritius', 'South Africa'],
    'Asia': ['China', 'Hong Kong', 'India', 'Japan', 'Singapore', 'Thailand', 'United Arab Emirates', 'Türkiye', 'Russia'],
    'Australia': ['Australia'],
    'Europe': ['Austria', 'Belgium', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Hungary', 'Iceland', 'Ireland', 'Italy', 'Monaco', 'Netherlands', 'Norway', 'Portugal', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'United Kingdom'],
    # Barbados (Caribbean) is grouped with North America so that all 42
    # countries in the dataset resolve to a region.
    'North America': ['Barbados', 'Canada', 'Mexico', 'United States'],
    'South America': ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Peru']
}

In [8]:
# Invert region_map into a country -> region lookup. setdefault keeps the
# first region that lists a country, which matches a first-match scan over
# region_map in insertion order.
country_to_region = {}
for region_name, member_countries in region_map.items():
    for member in member_countries:
        country_to_region.setdefault(member, region_name)

# One region per row of the rankings; countries absent from region_map get None.
regions = [country_to_region.get(name) for name in Top_Res_df['country']]

Top_Res_df['Region'] = regions

In [9]:
# Write the region-annotated rankings to disk, then preview the first rows.
# The preview must be the final expression of the cell: Jupyter renders only
# the value of the last expression, so a head() call placed before to_csv()
# is evaluated and silently discarded.
Top_Res_df.to_csv('cleaned_Res.csv', index=False)
Top_Res_df.head()

In [10]:
# Keep the first ranking row seen for each restaurant name and renumber the
# rows from 0 so the index is contiguous.
unique_res = (
    Top_Res_df
    .drop_duplicates(subset='restaurant', keep='first')
    .reset_index(drop=True)
)
unique_res

Unnamed: 0,year,rank,restaurant,location,country,lat,lng,Region
0,2002,1,El Bulli,Roses,Spain,42.263949,3.179553,Europe
1,2002,2,Restaurant Gordon Ramsay,London,United Kingdom,51.507218,-0.127586,Europe
2,2002,3,The French Laundry,Yountville,United States,38.401578,-122.360810,North America
3,2002,4,Rockpool,Sydney,Australia,-33.868820,151.209295,Australia
4,2002,5,Spoon des Iles,Ile Maurice,Mauritius,-20.348404,57.552152,Africa
...,...,...,...,...,...,...,...,...
252,2023,36,Plénitude,Paris,France,48.857548,2.351377,Europe
253,2023,37,Sézanne,Tokyo,Japan,35.676423,139.650027,Asia
254,2023,46,Orfali Bros Bistro,Dubai,United Arab Emirates,25.204849,55.270783,Asia
255,2023,48,La Grenouillère,La Madelaine-sous-Montreuil,France,50.466809,1.748056,Europe


In [11]:
# Coordinates of each unique restaurant, one [lat, lng] pair per row.
restaurant_coords = unique_res[['lat', 'lng']]
heat_data = restaurant_coords.values.tolist()

# Centre the map on the mean latitude/longitude of those restaurants.
heatmap_center = [restaurant_coords['lat'].mean(), restaurant_coords['lng'].mean()]
res_heatmap = folium.Map(location=heatmap_center, zoom_start=2)

# Add the heat layer and write the map to a standalone HTML file.
HeatMap(heat_data).add_to(res_heatmap)
res_heatmap.save('Top_rated_restaurant_Heatmap.html')


In [12]:
# Input files: the repeat-ranking table and the region-annotated rankings
# written to 'cleaned_Res.csv' earlier in this notebook. Paths get their own
# names so that res_repeat / cleaned_res always refer to DataFrames.
res_repeat_path = 'RestaurantRepeatRanking.csv'
cleaned_res_path = 'cleaned_Res.csv'
res_repeat = pd.read_csv(res_repeat_path)
cleaned_res = pd.read_csv(cleaned_res_path)

# cleaned_res can hold several rows per restaurant (one per ranking entry).
# Reduce it to one (restaurant, Region) row - the first seen - before joining,
# so the left join cannot multiply the rows of res_repeat.
region_lookup = cleaned_res[['restaurant', 'Region']].drop_duplicates(subset='restaurant')

# Left join on 'restaurant' to attach 'Region' to the repeat ranking data.
# validate='many_to_one' makes pandas raise if the lookup ever contains a
# restaurant more than once.
merged = pd.merge(
    res_repeat,
    region_lookup,
    on='restaurant',
    how='left',
    validate='many_to_one',
)

# Drop any restaurant repeated in the repeat-ranking file itself and reset the index.
merged = merged.drop_duplicates(subset='restaurant').reset_index(drop=True)

# Display the first few rows of the merged dataset
merged.head()


Unnamed: 0,restaurant,count,Region
0,Mugaritz,17,Europe
1,Arpège,17,Europe
2,Le Calandre,17,Europe
3,Le Bernardin,17,North America
4,Arzak,16,Europe


In [13]:
# Human-readable axis labels for the chart.
axis_labels = {'restaurant': 'Restaurant', 'count': 'Repeat Count'}

# One bar per restaurant, bar height taken from 'count', coloured by region.
fig = px.bar(
    data_frame=merged,
    x='restaurant',
    y='count',
    color='Region',
    hover_name='restaurant',
    labels=axis_labels,
    title='Restaurant Repeat Ranking by Region',
)

# Save a standalone HTML copy, then render the interactive plot in the notebook.
fig.write_html('restaurant_repeat_ranking.html')
fig.show()