In [77]:
import opendatasets as od
import pandas as pd
import numpy as np
import os
import copy

## Table of Contents
* [1. Preprocessing Data](#1)
    * [1.1. Downloading data](#1_1)
<!--     * [1.2. Deleting outliers and mistakes in data](#1_2)
    * [1.3. Adding new columns](#1_3)
    * [1.4. Adding Geospatial data](#1_4) -->
* [2. EDA....](#2)
<!--     * [2.1. Comparing of cities](#2_1)
    * [2.2. Bivariate Analysis of Warsaw](#2_2) -->
* [3. Model Fitting & Evaluation](#3)

<!--     * [3.1. Model 1: Linear Regression](#3_linear)
    * [3.2. Model 2: Ridge](#3_ridge)
    * [3.3. Model 3: Lasso](#3_lasso)
    * [3.4. Model 4: k-Nearest Neighbors](#3_knn)
    * [3.5. Model 5: Decision Tree](#3_tree)
    * [3.6. Model 6: Random Forest](#3_forest)
    * [3.7. Ensemble Method](#3_ensemble)
        * [3.7.1. Voting Regressor](#3_voting)
        * [3.7.2. Bagging Regressor](#3_bagging)
        * [3.7.3. AdaBoost Regressor](#3_adaboost)
        * [3.7.4. Gradient Boosting Regressor](#3_gradient)
        * [3.7.5. XGBoost](#3_xgboost)
        * [3.7.6. Stacked Ensembles](#3_stacked)
* [4. Conclusion](#4)
* [5. Predicting a price by example features](#5) -->

<a id="1"></a>

# 1. EDA

<a id="1_1"></a>

## 1.1. Downloading data

In [5]:
od.download("https://www.kaggle.com/datasets/stefanoleone992/tripadvisor-european-restaurants")


Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: alexandertropin
Your Kaggle Key: ········
Downloading tripadvisor-european-restaurants.zip to ./tripadvisor-european-restaurants


100%|████████████████████████████████████████| 102M/102M [00:38<00:00, 2.78MB/s]





In [2]:
file_path = 'tripadvisor-european-restaurants/tripadvisor_european_restaurants.csv'
rest_df = pd.read_csv(file_path)
 
rest_df.head()

  rest_df = pd.read_csv(file_path)


Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude,...,excellent,very_good,average,poor,terrible,food,service,value,atmosphere,keywords
0,g10001637-d10002227,Le 147,"[""Europe"", ""France"", ""Nouvelle-Aquitaine"", ""Ha...",France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"10 Maison Neuve, 87510 Saint-Jouvent France",45.961674,1.169131,...,2.0,0.0,0.0,0.0,0.0,4.0,4.5,4.0,,
1,g10001637-d14975787,Le Saint Jouvent,"[""Europe"", ""France"", ""Nouvelle-Aquitaine"", ""Ha...",France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"16 Place de l Eglise, 87510 Saint-Jouvent France",45.95704,1.20548,...,2.0,2.0,1.0,0.0,0.0,,,,,
2,g10002858-d4586832,Au Bout du Pont,"[""Europe"", ""France"", ""Centre-Val de Loire"", ""B...",France,Centre-Val de Loire,Berry,Rivarennes,"2 rue des Dames, 36800 Rivarennes France",46.635895,1.386133,...,3.0,1.0,0.0,0.0,0.0,,,,,
3,g10002986-d3510044,Le Relais de Naiade,"[""Europe"", ""France"", ""Nouvelle-Aquitaine"", ""Co...",France,Nouvelle-Aquitaine,Correze,Lacelle,"9 avenue Porte de la Correze 19170, 19170 Lace...",45.64261,1.82446,...,1.0,0.0,0.0,0.0,0.0,4.5,4.5,4.5,,
4,g10022428-d9767191,Relais Du MontSeigne,"[""Europe"", ""France"", ""Occitanie"", ""Aveyron"", ""...",France,Occitanie,Aveyron,Saint-Laurent-de-Levezou,"route du Montseigne, 12620 Saint-Laurent-de-Le...",44.20886,2.96047,...,4.0,7.0,0.0,0.0,0.0,4.5,4.5,4.5,,


In [3]:
# removing the big dataset

os.remove(file_path)

In [4]:
rest_df.shape

(1083397, 42)

In [5]:
rest_df.isna().sum()

restaurant_link                           0
restaurant_name                           0
original_location                         0
country                                   0
region                                50323
province                             340632
city                                 400685
address                                   0
latitude                              15790
longitude                             15790
claimed                                1842
awards                               820264
popularity_detailed                   94988
popularity_generic                    97792
top_tags                             110634
price_level                          277205
price_range                          779070
meals                                448050
cuisines                             169103
special_diets                        743141
features                             765990
vegetarian_friendly                       0
vegan_options                   

In [6]:
rest_df.isna().sum() / rest_df.shape[0] * 100.

restaurant_link                       0.000000
restaurant_name                       0.000000
original_location                     0.000000
country                               0.000000
region                                4.644927
province                             31.441106
city                                 36.984134
address                               0.000000
latitude                              1.457453
longitude                             1.457453
claimed                               0.170021
awards                               75.712227
popularity_detailed                   8.767608
popularity_generic                    9.026423
top_tags                             10.211769
price_level                          25.586650
price_range                          71.909928
meals                                41.356031
cuisines                             15.608590
special_diets                        68.593600
features                             70.702614
vegetarian_fr

In [7]:
rest_df.dtypes

restaurant_link                       object
restaurant_name                       object
original_location                     object
country                               object
region                                object
province                              object
city                                  object
address                               object
latitude                             float64
longitude                            float64
claimed                               object
awards                                object
popularity_detailed                   object
popularity_generic                    object
top_tags                              object
price_level                           object
price_range                           object
meals                                 object
cuisines                              object
special_diets                         object
features                              object
vegetarian_friendly                   object
vegan_opti

In [8]:
rest_df.loc[:, 'restaurant_link':'longitude'].head()

Unnamed: 0,restaurant_link,restaurant_name,original_location,country,region,province,city,address,latitude,longitude
0,g10001637-d10002227,Le 147,"[""Europe"", ""France"", ""Nouvelle-Aquitaine"", ""Ha...",France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"10 Maison Neuve, 87510 Saint-Jouvent France",45.961674,1.169131
1,g10001637-d14975787,Le Saint Jouvent,"[""Europe"", ""France"", ""Nouvelle-Aquitaine"", ""Ha...",France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"16 Place de l Eglise, 87510 Saint-Jouvent France",45.95704,1.20548
2,g10002858-d4586832,Au Bout du Pont,"[""Europe"", ""France"", ""Centre-Val de Loire"", ""B...",France,Centre-Val de Loire,Berry,Rivarennes,"2 rue des Dames, 36800 Rivarennes France",46.635895,1.386133
3,g10002986-d3510044,Le Relais de Naiade,"[""Europe"", ""France"", ""Nouvelle-Aquitaine"", ""Co...",France,Nouvelle-Aquitaine,Correze,Lacelle,"9 avenue Porte de la Correze 19170, 19170 Lace...",45.64261,1.82446
4,g10022428-d9767191,Relais Du MontSeigne,"[""Europe"", ""France"", ""Occitanie"", ""Aveyron"", ""...",France,Occitanie,Aveyron,Saint-Laurent-de-Levezou,"route du Montseigne, 12620 Saint-Laurent-de-Le...",44.20886,2.96047


In [9]:
rest_df.loc[:, 'claimed':'special_diets'].head()

Unnamed: 0,claimed,awards,popularity_detailed,popularity_generic,top_tags,price_level,price_range,meals,cuisines,special_diets
0,Claimed,,#1 of 2 Restaurants in Saint-Jouvent,#1 of 2 places to eat in Saint-Jouvent,"Cheap Eats, French",€,,"Lunch, Dinner",French,
1,Unclaimed,,#2 of 2 Restaurants in Saint-Jouvent,#2 of 2 places to eat in Saint-Jouvent,Cheap Eats,€,,,,
2,Claimed,,#1 of 1 Restaurant in Rivarennes,#1 of 1 places to eat in Rivarennes,"Cheap Eats, French, European",€,,"Dinner, Lunch, Drinks","French, European",
3,Claimed,,#1 of 1 Restaurant in Lacelle,#1 of 1 places to eat in Lacelle,"Cheap Eats, French",€,,"Lunch, Dinner",French,
4,Unclaimed,,#1 of 1 Restaurant in Saint-Laurent-de-Levezou,#1 of 1 places to eat in Saint-Laurent-de-Levezou,"Mid-range, French",€€-€€€,,"Lunch, Dinner",French,


In [10]:
rest_df.loc[:, 'features':'working_shifts_per_week'].head()

Unnamed: 0,features,vegetarian_friendly,vegan_options,gluten_free,original_open_hours,open_days_per_week,open_hours_per_week,working_shifts_per_week
0,"Reservations, Seating, Wheelchair Accessible, ...",N,N,N,,,,
1,,N,N,N,,,,
2,"Reservations, Seating, Table Service, Wheelcha...",N,N,N,,,,
3,"Reservations, Seating, Serves Alcohol, Table S...",N,N,N,,,,
4,"Reservations, Seating, Wheelchair Accessible, ...",N,N,N,,,,


In [11]:
rest_df.loc[:, 'avg_rating':'value'].head()

Unnamed: 0,avg_rating,total_reviews_count,default_language,reviews_count_in_default_language,excellent,very_good,average,poor,terrible,food,service,value
0,4.0,36.0,English,2.0,2.0,0.0,0.0,0.0,0.0,4.0,4.5,4.0
1,4.0,5.0,All languages,5.0,2.0,2.0,1.0,0.0,0.0,,,
2,5.0,13.0,English,4.0,3.0,1.0,0.0,0.0,0.0,,,
3,4.0,34.0,English,1.0,1.0,0.0,0.0,0.0,0.0,4.5,4.5,4.5
4,4.5,11.0,All languages,11.0,4.0,7.0,0.0,0.0,0.0,4.5,4.5,4.5


In [12]:
rest_df.loc[:, 'atmosphere':'keywords'].head()

Unnamed: 0,atmosphere,keywords
0,,
1,,
2,,
3,,
4,,


## 1.2 Looking at columns with "," separator 

In [13]:
# rating of values in the column

def rating_of_column_values(df, column, separator=', '):
    df2 = df[list(df)]
    df2['column_2'] = df2[column].str.split(separator)
    tags_list = list(set().union(*df2['column_2'].dropna()))

    tags_list_2 = []
    for tag in tags_list:
        tags_list_2.append([tag, df2['column_2'].dropna().map(set([tag]).issubset).fillna(False).value_counts()[True]])

    tags_list_2 = sorted(tags_list_2, key=lambda x: x[1], reverse=True)
    df2 = df2.drop(['column_2'], axis=1)
    return tags_list_2



In [14]:
# columns with ', ' (amount and percentage)

for col in rest_df.select_dtypes(include='object').columns:
    rest_with_comma = len(rest_df[rest_df[col].str.contains(', ', na=False)])
    if rest_with_comma > 0:
        print("{0:<20s} {1:>10} {2:>10f}".format(col, rest_with_comma, round(rest_with_comma/rest_df.shape[0]*100, 2)))

restaurant_name            3930   0.360000
original_location       1083397 100.000000
city                          3   0.000000
address                 1074625  99.190000
awards                   214179  19.770000
top_tags                 850418  78.500000
meals                    522083  48.190000
cuisines                 627912  57.960000
special_diets            168200  15.530000
features                 257037  23.730000
original_open_hours      593832  54.810000
keywords                  99198   9.160000


In [15]:
rating_of_column_values(rest_df, 'awards', separator=', ')

[["Travellers' Choice", 147397],
 ['Certificate of Excellence 2017', 142887],
 ['Certificate of Excellence 2020', 140248],
 ['Certificate of Excellence 2019', 139995],
 ['Certificate of Excellence 2018', 138741],
 ['Certificate of Excellence 2016', 107686],
 ['Certificate of Excellence 2015', 66340],
 ['Certificate of Excellence 2014', 41445],
 ['Certificate of Excellence 2013', 31065],
 ['Certificate of Excellence 2012', 11377],
 ['Michelin 2021 for The Michelin Plate: Good cooking', 5796],
 ['Certificate of Excellence 2011', 4335],
 ['Michelin 2021 for Comfortable restaurant', 3687],
 ['Michelin 2021 for Simple restaurant', 3478],
 ['good value cooking', 1816],
 ['worth a stop!', 1672],
 ['Michelin 2020 for The Michelin Plate: Good cooking', 1366],
 ['Michelin 2021 for Bib Gourmand: good quality', 1323],
 ['Michelin 2021 for Very comfortable restaurant', 1301],
 ['Michelin 2021 for One Michelin Star: High quality cooking', 1292],
 ['Michelin 2020 for Comfortable restaurant', 1150],
 

In [16]:
rating_of_column_values(rest_df, 'top_tags', separator=', ')

[['Mid-range', 538207],
 ['Cheap Eats', 240351],
 ['Italian', 236822],
 ['European', 177712],
 ['Mediterranean', 155388],
 ['Vegetarian Friendly', 133520],
 ['Pizza', 113259],
 ['Cafe', 107793],
 ['French', 98892],
 ['Bar', 90677],
 ['Spanish', 82501],
 ['Seafood', 80241],
 ['British', 64133],
 ['Fast food', 58479],
 ['Pub', 53573],
 ['Asian', 45942],
 ['German', 36346],
 ['International', 32942],
 ['Quick Bites', 29544],
 ['Chinese', 29113],
 ['Dessert', 28969],
 ['Fine Dining', 28081],
 ['American', 27972],
 ['Greek', 26215],
 ['Indian', 21197],
 ['Japanese', 20717],
 ['Barbecue', 20511],
 ['Steakhouse', 18576],
 ['Portuguese', 17682],
 ['Vegan Options', 16240],
 ['Sushi', 15435],
 ['Healthy', 14077],
 ['Dutch', 14041],
 ['Bakeries', 13373],
 ['Grill', 13261],
 ['Belgian', 11558],
 ['Austrian', 11028],
 ['Central European', 11015],
 ['Turkish', 10264],
 ['Wine Bar', 9745],
 ['Contemporary', 9620],
 ['Thai', 9446],
 ['Polish', 9374],
 ['Brew Pub', 9062],
 ['Deli', 8498],
 ['Middle Eas

In [17]:
rating_of_column_values(rest_df, 'meals', separator=', ')

[['Dinner', 532366],
 ['Lunch', 511678],
 ['Breakfast', 181695],
 ['Drinks', 117450],
 ['Brunch', 101571],
 ['After-hours', 91200]]

In [18]:
rating_of_column_values(rest_df, 'cuisines', separator=', ')

[['Italian', 235823],
 ['European', 208436],
 ['Mediterranean', 173020],
 ['Pizza', 114070],
 ['Cafe', 109188],
 ['French', 98480],
 ['Spanish', 93191],
 ['Bar', 90508],
 ['Pub', 82113],
 ['Seafood', 81397],
 ['British', 67673],
 ['Fast food', 61154],
 ['Asian', 53658],
 ['German', 36236],
 ['International', 34727],
 ['Greek', 30774],
 ['Chinese', 29046],
 ['American', 27921],
 ['Barbecue', 26997],
 ['Healthy', 26168],
 ['Portuguese', 23781],
 ['Grill', 22019],
 ['Indian', 21119],
 ['Japanese', 20645],
 ['Central European', 20349],
 ['Steakhouse', 18511],
 ['Southern-Italian', 18191],
 ['Sushi', 17777],
 ['Wine Bar', 17482],
 ['Gastropub', 14764],
 ['Contemporary', 14665],
 ['Dutch', 14076],
 ['Diner', 13444],
 ['Deli', 11621],
 ['Middle Eastern', 11588],
 ['Central-Italian', 11582],
 ['Belgian', 11549],
 ['Turkish', 11318],
 ['Austrian', 11026],
 ['Thai', 11004],
 ['Street Food', 10743],
 ['Fusion', 10702],
 ['Northern-Italian', 10249],
 ['Polish', 9367],
 ['Brew Pub', 9145],
 ['Soups

In [19]:
rating_of_column_values(rest_df, 'special_diets', separator=', ')

[['Vegetarian Friendly', 324017],
 ['Vegan Options', 136597],
 ['Gluten Free Options', 123497],
 ['Halal', 6709],
 ['Kosher', 298]]

In [20]:
rating_of_column_values(rest_df, 'features', separator=', ')

[['Seating', 228439],
 ['Reservations', 215387],
 ['Table Service', 191467],
 ['Wheelchair Accessible', 146385],
 ['Serves Alcohol', 129553],
 ['Takeout', 94983],
 ['Outdoor Seating', 74512],
 ['Accepts Credit Cards', 58121],
 ['Highchairs Available', 52849],
 ['Full Bar', 49332],
 ['Free Wifi', 41665],
 ['Parking Available', 27316],
 ['Wine and Beer', 25291],
 ['Delivery', 18758],
 ['Television', 17841],
 ['Accepts Visa', 17558],
 ['Accepts Mastercard', 16681],
 ['Street Parking', 14063],
 ['Free off-street parking', 12344],
 ['Digital Payments', 10863],
 ['Validated Parking', 7949],
 ['Accepts American Express', 7707],
 ['Gift Cards Available', 6103],
 ['Cash Only', 6046],
 ['Private Dining', 5075],
 ['Dog Friendly', 5064],
 ['Buffet', 4562],
 ['Family style', 4423],
 ['Live Music', 4080],
 ['Non-smoking restaurants', 2607],
 ['Waterfront', 1722],
 ['Accepts Discover', 1262],
 ['Valet Parking', 1155],
 ['Playgrounds', 1141],
 ['Beach', 668],
 ['Drive Thru', 499],
 ['BYOB', 494],
 ['S

In [21]:
# too long
# rating_of_column_values(rest_df, 'original_open_hours', separator=', ')

In [22]:
# too long
# rating_of_column_values(rest_df, 'keywords', separator=', ')

In [23]:
rating_of_column_values(rest_df, 'country', separator=', ')

[['Italy', 224763],
 ['Spain', 157479],
 ['France', 155288],
 ['England', 144681],
 ['Germany', 115333],
 ['Greece', 33763],
 ['Portugal', 32592],
 ['The Netherlands', 29792],
 ['Poland', 24698],
 ['Belgium', 23711],
 ['Austria', 20487],
 ['Sweden', 18555],
 ['Czech Republic', 14844],
 ['Scotland', 14215],
 ['Ireland', 11203],
 ['Denmark', 9485],
 ['Wales', 9134],
 ['Croatia', 8375],
 ['Romania', 7842],
 ['Hungary', 7431],
 ['Finland', 7372],
 ['Bulgaria', 4469],
 ['Slovakia', 4251],
 ['Northern Ireland', 3634]]

## 1.3 Fixing the country name

In [24]:
def theUK(country):
    if country in ['England', 'Scotland', 'Wales', 'Northern Ireland']:
        return 'United Kingdom'
    else:
        return country

def theNe(country):
    if country == 'Netherlands':
        return 'The Netherlands'
    else:
        return country

subreg = {
    'Italy': 'South',
    'Spain': 'South',
    'France': 'West',
    'United Kingdom': 'North',
#     'England': 'North',
    'Germany': 'West',
    'Greece': 'South',
    'Portugal': 'South',
    'The Netherlands': 'West',
    'Poland': 'East',
    'Belgium': 'West',
    'Austria': 'West',
    'Sweden': 'North',
    'Czech Republic': 'East',
#     'Scotland': 'North',
    'Ireland': 'North',
    'Denmark': 'North',
#     'Wales': 'North',
    'Croatia': 'South',
    'Romania': 'East',
    'Hungary': 'East',
    'Finland': 'North',
    'Bulgaria': 'East',
    'Slovakia': 'East'
#     'Northern Ireland': 'North'
}


In [25]:
rest_df_upd = copy.deepcopy(rest_df)
# df_upd = df_upd.drop(['column_2'], axis=1)
rest_df_upd = rest_df_upd.drop(['original_location'], axis=1)
rest_df_upd['country_origin'] = rest_df_upd['country']
rest_df_upd['country'] = rest_df_upd.apply(lambda x: theUK(x['country']),axis=1)
rest_df_upd['subregion'] = rest_df_upd.apply(lambda x: subreg[x['country']],axis=1)

rest_df_upd.head()

Unnamed: 0,restaurant_link,restaurant_name,country,region,province,city,address,latitude,longitude,claimed,...,average,poor,terrible,food,service,value,atmosphere,keywords,country_origin,subregion
0,g10001637-d10002227,Le 147,France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"10 Maison Neuve, 87510 Saint-Jouvent France",45.961674,1.169131,Claimed,...,0.0,0.0,0.0,4.0,4.5,4.0,,,France,West
1,g10001637-d14975787,Le Saint Jouvent,France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"16 Place de l Eglise, 87510 Saint-Jouvent France",45.95704,1.20548,Unclaimed,...,1.0,0.0,0.0,,,,,,France,West
2,g10002858-d4586832,Au Bout du Pont,France,Centre-Val de Loire,Berry,Rivarennes,"2 rue des Dames, 36800 Rivarennes France",46.635895,1.386133,Claimed,...,0.0,0.0,0.0,,,,,,France,West
3,g10002986-d3510044,Le Relais de Naiade,France,Nouvelle-Aquitaine,Correze,Lacelle,"9 avenue Porte de la Correze 19170, 19170 Lace...",45.64261,1.82446,Claimed,...,0.0,0.0,0.0,4.5,4.5,4.5,,,France,West
4,g10022428-d9767191,Relais Du MontSeigne,France,Occitanie,Aveyron,Saint-Laurent-de-Levezou,"route du Montseigne, 12620 Saint-Laurent-de-Le...",44.20886,2.96047,Unclaimed,...,0.0,0.0,0.0,4.5,4.5,4.5,,,France,West


In [26]:
boolean_dict = {
    'N': False,
    'Y': True
}

flag_columns = ['vegetarian_friendly','vegan_options','gluten_free']
for col in flag_columns:
    rest_df_upd = rest_df_upd.replace({col: boolean_dict})

rest_df_upd[flag_columns].head()

Unnamed: 0,vegetarian_friendly,vegan_options,gluten_free
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False


<a id="2"></a>

# 2. EDA.... VALIDATION ....

In [41]:
drop_columns = [
#     'original_location', # already deleted
    'address',
    'claimed',
    'awards',
    'popularity_detailed',
    'popularity_generic',
    'price_range', # too detailed
    'open_days_per_week',
    'open_hours_per_week',
    'working_shifts_per_week',
    
    'vegetarian_friendly','vegan_options','gluten_free', # flags + duplicated of 'special_diets'

    'excellent','very_good','average','poor','terrible',
    'default_language',
    'reviews_count_in_default_language',
    'keywords'  # a lot of NaN
]

In [None]:
# exceeded?
# 'top_tags'


address_columns = [
    'restaurant_link',
    'restaurant_name',
    'country','region','province','city', #'address',
    'latitude','longitude'
]

schedule_columns = [
    'features',
    'price_level', 
#     'price_range',
    'original_open_hours',
#     'open_days_per_week',
#     'open_hours_per_week',
#     'working_shifts_per_week'
]

menu_columns = [
    'meals','cuisines','special_diets' #,
#     'vegetarian_friendly','vegan_options','gluten_free' # flags + duplicated of 'special_diets'
]

rating_columns = [
    'avg_rating',
    'total_reviews_count',
#     'excellent','very_good','average','poor','terrible',
    'food','service','value','atmosphere'
]


In [None]:
price_level_dict = {
    '€': 'Cheap Eats',
    '€€-€€€': 'Mid-range',
    '€€€€': 'Fine Dining'
}

# kek = ger_df[['top_tags','price_level']]
# kek.head(15)

In [38]:
rest_df_short.groupby(['country'])['province'].nunique()
# region

country
Austria             16
Belgium             10
Bulgaria            27
Croatia             22
Czech Republic      14
Denmark             19
Finland             11
France              92
Germany             80
Greece              73
Hungary             19
Ireland             26
Italy              121
Poland              16
Portugal            27
Romania             41
Slovakia             2
Spain              135
Sweden              23
The Netherlands     12
United Kingdom     548
Name: province, dtype: int64

In [42]:
rest_df_short = rest_df_upd.drop(drop_columns, axis=1)
rest_df_short.head()

Unnamed: 0,restaurant_link,restaurant_name,country,region,province,city,latitude,longitude,top_tags,price_level,...,features,original_open_hours,avg_rating,total_reviews_count,food,service,value,atmosphere,country_origin,subregion
0,g10001637-d10002227,Le 147,France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,45.961674,1.169131,"Cheap Eats, French",€,...,"Reservations, Seating, Wheelchair Accessible, ...",,4.0,36.0,4.0,4.5,4.0,,France,West
1,g10001637-d14975787,Le Saint Jouvent,France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,45.95704,1.20548,Cheap Eats,€,...,,,4.0,5.0,,,,,France,West
2,g10002858-d4586832,Au Bout du Pont,France,Centre-Val de Loire,Berry,Rivarennes,46.635895,1.386133,"Cheap Eats, French, European",€,...,"Reservations, Seating, Table Service, Wheelcha...",,5.0,13.0,,,,,France,West
3,g10002986-d3510044,Le Relais de Naiade,France,Nouvelle-Aquitaine,Correze,Lacelle,45.64261,1.82446,"Cheap Eats, French",€,...,"Reservations, Seating, Serves Alcohol, Table S...",,4.0,34.0,4.5,4.5,4.5,,France,West
4,g10022428-d9767191,Relais Du MontSeigne,France,Occitanie,Aveyron,Saint-Laurent-de-Levezou,44.20886,2.96047,"Mid-range, French",€€-€€€,...,"Reservations, Seating, Wheelchair Accessible, ...",,4.5,11.0,4.5,4.5,4.5,,France,West


In [48]:
# for classification:

rest_df_clf = rest_df_short.drop([
    'restaurant_name', 'city', 'longitude','latitude', 'original_open_hours', 'country_origin','subregion',
    'avg_rating','total_reviews_count', 'food','service','value','atmosphere'
], axis=1)
rest_df_clf.head()


Unnamed: 0,restaurant_link,country,region,province,top_tags,price_level,meals,cuisines,special_diets,features
0,g10001637-d10002227,France,Nouvelle-Aquitaine,Haute-Vienne,"Cheap Eats, French",€,"Lunch, Dinner",French,,"Reservations, Seating, Wheelchair Accessible, ..."
1,g10001637-d14975787,France,Nouvelle-Aquitaine,Haute-Vienne,Cheap Eats,€,,,,
2,g10002858-d4586832,France,Centre-Val de Loire,Berry,"Cheap Eats, French, European",€,"Dinner, Lunch, Drinks","French, European",,"Reservations, Seating, Table Service, Wheelcha..."
3,g10002986-d3510044,France,Nouvelle-Aquitaine,Correze,"Cheap Eats, French",€,"Lunch, Dinner",French,,"Reservations, Seating, Serves Alcohol, Table S..."
4,g10022428-d9767191,France,Occitanie,Aveyron,"Mid-range, French",€€-€€€,"Lunch, Dinner",French,,"Reservations, Seating, Wheelchair Accessible, ..."


In [102]:
dutch_df = rest_df_clf[rest_df_clf['country'] == 'The Netherlands'].drop(['country','region','top_tags','restaurant_link'], axis=1)
dutch_df

Unnamed: 0,province,price_level,meals,cuisines,special_diets,features
155288,North Brabant Province,,,"Dutch, European",,
155289,North Brabant Province,€€-€€€,Dinner,Asian,,"Reservations, Seating, Takeout"
155290,North Brabant Province,€,,"Dutch, European",,
155291,Limburg Province,€€-€€€,"Lunch, Dinner, Brunch, Breakfast","European, Dutch",Vegetarian Friendly,
155292,Limburg Province,€€-€€€,"Lunch, Dinner","German, Dutch, European, Greek",,
...,...,...,...,...,...,...
185075,Gelderland Province,€€-€€€,"Lunch, Dinner, Drinks",Pub,,
185076,Gelderland Province,,,,,
185077,Gelderland Province,€€-€€€,"Drinks, Lunch, Dinner","American, Mexican",,
185078,Friesland Province,,"Breakfast, Lunch, Dinner, Brunch, Drinks","Dutch, Bar, Cafe, European, Pub, Diner",,"Reservations, Seating, Takeout, Outdoor Seatin..."


In [103]:
dutch_df2 = dutch_df[dutch_df['price_level'].notna()]
dutch_df2

Unnamed: 0,province,price_level,meals,cuisines,special_diets,features
155289,North Brabant Province,€€-€€€,Dinner,Asian,,"Reservations, Seating, Takeout"
155290,North Brabant Province,€,,"Dutch, European",,
155291,Limburg Province,€€-€€€,"Lunch, Dinner, Brunch, Breakfast","European, Dutch",Vegetarian Friendly,
155292,Limburg Province,€€-€€€,"Lunch, Dinner","German, Dutch, European, Greek",,
155294,South Holland Province,€€-€€€,"Lunch, Dinner, Breakfast","Dutch, Seafood, European, French","Vegetarian Friendly, Gluten Free Options",
...,...,...,...,...,...,...
185073,North Holland Province,€,"Lunch, Dinner","Dutch, Fast food, European",,"Delivery, Takeout, Outdoor Seating, Seating, P..."
185074,Gelderland Province,€€-€€€,,,,Reservations
185075,Gelderland Province,€€-€€€,"Lunch, Dinner, Drinks",Pub,,
185077,Gelderland Province,€€-€€€,"Drinks, Lunch, Dinner","American, Mexican",,


## NEED DUMMIES

In [65]:
def features_to_flags(df, feature_col_list):
    for col in feature_col_list:
        top_tags = [f[0] for f in rating_of_column_values(df, col, separator=', ') if f[1] >= 0.02 * len(df)]
        for t in top_tags:
            df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
        df = df.drop(col, axis=1)
    return df


In [104]:
# dummies without not populat options of feature columns

feature_col_list = ['meals','cuisines','special_diets','features'] # without 'top_tags' because this is repeatition
dutch_df3 = features_to_flags(dutch_df2, feature_col_list)
dutch_df3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0

Unnamed: 0,province,price_level,meals_Dinner,meals_Lunch,meals_Brunch,meals_Drinks,meals_Breakfast,meals_After-hours,cuisines_European,cuisines_Dutch,...,features_Seating,features_Table Service,features_Wheelchair Accessible,features_Serves Alcohol,features_Takeout,features_Outdoor Seating,features_Free Wifi,features_Highchairs Available,features_Accepts Credit Cards,features_Full Bar
155289,North Brabant Province,€€-€€€,1,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,0,0
155290,North Brabant Province,€,0,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
155291,Limburg Province,€€-€€€,1,1,1,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
155292,Limburg Province,€€-€€€,1,1,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
155294,South Holland Province,€€-€€€,1,1,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185073,North Holland Province,€,1,1,0,0,0,0,1,1,...,1,0,1,0,1,1,1,1,0,0
185074,Gelderland Province,€€-€€€,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
185075,Gelderland Province,€€-€€€,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
185077,Gelderland Province,€€-€€€,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [107]:
dutch_df3.groupby('price_level').size()

price_level
€          4138
€€-€€€    17497
€€€€        722
dtype: int64

In [85]:
# dutch_df[dutch_df['price_level'] == '€€€€']

In [105]:
# dutch_df4 = dutch_df3.drop(['restaurant_link','top_tags_Cheap Eats', 'top_tags_Mid-range', 'top_tags_Fine Dining'], axis=1)
# dutch_df4

In [113]:
dutch_features_percent_df = (dutch_df3.select_dtypes(include=['int64']).sum() / dutch_df3.shape[0] * 100.).sort_values(ascending=False)
dutch_features_percent_df


cuisines_European                    60.401664
meals_Dinner                         56.863622
special_diets_Vegetarian Friendly    54.488527
cuisines_Dutch                       48.163886
meals_Lunch                          43.986224
meals_Brunch                         20.669142
cuisines_Seafood                     20.526010
special_diets_Vegan Options          16.303619
features_Reservations                16.223107
features_Seating                     16.066556
special_diets_Gluten Free Options    15.337478
meals_Drinks                         13.405197
meals_Breakfast                      13.288903
features_Table Service               12.765577
cuisines_International               12.604553
features_Wheelchair Accessible       10.967482
cuisines_Asian                       10.502304
cuisines_Bar                         10.148947
cuisines_Italian                      9.701659
cuisines_French                       8.422418
cuisines_Cafe                         7.831999
features_Serv

In [130]:
dutch_df4 = dutch_df3[
    list(dutch_df3.select_dtypes(include=['object'])) + 
    list(dutch_features_percent_df[dutch_features_percent_df.values >= 10].index)
]
dutch_df4


Unnamed: 0,province,price_level,cuisines_European,meals_Dinner,special_diets_Vegetarian Friendly,cuisines_Dutch,meals_Lunch,meals_Brunch,cuisines_Seafood,special_diets_Vegan Options,features_Reservations,features_Seating,special_diets_Gluten Free Options,meals_Drinks,meals_Breakfast,features_Table Service,cuisines_International,features_Wheelchair Accessible,cuisines_Asian,cuisines_Bar
155289,North Brabant Province,€€-€€€,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0
155290,North Brabant Province,€,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
155291,Limburg Province,€€-€€€,1,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0
155292,Limburg Province,€€-€€€,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
155294,South Holland Province,€€-€€€,1,1,1,1,1,0,1,0,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185073,North Holland Province,€,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0
185074,Gelderland Province,€€-€€€,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
185075,Gelderland Province,€€-€€€,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
185077,Gelderland Province,€€-€€€,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0


In [68]:
# dutch_df5 = pd.get_dummies(dutch_df4, prefix=None, prefix_sep='_', columns=feature_col_list)
# # dummy_na=False, , sparse=False, drop_first=False, dtype=None
# dutch_df5

<a id="3"></a>

# 3. Model Fitting & Evaluation

In [52]:
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
# from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.preprocessing import LabelEncoder
# from sklearn.preprocessing import StandardScaler, PowerTransformer, MinMaxScaler

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score

In [131]:
dutch_df5 = dutch_df4[list(dutch_df4)]

labelencoder = LabelEncoder()
dutch_df5['price_level'] = labelencoder.fit_transform(dutch_df5['price_level'])
labelencoder2 = LabelEncoder()
dutch_df5['province'] = labelencoder2.fit_transform(dutch_df5['province'])
# dutch_df5['high_traffic'] = df3['high_traffic'].replace({"High": 1, "Low": 0})

X = dutch_df5.drop('price_level', axis=1) # Features
y = dutch_df5['price_level'] # Target variable

# Normalize the numeric features
# scaler = StandardScaler()
# X[num_names_1] = scaler.fit_transform(X[num_names_1])

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [132]:
labels_dict = dict(zip(labelencoder.classes_, range(len(labelencoder.classes_))))
labels_dict

{'€': 0, '€€-€€€': 1, '€€€€': 2}

In [133]:
labels_dict2 = dict(zip(labelencoder2.classes_, range(len(labelencoder2.classes_))))
labels_dict2

{'Drenthe Province': 0,
 'Flevoland Province': 1,
 'Friesland Province': 2,
 'Gelderland Province': 3,
 'Groningen Province': 4,
 'Limburg Province': 5,
 'North Brabant Province': 6,
 'North Holland Province': 7,
 'Overijssel Province': 8,
 'South Holland Province': 9,
 'Utrecht Province': 10,
 'Zeeland Province': 11}

In [78]:
grid = {
    "C": np.logspace(-3, 3, 7), # from 0.001 to 1000 by 7 steps
    "penalty": ["l1", "l2", "elasticnet", None],
    "multi_class": ["auto", "ovr", "multinomial"]
}
logreg = LogisticRegression()
logreg_cv = GridSearchCV(logreg, grid, cv=10)
logreg_cv.fit(X_train, y_train)

print("Tuned hyperparameters:", logreg_cv.best_params_)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

KeyboardInterrupt: 

In [79]:
logreg2 = LogisticRegression(C=0.001, multi_class='multinomial', penalty="l2") # best parameters
logreg2.fit(X_train, y_train)
y_pred_logreg = logreg2.predict(X_test)

print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_logreg))
print("Classification report:\n", classification_report(y_test, y_pred_logreg))


Confusion matrix:
 [[  37 1206    0]
 [   6 5250    0]
 [   0  209    0]]
Classification report:
               precision    recall  f1-score   support

           0       0.86      0.03      0.06      1243
           1       0.79      1.00      0.88      5256
           2       0.00      0.00      0.00       209

    accuracy                           0.79      6708
   macro avg       0.55      0.34      0.31      6708
weighted avg       0.78      0.79      0.70      6708



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [80]:
grid = {
    "C": np.logspace(-3, 3, 7), # from 0.001 to 1000 by 7 steps
    "penalty": ["l1", "l2"],
    "loss": ["hinge", "squared_hinge"]
}
svm = LinearSVC()
svm_cv = GridSearchCV(svm, grid, cv=10)
svm_cv.fit(X_train, y_train)

print("Tuned hyperparameters:", svm_cv.best_params_)



KeyboardInterrupt: 

In [81]:
svm2 = LinearSVC(C=0.01, loss='squared_hinge', penalty='l2') # best parameters
svm2.fit(X_train, y_train)
y_pred_svm = svm2.predict(X_test)

print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("Classification report:\n", classification_report(y_test, y_pred_svm))

Confusion matrix:
 [[ 233 1010    0]
 [ 108 5148    0]
 [   1  208    0]]
Classification report:
               precision    recall  f1-score   support

           0       0.68      0.19      0.29      1243
           1       0.81      0.98      0.89      5256
           2       0.00      0.00      0.00       209

    accuracy                           0.80      6708
   macro avg       0.50      0.39      0.39      6708
weighted avg       0.76      0.80      0.75      6708



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


<a id="3_tree"></a>

## 3.3. Decision Tree

In [134]:
grid = {
    "min_samples_split": range(2,30,2),
    "max_depth": range(2,10,2),
#     "criterion": ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    "splitter": ['best', 'random']
}
tree = DecisionTreeClassifier()
tree_cv = GridSearchCV(tree, grid, cv=10)
tree_cv.fit(X_train, y_train)

print("Tuned hyperparameters:", tree_cv.best_params_)

Tuned hyperparameters: {'max_depth': 6, 'min_samples_split': 14, 'splitter': 'random'}


In [135]:
# for best params:

tree = DecisionTreeClassifier(min_samples_split=14, max_depth=6, splitter='random')
tree.fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)

print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_tree))
print("Classification report:\n", classification_report(y_test, y_pred_tree))

Confusion matrix:
 [[ 193 1050    0]
 [ 111 5144    1]
 [   1  208    0]]
Classification report:
               precision    recall  f1-score   support

           0       0.63      0.16      0.25      1243
           1       0.80      0.98      0.88      5256
           2       0.00      0.00      0.00       209

    accuracy                           0.80      6708
   macro avg       0.48      0.38      0.38      6708
weighted avg       0.75      0.80      0.74      6708



In [137]:
from sklearn.neighbors import KNeighborsClassifier


In [139]:
grid = {
    "n_neighbors": range(4, 10, 2),
    "leaf_size": range(20, 41, 10),
#     "p": range(1, 3),
    "algorithm": ['auto', 'ball_tree'] # ‘kd_tree’, ‘brute’
}
knn = KNeighborsClassifier()
knn_cv = GridSearchCV(knn, grid, cv=10)
knn_cv.fit(X_train, y_train)

print("Tuned hyperparameters:", knn_cv.best_params_)

Tuned hyperparameters: {'algorithm': 'ball_tree', 'leaf_size': 30, 'n_neighbors': 8}


In [140]:
# for best params:

knn = KNeighborsClassifier(n_neighbors=8, leaf_size=30, p=2, algorithm='ball_tree')
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_knn))
print("Classification report:\n", classification_report(y_test, y_pred_knn))

Confusion matrix:
 [[ 372  871    0]
 [ 441 4806    9]
 [  18  186    5]]
Classification report:
               precision    recall  f1-score   support

           0       0.45      0.30      0.36      1243
           1       0.82      0.91      0.86      5256
           2       0.36      0.02      0.04       209

    accuracy                           0.77      6708
   macro avg       0.54      0.41      0.42      6708
weighted avg       0.74      0.77      0.75      6708



In [154]:
for row in confusion_matrix(y_test, y_pred_knn).T:
    print(row)
    for i in row:
        print(i / sum(row))
        
# precision == column[i] / sum(column)
# recall == row[i] / sum(row)

[372 441  18]
0.44765342960288806
0.5306859205776173
0.021660649819494584
[ 871 4806  186]
0.14855875831485588
0.8197168684973563
0.03172437318778782
[0 9 5]
0.0
0.6428571428571429
0.35714285714285715


# ========================== DRAFTS ==========================  
# ========================== DRAFTS ==========================  
# ========================== DRAFTS ==========================  
# ========================== DRAFTS ========================== 

# 2. Hypothesis Testing

# 2.1 Preparing the dataset

In [31]:
ger_df = rest_df_upd[rest_df_upd['country'] == 'Germany']
print(ger_df.shape)

(115333, 44)


In [32]:
drop_columns = [
#     'original_location', # already deleted
    'claimed',
    'awards',
    'popularity_detailed',
    'popularity_generic',
    'price_range', # too detailed
    'open_days_per_week',
    'open_hours_per_week',
    'working_shifts_per_week',

    'excellent','very_good','average','poor','terrible',
    'default_language',
    'reviews_count_in_default_language',
    'keywords'  # a lot of NaN
]

In [33]:
ger_df = ger_df.drop(drop_columns, axis=1)
print(ger_df.shape)

(115333, 28)


In [45]:
# saving the dataset for Germany

# ger_df.to_csv('german_restaurants.csv', sep='\t', encoding='utf-8', index=False)


In [34]:
# ger_df = pd.read_csv('german_restaurants.csv')


In [35]:
# exceeded?
# 'top_tags'


address_columns = [
    'restaurant_link',
    'restaurant_name',
    'country','region','province','city','address',
    'latitude','longitude'
]

schedule_columns = [
    'features',
    'price_level', 
#     'price_range',
    'original_open_hours',
#     'open_days_per_week',
#     'open_hours_per_week',
#     'working_shifts_per_week'
]

menu_columns = [
    'meals','cuisines','special_diets',
    'vegetarian_friendly','vegan_options','gluten_free' # flags + duplicated of 'special_diets'
]

rating_columns = [
    'avg_rating',
    'total_reviews_count',
#     'excellent','very_good','average','poor','terrible',
    'food','service','value','atmosphere'
]


In [None]:
# key_df = ger_df[ger_df['keywords'].notna()]
# key_df

In [36]:
price_level_dict = {
    '€': 'Cheap Eats',
    '€€-€€€': 'Mid-range',
    '€€€€': 'Fine Dining'
}

# kek = ger_df[['top_tags','price_level']]
# kek.head(15)

## 2.2 ADDING DUMMIES INSTEAD OF FEATURES COLUMNS

In [165]:
rest_df_upd.head()

Unnamed: 0,restaurant_link,restaurant_name,country,region,province,city,address,latitude,longitude,claimed,...,poor,terrible,food,service,value,atmosphere,keywords,column_2,country_origin,subregion
0,g10001637-d10002227,Le 147,France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"10 Maison Neuve, 87510 Saint-Jouvent France",45.961674,1.169131,Claimed,...,0.0,0.0,4.0,4.5,4.0,,,[France],France,West
1,g10001637-d14975787,Le Saint Jouvent,France,Nouvelle-Aquitaine,Haute-Vienne,Saint-Jouvent,"16 Place de l Eglise, 87510 Saint-Jouvent France",45.95704,1.20548,Unclaimed,...,0.0,0.0,,,,,,[France],France,West
2,g10002858-d4586832,Au Bout du Pont,France,Centre-Val de Loire,Berry,Rivarennes,"2 rue des Dames, 36800 Rivarennes France",46.635895,1.386133,Claimed,...,0.0,0.0,,,,,,[France],France,West
3,g10002986-d3510044,Le Relais de Naiade,France,Nouvelle-Aquitaine,Correze,Lacelle,"9 avenue Porte de la Correze 19170, 19170 Lace...",45.64261,1.82446,Claimed,...,0.0,0.0,4.5,4.5,4.5,,,[France],France,West
4,g10022428-d9767191,Relais Du MontSeigne,France,Occitanie,Aveyron,Saint-Laurent-de-Levezou,"route du Montseigne, 12620 Saint-Laurent-de-Le...",44.20886,2.96047,Unclaimed,...,0.0,0.0,4.5,4.5,4.5,,,[France],France,West


In [262]:
rest_df_core = rest_df_upd[[
    'restaurant_link', 'country', 
    'total_reviews_count', 'avg_rating', 
    'price_level', 
    'features', 'meals', 'cuisines', 'special_diets'
]]

rest_df_core['meals_cnt'] = rest_df_core['meals'].apply(lambda x: str(x).count(', ')+1 if str(x) != 'nan' else 0)
rest_df_core['cuisines_cnt'] = rest_df_core['cuisines'].apply(lambda x: str(x).count(', ')+1 if str(x) != 'nan' else 0)
rest_df_core.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rest_df_core['meals_cnt'] = rest_df_core['meals'].apply(lambda x: str(x).count(', ')+1 if str(x) != 'nan' else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rest_df_core['cuisines_cnt'] = rest_df_core['cuisines'].apply(lambda x: str(x).count(', ')+1 if str(x) != 'nan' else 0)


Unnamed: 0,restaurant_link,country,total_reviews_count,avg_rating,price_level,features,meals,cuisines,special_diets,meals_cnt,cuisines_cnt
0,g10001637-d10002227,France,36.0,4.0,€,"Reservations, Seating, Wheelchair Accessible, ...","Lunch, Dinner",French,,2,1
1,g10001637-d14975787,France,5.0,4.0,€,,,,,0,0
2,g10002858-d4586832,France,13.0,5.0,€,"Reservations, Seating, Table Service, Wheelcha...","Dinner, Lunch, Drinks","French, European",,3,2
3,g10002986-d3510044,France,34.0,4.0,€,"Reservations, Seating, Serves Alcohol, Table S...","Lunch, Dinner",French,,2,1
4,g10022428-d9767191,France,11.0,4.5,€€-€€€,"Reservations, Seating, Wheelchair Accessible, ...","Lunch, Dinner",French,,2,1


In [221]:
def features_to_flags(df, feature_col_list):
    for col in feature_col_list:
        top_tags = [f[0] for f in rating_of_column_values(df, col, separator=', ') if f[1] >= 0.05 * len(df)]
        for t in top_tags:
            df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
        df = df.drop(col, axis=1)
    return df


In [230]:
# feature_col_list = ['price_level', 'features', 'meals', 'cuisines', 'special_diets']

# rest_df_flg = features_to_flags(rest_df_flg, feature_col_list)
# rest_df_flg.head()

# italy_df = features_to_flags(rest_df_flg[rest_df_flg['country'] == 'Italy'], feature_col_list)
# italy_df.head()

## 2.3 Division of Germany restaurants on 3 types by cuisines

In [256]:
def define_country_df(df, country):
    feature_col_list = ['price_level', 'features', 'meals', 'cuisines', 'special_diets']
    return features_to_flags(df[df['country'] == country], feature_col_list)

In [258]:
# cuisines_columns = [col for col in germany_df.columns if 'cuisines_' in col and '_cnt' not in col]
# cuisines_columns

In [249]:
def cuisine_type(df, cuisine_col):
    if df[cuisine_col] == 0:
        return 'no'
    elif df[cuisine_col] == 1 and df['cuisine_cnt'] == 1:
        return 'only'
    else:
        return 'one of'

def define_types_by_cuisine(df, cuisine_col): 
    cuisines_columns = [col for col in df.columns if 'cuisines_' in col and '_cnt' not in col]
    df['cuisine_cnt'] = df.apply(lambda x: sum([x[col] for col in cuisines_columns]), axis=1)
    df['cuisine_type'] = df.apply(lambda x: cuisine_type(x, cuisine_col), axis=1)
    df = df.drop(['cuisine_cnt'], axis=1)
    return df

In [253]:
def compare_by_feature2(df):

    df2 = df[
        ['restaurant_link','total_reviews_count','avg_rating','cuisine_type']
    ] # [df['total_reviews_count'] >= 20]
#     df2['feature_flg'] = df2[features_col].apply(lambda x: 1 if feature in str(x) else 0)

    df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']

    df2_agg = df2.groupby('cuisine_type').agg({'restaurant_link': 'count', 'summary_score': 'sum', 'total_reviews_count': 'sum'})
    df2_agg['total_avg_rating'] = df2_agg['summary_score'] / df2_agg['total_reviews_count']

    return df2_agg[['restaurant_link', 'total_avg_rating']]



In [None]:
def filter_with_threshold2(df):
#     size of the minimal group
    min_group_size_ratio = df['restaurant_link'].min() / df['restaurant_link'].sum()
#     ratio of difference between ratings (for group with feature and group without feature) and rating of base option (group without feature)
    diff_rating_ratio = abs((df['total_avg_rating'].max() - df['total_avg_rating'].min()) / df['total_avg_rating'].loc[0])
    
    if len(df['total_avg_rating']) == 2:
        if df['total_avg_rating'].loc[0] < df['total_avg_rating'].loc[1]:
            featured_group_rating = 'better'
        elif df['total_avg_rating'].loc[0] > df['total_avg_rating'].loc[1]:
            featured_group_rating = 'worse'
        else:
            featured_group_rating = 'the same'
    else:
        featured_group_rating = 'not applicable'
    return min_group_size_ratio, diff_rating_ratio, featured_group_rating


In [264]:
def print_significant_groups2(df, country):
#     , group_size_threshold, rating_diff_threshold
    
    country_df = define_country_df(df, country)
    cuisines_columns = [col for col in country_df.columns if 'cuisines_' in col and '_cnt' not in col]
    
    for cuisine in cuisines_columns:
        country_df2 = define_types_by_cuisine(country_df, cuisine) 
        country_df2_agg = compare_by_feature2(country_df2)
        
        print()
        print(cuisine)
        print(country_df2_agg)
        print()
        print()


#             df_compare = compare_by_feature2(df)
#             param_filter = filter_with_threshold(df_compare)
#             if (param_filter[0] > group_size_threshold 
#                 and 
#                 param_filter[1] > rating_diff_threshold
#                ):
#                 print()
#                 print(col, '-', feature)
#                 print(df_compare)
#                 print("Ratio of the smallest group:\t{0:.3f}".format(param_filter[0]))
#                 print("Variation (??):\t\t\t{0:.3f}".format(param_filter[1]))
#                 if param_filter[2] == 'not applicable':
#                     print(param_filter[2])
#                 else: 
#                     print("The featured group is " + str.upper(param_filter[2]))
            
    return 1

In [261]:
# germany_df = define_country_df(rest_df_core, 'Germany')
# germany_df2 = define_types_by_cuisine(germany_df, 'cuisines_Pizza') 
# germany_df2_agg = compare_by_feature2(germany_df2)

# germany_df2_agg

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']


Unnamed: 0_level_0,restaurant_link,total_avg_rating
cuisine_type,Unnamed: 1_level_1,Unnamed: 2_level_1
no,106575,4.064656
one of,6944,3.937707
only,1814,3.872732


In [None]:
# feature_columns = ['price_level', 'meals', 'cuisines', 'special_diets', 'features']

# featured_cnt_threshold = 20
# group_size_threshold = 0.1
# rating_diff_threshold = 0.05

In [265]:
for country in [f[0] for f in rating_of_column_values(rest_df, 'country', separator=', ')]:
    print()
    print()
    print(str.upper(country))
    print()
    print_significant_groups2(rest_df_core, country)
# feature_columns, featured_cnt_threshold
#     , group_size_threshold, rating_diff_threshold
    



ITALY



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating


cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      59651          4.033788
one of                 115514          4.065837
only                    49598          4.114026




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     166113          4.034643
one of                  57681          4.092535
only                      969          4.188410




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     169331          4.128405
one of                  51002          3.914043
only                     4430          4.025409




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Seafood
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     181877          4.086895
one of                  39493          4.036251
only                     3393          3.985193




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     191261          4.071529
one of                  32769          4.049503
only                      733          3.930922




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     205282          4.070908
one of                  18545          3.994607
only                      936          4.058434




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     208991          4.075583
one of                  12049          3.890802
only                     3723          3.915206




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Southern-Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     209688          4.062075
one of                  15075          4.098068




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     211782          4.057316
one of                  12489          4.114938
only                      492          4.025675




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)



cuisines_Barbecue
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     213081          4.068375
one of                  11682          4.058747




SPAIN



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Spanish
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      71506          4.166442
one of                  53369          4.095530
only                    32604          3.866617




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     103146          4.047300
one of                  48824          4.124652
only                     5509          4.192728




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     133508          4.048297
one of                  22255          4.149029
only                     1716          4.261278




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     144113          4.099919
one of                   8579          4.007374
only                     4787          4.014048




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     139246          4.098021
one of                  16834          4.074546
only                     1399          4.161931




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     146762          4.089853
one of                   8817          4.161098
only                     1900          4.126549




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     146827          4.095844
one of                   9851          4.089272
only                      801          4.189833




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     148700          4.096292
one of                   7199          4.104070
only                     1580          3.683640




FRANCE



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating


cuisines_French
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      70645          3.964291
one of                  35204          4.075237
only                    49439          4.046407




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     123451          4.013787
one of                  29852          4.066877
only                     1985          3.994397




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     139037          4.047353
one of                  12754          4.004939
only                     3497          3.878297




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     142759          4.042843
one of                  10268          4.019818
only                     2261          3.813097




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     144391          4.025454
one of                   8029          4.119485
only                     2868          4.202649




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Asian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     145703          4.043727
one of                    765          4.148355
only                     8820          3.976262




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     146159          4.048226
one of                   3504          4.153214
only                     5625          3.455308




ENGLAND



GERMANY



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_German
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      82766          4.067180
one of                  14772          4.050218
only                    17795          3.993489




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      94124          4.051138
one of                  17983          4.058715
only                     3226          4.052994




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      99941          4.062290
one of                  11192          4.016125
only                     4200          3.981891




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     103690          4.053404
one of                   4834          4.017629
only                     6809          4.139856




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     105790          4.047926
one of                   7924          4.075444
only                     1619          4.208399




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     106364          4.046595
one of                   8969          4.078132




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     106575          4.064656
one of                   6944          3.937707
only                     1814          3.872732




GREECE



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Greek
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      11572          4.338830
one of                  16064          4.356065
only                     6127          4.302624




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      19877          4.329275
one of                  13362          4.356255
only                      524          4.359284




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Seafood
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      28222          4.356392
one of                   5330          4.333026
only                      211          4.329437




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      28573          4.348066
one of                   3558          4.357849
only                     1632          4.355189




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      29411          4.335121
one of                   4144          4.389218
only                      208          4.379649




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      28895          4.348839
one of                   4597          4.352728
only                      271          4.262208




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      31668          4.349563
one of                   1619          4.335406
only                      476          4.341703




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      31834          4.347260
one of                   1841          4.409061
only                       88          4.327904




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Steakhouse
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      31976          4.348486
one of                   1495          4.361232
only                      292          4.311871




PORTUGAL



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      11137          4.266978
one of                  20865          4.186011
only                      590          4.226711




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Portuguese
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      11240          4.279369
one of                  20485          4.186672
only                      867          4.158005




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      25725          4.172143
one of                   6672          4.241864
only                      195          4.427437




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      29885          4.211009
one of                   2054          4.178022
only                      653          4.224958




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Seafood
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      30191          4.225976
one of                   2289          4.137606
only                      112          4.328617




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      30610          4.210553
one of                   1308          4.188156
only                      674          4.237790




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      29039          4.210903
one of                   2920          4.205129
only                      633          4.159041




THE NETHERLANDS



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      14439          4.118187
one of                  14524          4.102265
only                      829          4.043622




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Dutch
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      17382          4.115364
one of                  12089          4.096016
only                      321          4.104415




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Seafood
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      25033          4.087134
one of                   4361          4.154025
only                      398          4.118220




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_International
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      26721          4.094180
one of                   2558          4.165637
only                      513          4.178365




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      27165          4.113575
one of                   1678          4.052103
only                      949          3.966848




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Asian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      27224          4.106098
one of                    999          4.041223
only                     1569          4.144495




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      27519          4.100630
one of                   1551          4.160870
only                      722          4.228776




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_French
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      27749          4.086951
one of                   1887          4.316136
only                      156          4.295699




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      27352          4.128072
one of                   2222          4.008940
only                      218          3.969260




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      28068          4.101697
one of                   1383          4.146010
only                      341          4.210693




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      28147          4.108850
one of                   1495          4.060202
only                      150          4.084672




POLAND



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      14389          4.191901
one of                   9461          4.208695
only                      848          4.201198




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Polish
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      15833          4.205555
one of                   8512          4.200063
only                      353          4.193727




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      21418          4.198809
one of                   2359          4.229156
only                      921          4.192383




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      22337          4.206623
one of                   1637          4.186167
only                      724          3.760306




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      22443          4.194757
one of                    943          4.275761
only                     1312          4.304793




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      23151          4.185964
one of                   1547          4.237839




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      23344          4.207275
one of                    844          4.076857
only                      510          4.252413




BELGIUM



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      11783          4.003688
one of                  11082          4.004711
only                      846          4.009221




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Belgian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      13719          4.015176
one of                   9680          4.005015
only                      312          3.883157




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_French
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      19077          3.962370
one of                   3798          4.102869
only                      836          4.132245




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      20665          4.012819
one of                   1566          3.973320
only                     1480          3.867856




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Asian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      22171          4.010966
one of                    171          3.912852
only                     1369          3.910743




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      22284          3.993844
one of                    963          4.091940
only                      464          4.155721




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      22007          4.014431
one of                   1528          3.929266
only                      176          4.052672




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      22446          4.004873
one of                   1115          3.993790
only                      150          4.092561




AUSTRIA



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       9737          4.089583
one of                  10325          4.132836
only                      425          4.130931




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Austrian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      11083          4.091044
one of                   9146          4.139562
only                      258          4.126859




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      17744          4.095427
one of                   2743          4.155175




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      18282          4.121414
one of                   1389          4.110040
only                      816          4.076610




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      18468          4.134121
one of                   1644          4.008756
only                      375          4.039313




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      18984          4.133804
one of                   1303          3.962267
only                      200          4.058394




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_German
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      19074          4.119880
one of                   1290          4.117139
only                      123          4.085593




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_International
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      19296          4.107699
one of                    979          4.227707
only                      212          4.171850




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      19316          4.123422
one of                   1034          4.085728
only                      137          4.076328




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      18455          4.118131
one of                   1806          4.129683
only                      226          4.124946




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      19354          4.118784
one of                   1027          4.131525
only                      106          4.097015




SWEDEN



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      13229          3.907584
one of                   4962          4.011996
only                      364          3.979256




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Swedish
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      14155          3.893930
one of                   4289          4.055494
only                      111          4.136596




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      16517          3.953452
one of                    833          4.056167
only                     1205          4.046504




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      16803          3.972865
one of                    693          3.877384
only                     1059          3.691946




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Scandinavian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      16812          3.898576
one of                   1729          4.109858
only                       14          4.069464




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Asian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      17475          3.959930
one of                    119          4.002696
only                      961          3.981498




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      17507          3.965349
one of                    715          3.934185
only                      333          3.916628




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      17072          3.984383
one of                   1217          3.854644
only                      266          3.913796




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      17572          3.975848
one of                    825          3.792119
only                      158          3.673298




CZECH REPUBLIC



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8452          4.219981
one of                   6077          4.107937
only                      315          4.149178



cuisines_Czech
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       9450          4.243460
one of                   5263          4.049794
only                      131          4.001024




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      12905          4.135475
one of                    734          4.278389
only                     1205          4.155173



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      13176          4.178008
one of                   1668          4.103645




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      13570          4.164095
one of                   1129          4.028210
only                      145          4.223311



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      13255          4.172541
one of                   1425          4.069387
only                      164          4.220908




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      13682          4.141382
one of                    698          4.168851
only                      464          4.213719



cuisines_International
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      13811          4.124756
one of                    829          4.258593
only                      204          4.319110




SCOTLAND



IRELAND



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating


cuisines_Irish
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6623          4.227371
one of                   3831          4.304638
only                      749          4.258290



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8311          4.228842
one of                   2644          4.308311
only                      248          4.316857




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8757          4.272852
one of                   1445          4.306341
only                     1001          4.168774



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       9179          4.287165
one of                   1954          4.259886
only                       70          3.938714




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       9412          4.278090
one of                   1680          4.262222
only                      111          4.308532



cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      10218          4.285556
one of                    508          3.981231
only                      477          3.964356




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      10330          4.282452
one of                    697          4.179084
only                      176          4.020039



cuisines_Asian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      10340          4.276318
one of                    421          4.016558
only                      442          4.330480




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else


cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      10498          4.280224
one of                    600          4.201725
only                      105          4.312705



cuisines_Chinese
              restaurant_link  total_avg_rating
cuisine_type                                   
no                      10601          4.279710
one of                    362          3.974199
only                      240          3.748694




DENMARK



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6295          4.011159
one of                   2947          4.041122
only                      243          4.068836



cuisines_Danish
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6711          3.993993
one of                   2676          4.066266
only                       98          4.120961




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8197          4.034311
one of                    713          4.011559
only                      575          3.975871



cuisines_Scandinavian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8419          3.994545
one of                   1057          4.103592
only                        9          3.882440




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8638          4.034363
one of                    546          3.993135
only                      301          4.019357



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8774          4.043308
one of                    439          3.892052
only                      272          3.601570




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else


cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8995          4.030371
one of                    239          4.149162
only                      251          3.872809



cuisines_Asian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       8996          4.027290
one of                     44          4.265103
only                      445          4.052935




WALES



CROATIA



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       2980          4.256421
one of                   5296          4.220916
only                       99          4.252800



cuisines_Croatian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3088          4.259518
one of                   5208          4.223634
only                       79          4.099565




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       4846          4.220790
one of                   3398          4.235360
only                      131          3.983919



cuisines_Seafood
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       5656          4.241846
one of                   2653          4.215374
only                       66          4.414843




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7083          4.243082
one of                   1193          4.161125
only                       99          4.191120



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7307          4.236086
one of                   1001          4.192803
only                       67          4.323889




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Barbecue
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7438          4.227191
one of                    937          4.247863



cuisines_Grill
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7681          4.222383
one of                    661          4.325325
only                       33          4.441000




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7708          4.230333
one of                    362          4.170887
only                      305          4.283576



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6846          4.224567
one of                   1385          4.261006
only                      144          4.094841




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else


cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7874          4.220619
one of                    501          4.303631



cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7934          4.226946
one of                    303          4.366665
only                      138          4.184249




ROMANIA



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3974          4.229125
one of                   3646          4.058499
only                      222          4.253667



cuisines_Romanian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       5094          4.225434
one of                   2695          3.963091
only                       53          3.984666




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6300          4.103328
one of                   1285          4.152129
only                      257          4.100870



cuisines_International
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6551          4.088406
one of                   1020          4.169012
only                      271          4.382790




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6825          4.126676
one of                    827          4.021295
only                      190          3.674223



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6886          4.089488
one of                    501          4.272544
only                      455          4.536946




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7133          4.088498
one of                    604          4.250408
only                      105          3.875254



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6929          4.127157
one of                    876          4.070914
only                       37          4.108890




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Eastern European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7176          4.161751
one of                    666          3.976717



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7208          4.112881
one of                    572          4.102632
only                       62          4.097987




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7341          4.120698
one of                    501          4.081726



cuisines_American
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7357          4.102680
one of                    419          4.245214
only                       66          4.433225




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7445          4.113457
one of                    249          4.131825
only                      148          3.818575




HUNGARY



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3620          4.251504
one of                   3687          4.221533
only                      124          4.405049



cuisines_Hungarian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       4143          4.230894
one of                   3222          4.232403
only                       66          4.294465




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6322          4.228823
one of                   1109          4.242445



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6429          4.238118
one of                    538          4.217064
only                      464          4.190452




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6608          4.236641
one of                    667          4.207746
only                      156          4.407335



cuisines_International
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6654          4.216375
one of                    660          4.313201
only                      117          4.521345




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6896          4.242536
one of                    428          4.147858
only                      107          3.796662



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6755          4.245871
one of                    603          4.161798
only                       73          4.368958




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']



cuisines_Eastern European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6966          4.236469
one of                    465          4.230947



cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6988          4.236927
one of                    294          4.234567
only                      149          3.992601




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else


cuisines_American
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6987          4.236016
one of                    321          4.225590
only                      123          4.176530



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       7005          4.243445
one of                    381          4.014713
only                       45          4.160251




FINLAND



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rati


cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       5231          3.975689
one of                   1658          4.053444
only                      483          3.915894



cuisines_Scandinavian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6168          3.937490
one of                   1178          4.133443
only                       26          4.259456



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6193          3.987437
one of                    547          4.175617
only                      632          4.158664




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rati


cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6571          4.035972
one of                    478          3.840165
only                      323          3.757252



cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6772          4.019875
one of                    493          3.899293
only                      107          3.927454



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6578          4.023420
one of                    682          3.944042
only                      112          3.951488




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rati


cuisines_Asian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6835          4.004617
one of                     48          3.812908
only                      489          4.074131



cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6893          4.020035
one of                    262          3.814783
only                      217          3.759460



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       6951          4.029889
one of                    293          3.845577
only                      128          3.785099




BULGARIA



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating


cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       2409          4.224220
one of                   1794          4.242219
only                      266          4.090053



cuisines_Eastern European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3489          4.200636
one of                    980          4.285220



cuisines_Mediterranean
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3805          4.219329
one of                    598          4.279367
only                       66          3.879753




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rati


cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3849          4.239865
one of                    501          4.186460
only                      119          4.275510



cuisines_International
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3893          4.226247
one of                    438          4.268124
only                      138          4.169685



cuisines_Seafood
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3949          4.232112
one of                    453          4.243710
only                       67          4.130137




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rati


cuisines_Barbecue
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3958          4.243593
one of                    511          4.190008



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3461          4.249214
one of                    972          4.210042
only                       36          4.186153



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       4003          4.252607
one of                    400          4.078843
only                       66          4.010669



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       4007          4.224696
one of                    262          4.462051
only                      200          4.301582




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rati


cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       4023          4.229712
one of                    417          4.264028
only                       29          4.360979



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       4109          4.216954
one of                    360          4.282494



cuisines_Grill
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       4212          4.228529
one of                    231          4.330711
only                       26          4.241158




SLOVAKIA



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_' + t] = df[col].apply(lambda x: 1 if t in str(x) else 


cuisines_European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       2793          4.196165
one of                   1053          4.114972
only                      405          4.065649



cuisines_Central European
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3621          4.157368
one of                    630          4.114146



cuisines_Cafe
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3652          4.116258
one of                    242          4.229092
only                      357          4.430540



cuisines_Italian
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3841          4.143126
one of                    281          4.099083
only                      129          4.293008




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rati


cuisines_Pub
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3851          4.147079
one of                    296          4.103129
only                      104          4.056854



cuisines_Pizza
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3874          4.153836
one of                    240          4.024582
only                      137          3.742709



cuisines_Bar
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3785          4.131265
one of                    413          4.134777
only                       53          4.522255



cuisines_International
              restaurant_link  total_avg_rating
cuisine_type                                   
no                       3963          4.127380
one of                    194          4.228610
only          

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']


In [None]:
'''

FRANCE

cuisines_Fast food
              restaurant_link  total_avg_rating
cuisine_type                                   
no                     146159          4.048226
one of                   3504          4.153214
only                     5625          3.455308

'''

In [225]:
# italy_df.shape

In [224]:
# italy_df[italy_df['cuisines_cnt'] == 1].shape

In [223]:
feature_columns = list(set(italy_df.columns) - set(['restaurant_link',
 'country',
 'total_reviews_count',
 'avg_rating',
 'price_level',
 'cuisines_cnt','meals_cnt'
                                                   ]))
feature_columns

['cuisines_Seafood',
 'features_Wheelchair Accessible',
 'features_Seating',
 'features_Table Service',
 'special_diets_Gluten Free Options',
 'features_Serves Alcohol',
 'meals_Drinks',
 'special_diets_Vegetarian Friendly',
 'price_level_€',
 'features_Reservations',
 'cuisines_Mediterranean',
 'cuisines_Italian',
 'price_level_€€-€€€',
 'meals_Dinner',
 'cuisines_Pizza',
 'special_diets_Vegan Options',
 'meals_Breakfast',
 'features_Takeout',
 'meals_Lunch']

In [195]:
cuisines_columns = [col for col in feature_columns if 'cuisines_' in col]
cuisines_columns

['cuisines_Seafood',
 'cuisines_Mediterranean',
 'cuisines_Pizza',
 'cuisines_Italian']

In [182]:
pd.DataFrame(italy_df.groupby(feature_columns).size().sort_values(ascending=False))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,0
cuisines_Seafood,features_Reservations,features_Wheelchair Accessible,cuisines_Mediterranean,features_Seating,cuisines_Pizza,special_diets_Vegetarian Friendly,special_diets_Vegan Options,features_Table Service,features_Serves Alcohol,meals_Breakfast,cuisines_Italian,meals_Drinks,meals_Dinner,meals_Lunch,features_Takeout,special_diets_Gluten Free Options,Unnamed: 17_level_1
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21121
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,16480
0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,3957
0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,3137
0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,2984
0,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1,1,0,0,0,0,0,0,1,1,1,0,0,1,1,0,1
0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1,1
0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,1
0,1,1,0,0,0,0,0,0,1,1,1,0,1,1,0,0,1


In [193]:
italy_test1_df = italy_df[
    (italy_df['cuisines_Italian'] == 1) 
    & 
    (sum([italy_df[col] for col in list(set(feature_columns) - set(['cuisines_Italian']))]) == 0)
]

italy_test1_df.shape

(16480, 22)

In [194]:
italy_test1_df

Unnamed: 0,restaurant_link,country,total_reviews_count,avg_rating,price_level,features_Reservations,features_Seating,features_Table Service,features_Wheelchair Accessible,features_Serves Alcohol,...,meals_Lunch,meals_Breakfast,meals_Drinks,cuisines_Italian,cuisines_Mediterranean,cuisines_Pizza,cuisines_Seafood,special_diets_Vegetarian Friendly,special_diets_Gluten Free Options,special_diets_Vegan Options
650043,g10029467-d18338571,Italy,5.0,5.0,€,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
650054,g1005744-d14918663,Italy,2.0,4.5,,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
650063,g1005933-d11961619,Italy,1.0,3.0,€€-€€€,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
650067,g1005933-d14188705,Italy,6.0,2.5,€,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
650068,g1005933-d15565333,Italy,2.0,5.0,,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874758,g9710376-d11736725,Italy,0.0,,€€-€€€,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
874779,g9784246-d13986483,Italy,2.0,4.0,,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
874781,g9786965-d19780255,Italy,0.0,,,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
874790,g984172-d19268829,Italy,0.0,,€€-€€€,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


# 2.2 Hypothesis

In [39]:
bavaria_df = ger_df[ger_df['region'] == 'Bavaria']
bavaria_df.shape

(21531, 28)

In [48]:
ger_df.groupby('region').size().sort_values(ascending=False)

region
Bavaria                       21531
North Rhine-Westphalia        21116
Baden-Wurttemberg             16236
Hesse                          9845
Lower Saxony                   9232
Berlin                         7217
Rhineland-Palatinate           6651
Saxony                         4510
Schleswig-Holstein             4206
Hamburg                        3625
Mecklenburg-West Pomerania     2634
Brandenburg                    2378
Thuringia                      1986
Saxony-Anhalt                  1799
Saarland                       1302
State of Bremen                1062
dtype: int64

In [51]:
bavaria_df = ger_df[ger_df['region'] == 'Bavaria']
bavaria_df.groupby('city').size().sort_values(ascending=False)

city
Munich          3508
Nuremberg       1113
Augsburg         474
Regensburg       379
Wurzburg         326
                ... 
Rottau             1
Babensham          1
Burgwallbach       1
Lamerdingen        1
Ebersdorf          1
Length: 1707, dtype: int64

In [50]:
rhine_df = ger_df[ger_df['region'] == 'North Rhine-Westphalia']
rhine_df.groupby('city').size().sort_values(ascending=False)

city
Cologne       2399
Dusseldorf    1683
Dortmund       764
Essen          678
Bonn           653
              ... 
Hembergen        1
Flueren          1
Enniger          1
Elmpt            1
Zuschen          1
Length: 462, dtype: int64

In [91]:
def compare_by_feature(df, features_col, feature):

    df2 = df[
        ['restaurant_link','total_reviews_count','avg_rating',features_col]
    ][df['total_reviews_count'] >= 20]
    df2['feature_flg'] = df2[features_col].apply(lambda x: 1 if feature in str(x) else 0)

    df2['summary_score'] = df2['total_reviews_count'] * df2['avg_rating']

    df2_agg = df2.groupby('feature_flg').agg({'restaurant_link': 'count', 'summary_score': 'sum', 'total_reviews_count': 'sum'})
    df2_agg['total_avg_rating'] = df2_agg['summary_score'] / df2_agg['total_reviews_count']

    return df2_agg[['restaurant_link', 'total_avg_rating']]



In [143]:
def filter_with_threshold(df):
#     size of the minimal group
    min_group_size_ratio = df['restaurant_link'].min() / df['restaurant_link'].sum()
#     ratio of difference between ratings (for group with feature and group without feature) and rating of base option (group without feature)
    diff_rating_ratio = abs((df['total_avg_rating'].max() - df['total_avg_rating'].min()) / df['total_avg_rating'].loc[0])
    
    if len(df['total_avg_rating']) == 2:
        if df['total_avg_rating'].loc[0] < df['total_avg_rating'].loc[1]:
            featured_group_rating = 'better'
        elif df['total_avg_rating'].loc[0] > df['total_avg_rating'].loc[1]:
            featured_group_rating = 'worse'
        else:
            featured_group_rating = 'the same'
    else:
        featured_group_rating = 'not applicable'
    return min_group_size_ratio, diff_rating_ratio, featured_group_rating


In [226]:
def print_significant_groups(df, feature_columns, featured_cnt_threshold, group_size_threshold, rating_diff_threshold):
    
    for col in feature_columns:
        list_of_features = [f[0] for f in rating_of_column_values(df, col, separator=', ') if f[1] >= featured_cnt_threshold]
        for feature in list_of_features:
            df_compare = compare_by_feature(df, col, feature)
            param_filter = filter_with_threshold(df_compare)
            if (param_filter[0] > group_size_threshold 
                and 
                param_filter[1] > rating_diff_threshold
               ):
                print()
                print(col, '-', feature)
                print(df_compare)
                print("Ratio of the smallest group:\t{0:.3f}".format(param_filter[0]))
                print("Variation (??):\t\t\t{0:.3f}".format(param_filter[1]))
                if param_filter[2] == 'not applicable':
                    print(param_filter[2])
                else: 
                    print("The featured group is " + str.upper(param_filter[2]))
#             else:
#                 print('the minimal group is too small or the difference < 5%')
            
    return 1

In [107]:
# filter_with_threshold(compare_by_feature(bavaria_df, 'cuisines', 'Seafood'))

In [118]:
feature_columns = ['meals', 'cuisines', 'special_diets', 'features']

featured_cnt_threshold = 20
group_size_threshold = 0.03
rating_diff_threshold = 0.05

In [162]:
print_significant_groups(bavaria_df, feature_columns, featured_cnt_threshold, group_size_threshold, rating_diff_threshold)




special_diets - Gluten Free Options
             restaurant_link  total_avg_rating
feature_flg                                   
0                       7254          4.023327
1                        901          4.228804
Ratio of the smallest group:	0.110
Variation (??):			0.051
The featured group is BETTER


1

In [158]:
print_significant_groups(rhine_df, feature_columns, featured_cnt_threshold, group_size_threshold, rating_diff_threshold)



1

In [159]:
feature_columns = ['price_level', 'meals', 'cuisines', 'special_diets', 'features']

featured_cnt_threshold = 20
group_size_threshold = 0.1
rating_diff_threshold = 0.05

In [163]:
for country in [f[0] for f in rating_of_column_values(rest_df, 'country', separator=', ')]:
    print()
    print()
    print(str.upper(country))
    print()
    print_significant_groups(rest_df_upd[rest_df_upd['country'] == country], feature_columns, featured_cnt_threshold, group_size_threshold, rating_diff_threshold)




ITALY


cuisines - Pizza
             restaurant_link  total_avg_rating
feature_flg                                   
0                      93297          4.129211
1                      39474          3.914583
Ratio of the smallest group:	0.297
Variation (??):			0.052
The featured group is WORSE

special_diets - Vegetarian Friendly
             restaurant_link  total_avg_rating
feature_flg                                   
0                      60261          3.896782
1                      72510          4.135452
Ratio of the smallest group:	0.454
Variation (??):			0.061
The featured group is BETTER

special_diets - Gluten Free Options
             restaurant_link  total_avg_rating
feature_flg                                   
0                     101810          3.927545
1                      30961          4.216755
Ratio of the smallest group:	0.233
Variation (??):			0.074
The featured group is BETTER

special_diets - Vegan Options
             restaurant_link  total_avg_r



THE NETHERLANDS


special_diets - Vegan Options
             restaurant_link  total_avg_rating
feature_flg                                   
0                      11166          4.006961
1                       3560          4.246004
Ratio of the smallest group:	0.242
Variation (??):			0.060
The featured group is BETTER

special_diets - Gluten Free Options
             restaurant_link  total_avg_rating
feature_flg                                   
0                      11313          3.974716
1                       3413          4.267141
Ratio of the smallest group:	0.232
Variation (??):			0.074
The featured group is BETTER


POLAND


special_diets - Vegetarian Friendly
             restaurant_link  total_avg_rating
feature_flg                                   
0                       5228          4.058677
1                       3322          4.281703
Ratio of the smallest group:	0.389
Variation (??):			0.055
The featured group is BETTER

special_diets - Vegan Options
       


special_diets - Vegan Options
             restaurant_link  total_avg_rating
feature_flg                                   
0                       2231          4.124633
1                        790          4.339146
Ratio of the smallest group:	0.262
Variation (??):			0.052
The featured group is BETTER

special_diets - Gluten Free Options
             restaurant_link  total_avg_rating
feature_flg                                   
0                       2452          4.093224
1                        569          4.356371
Ratio of the smallest group:	0.188
Variation (??):			0.064
The featured group is BETTER


FINLAND


cuisines - Pizza
             restaurant_link  total_avg_rating
feature_flg                                   
0                       2034          4.048825
1                        273          3.836363
Ratio of the smallest group:	0.118
Variation (??):			0.052
The featured group is WORSE

special_diets - Vegan Options
             restaurant_link  total_avg_ratin

In [None]:
'''
IDEALLY: if I compare by 1 feature, other features must be the same

BUT: restaurants with 'special_diets' are BETTER almost always
'''

In [None]:
'''
CHECK correlations between features and target?
'''

In [None]:
'''
CREATE A NEW TABLE with flags of features -- it will be fast to calculate once, not every iteration
'''

In [None]:
'''
SPAIN

special_diets - Vegetarian Friendly
             restaurant_link  total_avg_rating
feature_flg                                   
0                      43386          3.848150
1                      36371          4.221928
Ratio of the smallest group:	0.456
Variation (??):			0.097
The featured group is BETTER


CZECH REPUBLIC

features - Outdoor Seating
             restaurant_link  total_avg_rating
feature_flg                                   
0                       4113          4.203806
1                        546          3.920962
Ratio of the smallest group:	0.117
Variation (??):			0.067
The featured group is WORSE
'''

In [None]:
'''
NEED TO FILTER CASES WITH TOO SMALL SIZE OF THE SMALLEST GROUP !!!
'''

In [None]:
'''

cuisines - Fast food
0                       7941          4.080257
1                        214          3.930997

cuisines - Sushi
0                       7887          4.080588
1                        268          3.955578

cuisines - Seafood
0                       7871          4.066961
1                        284          4.307471

cuisines - Chinese
0                       7952          4.082207
1                        203          3.736369

cuisines - Mongolian
0                       8106          4.079393
1                         49          3.516608

cuisines - Eastern European
0                       8143          4.076798
1                         12          4.604167


special_diets - Vegan Options
0                       6402          4.016112
1                       1753          4.201349

special_diets - Gluten Free Options
0                       7254          4.023327
1                        901          4.228804

'''

In [52]:
# import seaborn as sns
# import matplotlib.pyplot as plt

In [72]:
# sns.histplot(data=bavaria_df[bavaria_df['total_reviews_count'] >= 20], x='total_reviews_count', bins=200)
# plt.xlim([20, 2000])
# plt.ylim([0, 1000])
# plt.show()