# 0.0. Imports

In [1]:
import pandas                       as pd
import numpy                        as np
import plotly.express               as px
import plotly.graph_objects         as go
import streamlit                    as st
import inflection
import folium

from streamlit_folium               import folium_static
from streamlit_option_menu          import option_menu
from PIL                            import Image
from folium.plugins                 import MarkerCluster
from haversine                      import haversine

## 0.2. Helper Functions

In [2]:
#Renomear as colunas do df
def rename_columns(df):
    title = lambda x: inflection.titleize(x)
    snakecase = lambda x: inflection.underscore(x)
    spaces = lambda x: x.replace(" ","")
    cols_old = list(df.columns)
    cols_old = list(map(title, cols_old))
    cols_old = list(map(spaces, cols_old))
    cols_new = list(map(snakecase, cols_old))
    df.columns = cols_new
    return df

In [3]:
# Excluir linhas NaN e duplicadas
def clean_code (df):
    df.dropna(inplace =True)
    df.drop_duplicates(inplace = True)
    
    return df

In [4]:
# Alterar linhas
def change_rows (df):
    df['cuisines'] = df.loc[ :, "cuisines"].apply(lambda x: x.split(",")[0]) # conterá apenas o primeiro tipo de culinária presente em cada célula
    df["restaurant_name"]= df["restaurant_name"].str.title() # converte a primeira letra de cada palavra em maiúscula e as letras subsequentes em minúscula
    
    return df

In [5]:
# Substituindo código/países
def country_name (df):
    df['country_code'] = df['country_code'].map({
    1: "India",
    14: "Australia",
    30: "Brazil",
    37: "Canada",
    94: "Indonesia",
    148: "New Zeland",
    162: "Philippines",
    166: "Qatar",
    184: "Singapure",
    189: "South Africa",
    191: "Sri Lanka",
    208: "Turkey",
    214: "United Arab Emirates",
    215: "England",
    216: "United States of America",
})
    return df   

In [6]:
# Renomeando linhas de uma coluna específica
def create_price_type (df):
    df['price_range'] = df['price_range'].map({
    1: "Cheap",
    2: "Normal",
    3: "Expensive",
    4: "Gourmet",
})
    return df  

In [7]:
# Definindo cores
def color_name (df):
    df['rating_color'] = df['rating_color'].map({
    '3F7E00': 'darkgreen',
    '5BA829': 'green',
    '9ACD32': 'lightgreen',
    'CDD614': 'orange',
    'FFBA00': 'red',
    'CBCBC8': 'darkred',
    'FF7800': 'darkred',
})
    return df 

In [8]:
#Criando uma coluna com o valor em dólar
def price_in_dollar (df):
    df['price_in_dollar'] = np.round(df[['currency', 'average_cost_for_two']].apply(lambda x: (x['average_cost_for_two'] / 12.85) if x['currency'] == 'Botswana Pula(P)' else
                                                                                     (x['average_cost_for_two'] / 5.31) if x['currency'] == 'Brazilian Real(R$)' else
                                                                                     (x['average_cost_for_two'] / 1) if x['currency'] == 'Dollar($)' else
                                                                                     (x['average_cost_for_two'] / 3.67) if x['currency'] == 'Emirati Diram(AED)' else
                                                                                     (x['average_cost_for_two'] / 82.68) if x['currency'] == 'Indian Rupees(Rs.)' else
                                                                                     (x['average_cost_for_two'] / 15608.45) if x['currency'] == 'Indonesian Rupiah(IDR)' else
                                                                                     (x['average_cost_for_two'] / 1.57) if x['currency'] == 'NewZealand($)' else
                                                                                     (x['average_cost_for_two'] / 0.819257) if x['currency'] == 'Pounds(£)' else
                                                                                     (x['average_cost_for_two'] / 3.64) if x['currency'] == 'Qatari Rial(QR)' else
                                                                                     (x['average_cost_for_two'] / 17.59) if x['currency'] == 'Rand(R)' else
                                                                                     (x['average_cost_for_two'] / 366.86) if x['currency'] == 'Sri Lankan Rupee(LKR)' else
                                                                                     (x['average_cost_for_two'] / 18.65) if x['currency'] == 'Turkish Lira(TL)' else 0, axis = 1),2)
    return df

## 0.3. Loading Data

In [10]:
df = pd.read_csv('../dataset/zomato.csv')
df.head(2).T

Unnamed: 0,0,1
Restaurant ID,6310675,6310675
Restaurant Name,Mama Lou's Italian Kitchen,Mama Lou's Italian Kitchen
Country Code,162,162
City,Las Piñas City,Las Piñas City
Address,"Block 1, Lot 36, Tropical Avenue Corner Tropic...","Block 1, Lot 36, Tropical Avenue Corner Tropic..."
Locality,BF International,BF International
Locality Verbose,"BF International, Las Piñas City","BF International, Las Piñas City"
Longitude,121.009787,121.009787
Latitude,14.447615,14.447615
Cuisines,Italian,Italian


# 1.0. Data Description

In [11]:
df1 = df.copy()

## 1.1. Rename Columns

In [12]:
df1 = rename_columns(df1)
df1.columns

Index(['restaurant_id', 'restaurant_name', 'country_code', 'city', 'address',
       'locality', 'locality_verbose', 'longitude', 'latitude', 'cuisines',
       'average_cost_for_two', 'currency', 'has_table_booking',
       'has_online_delivery', 'is_delivering_now', 'switch_to_order_menu',
       'price_range', 'aggregate_rating', 'rating_color', 'rating_text',
       'votes'],
      dtype='object')

## 1.2. Data Dimensions

In [13]:
print ('Number os Rows: {}'.format (df1.shape[0]))
print ('Number os Columns: {}'.format (df1.shape[1]))

Number os Rows: 7527
Number os Columns: 21


## 1.3. Data Info

In [14]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7527 entries, 0 to 7526
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   restaurant_id         7527 non-null   int64  
 1   restaurant_name       7527 non-null   object 
 2   country_code          7527 non-null   int64  
 3   city                  7527 non-null   object 
 4   address               7527 non-null   object 
 5   locality              7527 non-null   object 
 6   locality_verbose      7527 non-null   object 
 7   longitude             7527 non-null   float64
 8   latitude              7527 non-null   float64
 9   cuisines              7512 non-null   object 
 10  average_cost_for_two  7527 non-null   int64  
 11  currency              7527 non-null   object 
 12  has_table_booking     7527 non-null   int64  
 13  has_online_delivery   7527 non-null   int64  
 14  is_delivering_now     7527 non-null   int64  
 15  switch_to_order_menu 

## 1.4. Check NA

In [15]:
df1.isna().sum()

restaurant_id            0
restaurant_name          0
country_code             0
city                     0
address                  0
locality                 0
locality_verbose         0
longitude                0
latitude                 0
cuisines                15
average_cost_for_two     0
currency                 0
has_table_booking        0
has_online_delivery      0
is_delivering_now        0
switch_to_order_menu     0
price_range              0
aggregate_rating         0
rating_color             0
rating_text              0
votes                    0
dtype: int64

## 1.5. Fillout NA

In [16]:
df1 = clean_code (df1)

# 2.0. Feature Engineering

In [17]:
df2 = df1.copy()

In [18]:
# Alterar linhas das variáveis cuisines e restaurante_name
df2 = change_rows (df2)

# Substituindo código dos países pelo nome
df2 = country_name(df2)

# Atribuindo rótulos nas linhas da variável price_range
df2 = create_price_type(df2) 

# Definindo cores na variável rating_color (coordenadas no mapa)
df2 = color_name(df2)                                                                  

# Criando uma variável com o valor da variável average_cost_for_two em dólar
df2 = price_in_dollar(df2)

In [19]:
df2.head(1).T

Unnamed: 0,0
restaurant_id,6310675
restaurant_name,Mama Lou'S Italian Kitchen
country_code,Philippines
city,Las Piñas City
address,"Block 1, Lot 36, Tropical Avenue Corner Tropic..."
locality,BF International
locality_verbose,"BF International, Las Piñas City"
longitude,121.009787
latitude,14.447615
cuisines,Italian


# 3.0. Exploratory Data Analysis

## 3.1. Análise Geral

### 1. Quantos restaurantes únicos estão registrados?

In [15]:
df['restaurant_name'].nunique()

5891

### 2. Quantos países únicos estão registrados?

In [16]:
df['country_code'].nunique()

15

### 3. Quantas cidades únicas estão registradas?

In [17]:
df['city'].nunique()

125

### 4. Qual é o total de avaliações feitas?

In [18]:
df['aggregate_rating'].count()

6929

### 5. Qual é o total de tipos de culinária registrados?

In [19]:
df['cuisines'].nunique()

165

## 3.2. Análise País

### 1. Qual o nome do país que possui mais cidades registradas?

In [20]:
df.loc[:, ['country_code', 'city']].groupby('country_code').nunique().sort_values(by = 'city', ascending = False).head(1).reset_index()

Unnamed: 0,country_code,city
0,India,49


### 2. Qual o nome do país que possui mais restaurantes registrados?

In [21]:
df.loc[:, ['country_code', 'restaurant_name']].groupby('country_code').nunique().sort_values(by = 'restaurant_name', ascending = False).head(1).reset_index()

Unnamed: 0,country_code,restaurant_name
0,India,2474


### 3. Qual o nome do país que possui mais restaurantes com o nível de preço igual a 4 registrados?

In [22]:
df.loc[df['price_range'] == "Gourmet", ['country_code', 'price_range']].groupby('country_code').size().sort_values(ascending = False).head(1).reset_index()

Unnamed: 0,country_code,0
0,United States of America,415


### 4. Qual o nome do país que possui a maior quantidade de tipos de culinária distintos?

In [23]:
df.loc[:, ['country_code', 'cuisines']].groupby('country_code').nunique().sort_values(by = 'cuisines', ascending = False).head(1).reset_index()

Unnamed: 0,country_code,cuisines
0,India,77


### 5. Qual o nome do país que possui a maior quantidade de avaliações feitas?

In [24]:
df.loc[:, ['country_code', 'aggregate_rating']].groupby('country_code').size().sort_values(ascending = False).head(1).reset_index()

Unnamed: 0,country_code,0
0,India,3111


### 6. Qual o nome do país que possui a maior quantidade de restaurantes que fazem entrega?

In [25]:
df.loc[df['has_online_delivery'] == 1, 'country_code'].value_counts().reset_index()

Unnamed: 0,index,country_code
0,India,2177
1,United Arab Emirates,205
2,Qatar,37
3,Philippines,9


### 7. Qual o nome do país que possui a maior quantidade de restaurantes que aceitam reservas?

In [26]:
(df.loc[df['has_table_booking'] == 1, ['city', 'country_code']]
   .groupby('country_code')
   .size()
   .sort_values(ascending = False)
   .head()
   .reset_index())

Unnamed: 0,country_code,0
0,India,256
1,England,55
2,Australia,29
3,Philippines,22
4,New Zeland,19


### 8. Qual o nome do país que possui, na média, a maior nota média registrada?

In [27]:
np.round (df.loc[:, ['country_code', 'aggregate_rating']].groupby('country_code').mean().sort_values('aggregate_rating', ascending = False).head(5).reset_index(),2)

Unnamed: 0,country_code,aggregate_rating
0,Indonesia,4.6
1,Philippines,4.46
2,Singapure,4.44
3,United States of America,4.4
4,Australia,4.37


### 9. Qual o nome do país que possui, na média, a menor nota média registrada?

In [28]:
np.round(df.loc[:, ['country_code', 'aggregate_rating']].groupby('country_code').mean().sort_values('aggregate_rating', ascending = True).head(5).reset_index(),2)

Unnamed: 0,country_code,aggregate_rating
0,Brazil,3.32
1,United Arab Emirates,4.02
2,India,4.04
3,England,4.04
4,Sri Lanka,4.06


### 10. Qual é a média de preço de um prato para dois por país?

In [29]:
np.round(df.loc[:, ['country_code', 'average_cost_for_two']].groupby('country_code').mean().sort_values('average_cost_for_two', ascending = False).reset_index(),2)

Unnamed: 0,country_code,average_cost_for_two
0,Indonesia,303000.0
1,Australia,138959.78
2,Sri Lanka,2579.38
3,Philippines,1227.82
4,India,704.4
5,South Africa,339.23
6,Qatar,174.0
7,United Arab Emirates,153.72
8,Singapure,141.44
9,Brazil,138.81


## 3.3. Análise Restaurantes

### 1. Qual o nome do restaurante  que possui maior quantidade de avaliações?

In [30]:
(df.loc[:, ['votes', 'restaurant_name', 'country_code', 'city', 'aggregate_rating']]
  .groupby(['restaurant_name', 'aggregate_rating', 'votes'])
  .sum()
  .sort_values(by='votes', ascending = False)
  .head(1)
  .reset_index())

Unnamed: 0,restaurant_name,aggregate_rating,votes,country_code,city
0,Bawarchi,4.5,41333,India,Hyderabad


### 2. Qual o nome do restaurante com maior nota média?

In [31]:
linhas = df['aggregate_rating'] == 4.9
df.loc[linhas, ['restaurant_name', 'aggregate_rating', 'votes', 'country_code','city']].sort_values(by='votes', ascending = False).head(1).reset_index()

Unnamed: 0,index,restaurant_name,aggregate_rating,votes,country_code,city
0,3038,Byg Brewski Brewing Company,4.9,17394,India,Bangalore


In [32]:
df.loc[df['restaurant_name'] == 'Byg Brewski Brewing Company',['restaurant_name', 'address', 'aggregate_rating']]

Unnamed: 0,restaurant_name,address,aggregate_rating
2975,Byg Brewski Brewing Company,"22/123, Byrathi Village, Bidarahalli Hobli, He...",4.8
3038,Byg Brewski Brewing Company,"Behind MK Retail, Sarjapur Road, Bangalore",4.9


### 3. Qual o nome do restaurante que possui o maior valor de um prato para duas pessoas?

In [33]:
df.loc[:, ['restaurant_name','average_cost_for_two', 'cuisines', 'country_code']].sort_values(by='average_cost_for_two', ascending = False).head(1).reset_index()

Unnamed: 0,index,restaurant_name,average_cost_for_two,cuisines,country_code
0,385,D'Arry'S Verandah Restaurant,25000017,Modern Australian,Australia


### 4. Qual o nome do restaurante de tipo de culinária brasileira que possui a maior média de avaliação?

In [34]:
(df.loc[(df['country_code'] == 'Brazil') & (df['aggregate_rating']  == 4.9), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'votes', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,254,Aprazível,Brazil,Rio de Janeiro,Brazilian,4.9,63


### 5. Qual o nome do restaurante de tipo de culinária brasileira que possui a menor média de avaliação?

In [35]:
(df.loc[(df['country_code'] == 'Brazil') & (df['aggregate_rating']  != 0.0), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'aggregate_rating', ascending = True)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,245,Natu Sucos,Brazil,Rio de Janeiro,Healthy Food,2.3,3


### 6. Os restaurantes que aceitam pedido online são também, na média, os restaurantes que mais possuem avaliações registradas?

In [36]:
round(df.loc[(df['has_online_delivery'] == 1), 'votes'].agg(['sum', 'count','mean']).reset_index(),2)

Unnamed: 0,index,votes
0,sum,2036659.0
1,count,2428.0
2,mean,838.82


In [37]:
r = np.round(df.loc[(df['has_online_delivery'] == 1), 'aggregate_rating'].mean(),2)
print(f'Os restaurantes que ACEITAM pedido online, tem média {r}')

Os restaurantes que ACEITAM pedido online, tem média 4.1


---

In [38]:
round(df.loc[(df['has_online_delivery'] == 0), 'votes'].agg(['sum', 'count','mean']).reset_index(),2)

Unnamed: 0,index,votes
0,sum,2157874.0
1,count,4501.0
2,mean,479.42


In [39]:
r = np.round(df.loc[(df['has_online_delivery'] == 0), 'aggregate_rating'].mean(),2)
print(f'Os restaurantes que  NÃO ACEITAM pedido online, tem média {r}')

Os restaurantes que  NÃO ACEITAM pedido online, tem média 4.15


### 7. Os restaurantes que fazem reservas são também, na média, os restaurantes que possuem o maior valor médio de um prato para duas pessoas?

In [40]:
round(df.loc[(df['has_table_booking'] == 1), 'average_cost_for_two'].agg(['sum', 'count','mean']).reset_index(),2)

Unnamed: 0,index,average_cost_for_two
0,sum,29399338.0
1,count,420.0
2,mean,69998.42


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [41]:
round(df.loc[(df['has_table_booking'] == 0), 'average_cost_for_two'].agg(['sum', 'count','mean']).reset_index(),2)

Unnamed: 0,index,average_cost_for_two
0,sum,22707277.0
1,count,6509.0
2,mean,3488.6


### 8. Os restaurantes do tipo culinária japonesa dos EUA possuem um valor médio de prato para duas pessoas maior que as churrascarias americanas (BBQ)?

In [42]:
round(df.loc[(df['cuisines'] == 'Japanese') & (df['country_code'] == 'United States of America'), 'average_cost_for_two'].agg(['sum', 'count','mean']).reset_index(),2)

Unnamed: 0,index,average_cost_for_two
0,sum,1805.0
1,count,32.0
2,mean,56.41


-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [43]:
round(df.loc[(df['cuisines'] == 'BBQ') & (df['country_code'] == 'United States of America'), 'average_cost_for_two'].agg(['sum', 'count','mean']).reset_index(),2)

Unnamed: 0,index,average_cost_for_two
0,sum,1665.0
1,count,42.0
2,mean,39.64


## 3.4. Análise Culinárias

### 1. Dos restaurantes que possuem o tipo de  culinária italiana, qual o nome do restaurante com a maior média de avaliação? 

In [44]:
(df.loc[(df['cuisines'] == 'Italian') & (df['aggregate_rating']  == 4.9), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'votes', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,5408,Darshan,India,Pune,Italian,4.9,3106


### 2. Dos restaurantes que possuem o tipo de  culinária italiana, qual o nome do restaurante com a menor média de avaliação? 

In [45]:
(df.loc[(df['cuisines'] == 'Italian') & (df['aggregate_rating']  > 0.0), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'aggregate_rating', ascending = True)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,1587,Siena'S,Australia,Perth,Italian,2.3,832


### 3. Dos restaurantes que possuem o tipo de  culinária americana, qual o nome do restaurante com a maior média de avaliação? 

In [46]:
(df.loc[(df['cuisines'] == 'American') & (df['aggregate_rating']  == 4.9), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'votes', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,1469,Shake Shack,United States of America,New York City,American,4.9,1633


### 4. Dos restaurantes que possuem o tipo de  culinária americana, qual o nome do restaurante com a menor média de avaliação? 

In [47]:
(df.loc[(df['cuisines'] == 'American') & (df['aggregate_rating']  > 0.0), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'aggregate_rating', ascending = True)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,7204,Guy Fieri'S Kitchen & Bar,South Africa,Pretoria,American,2.2,183


### 5. Dos restaurantes que possuem o tipo de  culinária arabe, qual o nome do restaurante com a maior média de avaliação? 

In [48]:
(df.loc[(df['cuisines'] == 'Arabian') & (df['aggregate_rating']  < 4.9), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'aggregate_rating', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,3896,Mandi@36,India,Hyderabad,Arabian,4.7,1085


### 6. Dos restaurantes que possuem o tipo de  culinária arabe, qual o nome do restaurante com a menor média de avaliação? 

In [49]:
(df.loc[(df['cuisines'] == 'Arabian') & (df['aggregate_rating']  > 0.0), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'aggregate_rating', ascending = True)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,169,Empório Árabe,Brazil,Brasília,Arabian,2.7,5


### 7. Dos restaurantes que possuem o tipo de  culinária japonesa, qual o nome do restaurante com a maior média de avaliação? 

In [50]:
(df.loc[(df['cuisines'] == 'Japanese') & (df['aggregate_rating']  == 4.9), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'votes', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,17,Mendokoro Ramenba,Philippines,Makati City,Japanese,4.9,1832


### 8. Dos restaurantes que possuem o tipo de culinária japonesa, qual o nome do restaurante com a menor média de avaliação? 

In [51]:
(df.loc[(df['cuisines'] == 'Japanese') & (df['aggregate_rating']  > 0.0), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'aggregate_rating', ascending = True)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,181,Orienthai,Brazil,Rio de Janeiro,Japanese,2.4,8


### 9. Dos restaurantes que possuem o tipo de culinária  qual o nome do restaurante com a maior média de avaliação? 

In [52]:
(df.loc[(df['cuisines'] == 'Home-made') & (df['aggregate_rating']  < 4.9), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'votes', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,7525,Kanaat Lokantası,Turkey,İstanbul,Home-made,4.0,770


### 10. Dos restaurantes que possuem o tipo de culinária  qual o nome do restaurante com a menor média de avaliação? 

In [53]:
(df.loc[(df['cuisines'] == 'Home-made') & (df['aggregate_rating']  > 0.0), ['restaurant_name', 'country_code', 'city', 'cuisines', 'aggregate_rating', 'votes']]
   .sort_values(by = 'aggregate_rating', ascending = True)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,country_code,city,cuisines,aggregate_rating,votes
0,7373,Gurmekan Restaurant,Turkey,Ankara,Home-made,3.7,12


### 11. Qual é o tipo de culinária que possui o maior valor médio de um prato para duas pessoas?

In [54]:
df.loc[:, ['cuisines', 'average_cost_for_two']].groupby('cuisines').max().sort_values(by = 'average_cost_for_two', ascending = False).head(1)

Unnamed: 0_level_0,average_cost_for_two
cuisines,Unnamed: 1_level_1
Modern Australian,25000017


### 12. Qual é o tipo de culinária que possui a maior nota média?

In [55]:
(df.loc[(df['aggregate_rating']  == 4.9), ['restaurant_name', 'cuisines', 'aggregate_rating', 'votes', 'country_code', 'city']]
   .sort_values(by = 'votes', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,index,restaurant_name,cuisines,aggregate_rating,votes,country_code,city
0,3038,Byg Brewski Brewing Company,Continental,4.9,17394,India,Bangalore


### 13. Qual o tipo de culinária que possui mais restaurantes que aceitam pedidos online e fazem entregas?

In [56]:
(df.loc[(df['has_online_delivery']  == 1) & (df['is_delivering_now']  == 1), ['cuisines', 'has_online_delivery']]
   .groupby('cuisines')
   .count()
   .sort_values(by = 'has_online_delivery', ascending = False)
   .head(1)
   .reset_index())

Unnamed: 0,cuisines,has_online_delivery
0,North Indian,317


# 4.0. Deploy

## 4.1. Funções Gráficos 

In [57]:
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------PAISES--------------------------------------------------------------------------------------

def countries_operations (df, coly, operation, labelx, labely):
    if operation == 'count':
        
        df_aux = df.loc[:, ['country_code', coly]].groupby('country_code').count().reset_index()
        fig = px.bar(df_aux, x = 'country_code', y = coly, text_auto=True, color = 'country_code', 
                     color_discrete_sequence = px.colors.sequential.amp, 
                     labels={coly:labely, 'country_code':labelx}, height= 600)
        
        fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)', 
                          marker_line_width=0.8)
        fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'}, showlegend=False)
    
    elif operation == 'nunique':
        df_aux = df.loc[:, ['country_code', coly]].groupby('country_code').nunique().reset_index()
        fig = px.bar(df_aux, x = 'country_code', y = coly, text_auto=True, color = 'country_code',
                     color_discrete_sequence = px.colors.sequential.amp, 
                    labels={coly:labely, 'country_code':labelx}, height= 600)
        fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)', 
                          marker_line_width=0.8)
        fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'}, showlegend=False)
        
    elif operation == 'mean':
        df_aux = np.round ( df.loc[:, ['country_code', coly]].groupby('country_code').mean().reset_index(), 2)
        fig = px.bar(df_aux, x = 'country_code', y = coly, text_auto=True, color = 'country_code',
                     color_discrete_sequence = px.colors.sequential.amp, 
                    labels={coly:labely, 'country_code':labelx}, height= 600)
        fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)', 
                          marker_line_width=0.8)
        fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'}, showlegend=False)
    
    return fig

def df_tables (df, condicao):
    if condicao == 'mean':
        df_tab = np.round(df.loc[: ,['country_code', 'price_in_dollar']]
                         .groupby('country_code')
                         .mean('price_in_dollar')
                         .sort_values(['price_in_dollar'], ascending = False)
                         .reset_index(),2)
        df_tab.columns = ['Country', 'Average Cost for Two (US$)']
    
    elif condicao == 'nunique':
        df_tab = np.round(df.loc[:, ['country_code', 'cuisines']]
                            .groupby('country_code')
                            .nunique()
                            .sort_values(by = 'cuisines', ascending = False)
                            .reset_index(),2)
        df_tab.columns = ['Country', 'Cuisines']
    
    return df_tab

#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 
#---------------------------------------------------------------------------MENU CIDADES------------------------------------------------------------------------------------

def top_ten_countries (df):
    lin = (df['country_code'].isin(paises))
    df_aux = (df.loc[lin,['restaurant_name','city', 'country_code']]
                .groupby(['city','country_code'])
                .nunique()
                .sort_values(by = 'restaurant_name', ascending = True)
                .tail(10)
                .reset_index())
    fig = px.bar(df_aux, x = 'city', y = 'restaurant_name', color = 'country_code',  
                 color_discrete_sequence = px.colors.sequential.amp, text_auto=True, 
                 labels={'city': 'Cidades', 'restaurant_name':'Quantidade de Restaurantes', 
                         'country_code':'Países'}, height= 600)

    fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)', marker_line_width=0.8)
    fig.update_layout(xaxis={'categoryorder': 'total descending'})
    
    return fig

def avg_topseven_cities (df, condition, mean):
    lin = (df['country_code'].isin(paises))
    if condition == 'maior_igual':
        df_aux = (df.loc[(df['aggregate_rating'] >= mean) & lin,['restaurant_name','city', 'country_code']]
                    .groupby(['country_code','city'])
                    .count()
                    .sort_values('restaurant_name', ascending = False)
                    .head(7)
                    .reset_index())

        fig = px.bar(df_aux, x = 'city', y = 'restaurant_name', 
                     color = 'country_code', color_discrete_sequence = px.colors.sequential.amp, text_auto=True, 
                     labels={'city': 'Cidades', 'restaurant_name':'Quantidade de Restaurantes', 
                             'country_code':'Países'}, height= 600)
        
        fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)',marker_line_width=0.8)
        fig.update_layout(xaxis={'categoryorder': 'total descending'})
    
    elif condition == 'menor_igual':
        df_aux = (df.loc[(df['aggregate_rating'] <= mean) & lin,['restaurant_name','city', 'country_code']]
                    .groupby(['country_code','city'])
                    .count()
                    .sort_values('restaurant_name', ascending = False)
                    .head(7)
                    .reset_index())

        fig = px.bar(df_aux, x = 'city', y = 'restaurant_name', 
                     color = 'country_code', color_discrete_sequence = px.colors.sequential.amp, text_auto=True,
                     labels={'city': 'Cidades', 'restaurant_name':'Quantidade de Restaurantes', 
                             'country_code':'Países'}, height= 600)
        
        fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)',marker_line_width=0.8)
        fig.update_layout(xaxis={'categoryorder': 'total descending', })
        
    return fig

def cooking_varieties (df):
    lin = (df['country_code'].isin(paises))
    df_aux = (df.loc[lin, ['country_code', 'city', 'cuisines']]
                .groupby(['city', 'country_code'])
                .nunique()
                .sort_values(by = 'cuisines', ascending = False)
                .head(10)
                .reset_index())
    fig = px.bar(df_aux, x = 'city', y = 'cuisines', color = 'country_code', color_discrete_sequence = px.colors.sequential.amp, 
                 text_auto=True, labels={'cuisines': 'Tipos de Cozinha', 'city':'Cidades', 'country_code': 'Países'}, height= 800)
    
    fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)',marker_line_width=0.8)
    fig.update_layout(xaxis={'categoryorder': 'total descending'}, )
    
    return fig

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------------------------------------MENU CULINÁRIAS---------------------------------------------------------------------------

def top_cuisines (df, condicao, top_culinarias):
    if condicao == 'melhor':
        df_aux = np.round(df.loc[(df['aggregate_rating']  >= 4.0) & (df['rating_text']  != 'Not rated'), ['cuisines','aggregate_rating']]
                            .groupby(['cuisines'])
                            .mean('aggregate_rating')
                            .sort_values('aggregate_rating', ascending = False)
                            .reset_index(), 2)
        df_fig = df_aux.head(top_culinarias)
        fig = px.bar(df_fig, x = 'aggregate_rating', y = 'cuisines', color = 'cuisines', color_discrete_sequence = px.colors.sequential.amp, 
                     text_auto=True, labels={'cuisines': 'Tipos de Cozinha', 'aggregate_rating':'Média de Avaliação'}, height= 800)
        fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)',marker_line_width=0.8)
        fig.update_layout(yaxis={'categoryorder': 'total ascending'}, showlegend=False)


    elif condicao == 'pior':
        df_aux = np.round(df.loc[(df['aggregate_rating']  >= 0.0) &  (df['rating_text']  != 'Not rated'), ['cuisines','aggregate_rating']]
                            .groupby(['cuisines'])
                            .mean('aggregate_rating')
                            .sort_values('aggregate_rating', ascending = True)
                            .reset_index(), 2)
        df_fig = df_aux.head(top_culinarias)
        fig = px.bar(df_fig, x = 'aggregate_rating', y = 'cuisines', color = 'cuisines', color_discrete_sequence = px.colors.sequential.amp, 
                     text_auto=True, labels={'cuisines': 'Tipos de Cozinha', 'aggregate_rating':'Média de Avaliação'}, height= 800)
        fig.update_traces(textfont_size=14, textposition="outside", marker_line_color='rgb(69, 71, 72, 1)',marker_line_width=0.8)
        fig.update_layout(yaxis={'categoryorder': 'total descending'}, showlegend=False)
    
    return fig

def zero_rating(df):
    df_zero = np.round(df.loc[(df['aggregate_rating']  == 0.0), ['restaurant_name', 'cuisines','country_code', 'city','aggregate_rating', 'price_range']]
                     .groupby(['restaurant_name', 'cuisines', 'country_code', 'city', 'price_range'])
                     .mean('aggregate_rating')
                     .sort_values('aggregate_rating', ascending = True)
                     .reset_index(), 2)
    df_zero.columns = ['Restaurant', 'Cuisines', 'Country', 'City', 'Price Range' ,'Aggregate Rating']
    
    return df_zero

#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------MENU RESTAURANTES --------------------------------------------------------------------------
def country_maps (df):
    
    df_maps = (df.loc[:, ['restaurant_name','city', 'latitude','longitude', 'rating_color', 'price_range']]
                 .groupby(['city', 'restaurant_name','rating_color', 'price_range'])
                 .median()
                 .reset_index())
    
    map_ = folium.Map( zoom_start = 16)

    marker_cluster = MarkerCluster().add_to(map_)

    for index, location_info in df_maps.iterrows():
        folium.Marker (
            location = [location_info['latitude'], location_info['longitude']],
            popup = location_info ['price_range'],
            tooltip= location_info ['restaurant_name'],
            icon = folium.Icon (color = location_info['rating_color'], prefix = 'fa', icon = 'circle'),
    ).add_to(marker_cluster)
    
    folium_static (map_, width =1024)
    
    return None

def top_restaurants(df):
    cols = ['restaurant_name', 'country_code', 'city', 'cuisines', 'price_range','price_in_dollar', 'aggregate_rating', 'votes']
    lin = (df['aggregate_rating'] <= 4.9) & (df['country_code'].isin(paises))
            
            
    dataframe= np.round(df.loc[lin,cols]
                 .groupby(['restaurant_name', 'cuisines', 'country_code', 'city', 'price_range'])
                 .mean(['aggregate_rating','votes'])
                 .sort_values(['aggregate_rating','votes'], ascending = False)
                 .reset_index(),2)
    dataframe.columns = ['Restaurant',' Cuisines','Country', 'City', 'Price Range','Average Cost for Two (US$)', 'Aggregate Rating', 'Votes']
    df2 = dataframe.head(top_20)
    
    return df2