In [1]:
# bibliotecas necessárias
import pandas as pd
from haversine import haversine
import folium
import plotly.express as px
from streamlit_folium import folium_static
from folium.plugins import MarkerCluster
import streamlit as st
from PIL import Image
from datetime import datetime




In [2]:
def clean_code(df):
    """ Esta funcao tem a responsabilidade de limpar o dataframe
        
        Tipos de limpeza:
        1. Remover a coluna Switch to order menu pois tem o mesmo valor em todas as linhas
        2. Remover as linhas duplicadas
        3. Remover as NaNs de todas as observações que possuem NaN em alguma variável
        
        Input: Dataframe
        Output: Dataframe
    """


    # 2. Remover as linhas duplicadas
    df = df.drop_duplicates()
    
    # 3. Remover as NaNs de todas as observações que possuem NaN em alguma variável
    df = df.dropna()
    
    return df

In [3]:
# Preenchimento do nome dos países
COUNTRIES = {
1: "India",
14: "Australia",
30: "Brazil",
37: "Canada",
94: "Indonesia",
148: "New Zeland",
162: "Philippines",
166: "Qatar",
184: "Singapure",
189: "South Africa",
191: "Sri Lanka",
208: "Turkey",
214: "United Arab Emirates",
215: "England",
216: "United States of America",
}
def country_name(country_id):
  return COUNTRIES[country_id]

In [4]:
# Criação do Tipo de Categoria de Comida
def create_price_tye(price_range):
  if price_range == 1:
    return "cheap"
  elif price_range == 2:
    return "normal"
  elif price_range == 3:
    return "expensive"
  else:
    return "gourmet"

In [5]:
# Definir cores por país:
country_colors ={
"India": 'yellow',
"Australia": 'darkblue',
"Brazil": 'green',
"Canada": 'red',
"Indonesia": 'orange',
"New Zeland": 'purple',
"Philippines": 'brown',
"Qatar": 'gray',
"Singapure": 'lightblue',
"South Africa": 'pink',
"Sri Lanka": 'darkgreen',
"Turkey": 'darkred',
"United Arab Emirates": 'goldenrod',
"England": 'lightgreen',
"United States of America": 'black',
}

In [6]:
# Criação do nome das Cores

COLORS = {
"3F7E00": "darkgreen",
"5BA829": "green",
"9ACD32": "lightgreen",
"CDD614": "orange",
"FFBA00": "red",
"CBCBC8": "darkred",
"FF7800": "darkred",
}
def color_name(color_code):
  return COLORS[color_code]

In [7]:
# Renomear as colunas do DataFrame
def rename_columns(dataframe):
  df = dataframe.copy()
  title = lambda x: inflection.titleize(x)
  snakecase = lambda x: inflection.underscore(x)
  spaces = lambda x: x.replace(" ", "")
  cols_old = list(df.columns)
  cols_old = list(map(title, cols_old))
  cols_old = list(map(spaces, cols_old))
  cols_new = list(map(snakecase, cols_old))
  df.columns = cols_new

  return df

In [8]:
df = pd.read_csv ('zomato.csv')

In [9]:
# Limpando os dados
df = clean_code (df)

# Categorizar os restaurantes somente por um tipo de culinária:
df["Cuisines"] = df.loc[:, "Cuisines"].apply(lambda x: x.split(",")[0])

# Criar uma nova coluna com os nomes dos países
df["Country"] = df["Country Code"].apply(country_name)

In [10]:
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes,Country
0,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,...,1,0,0,0,3,4.6,3F7E00,Excellent,619,Philippines
2,6314542,Blackbird,162,Makati City,"Nielson Tower, Ayala Triangle Gardens, Salcedo...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.024562,14.556042,European,...,0,0,0,0,4,4.7,3F7E00,Excellent,469,Philippines
3,6301293,Banapple,162,Makati City,"Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.023171,14.556196,Filipino,...,0,0,0,0,3,4.4,5BA829,Very Good,867,Philippines
4,6315689,Bad Bird,162,Makati City,"Hole In The Wall, Floor 4, Century City Mall, ...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027708,14.565899,American,...,0,0,0,0,3,4.4,5BA829,Very Good,858,Philippines
5,6304833,Manam,162,Makati City,"Level 1, Greenbelt 2, Ayala Center, Greenbelt,...","Greenbelt 2, San Lorenzo, Makati City","Greenbelt 2, San Lorenzo, Makati City, Makati ...",121.02038,14.552351,Filipino,...,0,0,0,0,3,4.7,3F7E00,Excellent,930,Philippines


In [11]:
# 1. Quantos restaurantes únicos estão registrados?

In [13]:
df.loc[:,'Restaurant ID'].nunique()

6929

In [13]:
# 2. Quantos países únicos estão registrados?

In [14]:
df.loc[:,'Country Code'].nunique()

15

In [15]:
# 3. Quantas cidades únicas estão registradas?
df.loc[:,'City'].nunique()

125

In [16]:
# 4. Qual o total de avaliações feitas?
total_avaliacao = df['Votes'].sum()
print ('Total de avaliações é', total_avaliacao)

Total de avaliações é 4194533


In [17]:
# 5. Qual o total de tipos de culinária registrados?
total_culinaria = df['Cuisines'].nunique()
print ('Total de tipos de culinária registadros é',total_culinaria)

Total de tipos de culinária registadros é 165


In [18]:
df.loc[:,['City','Country']].groupby('Country').nunique().sort_values('City',ascending=False).reset_index()

Unnamed: 0,Country,City
0,India,49
1,United States of America,22
2,Philippines,12
3,South Africa,12
4,England,5
5,United Arab Emirates,4
6,New Zeland,4
7,Indonesia,3
8,Australia,3
9,Brazil,3


In [14]:
df.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes', 'Country'],
      dtype='object')