# Projeto Fome Zero

## Tratamento do dataset

In [1]:
import pandas as pd
import numpy as np
import inflection

### Funções auxiliares

In [2]:
# Lookup table from the numeric country code to the country's display name.
# The names for codes 148 and 184 were misspelled ("New Zeland", "Singapure")
# and ended up in the curated output; they are spelled correctly here.
# NOTE(review): any consumer that matches on the old spellings must be updated.
COUNTRIES = {
    1: "India",
    14: "Australia",
    30: "Brazil",
    37: "Canada",
    94: "Indonesia",
    148: "New Zealand",
    162: "Philippines",
    166: "Qatar",
    184: "Singapore",
    189: "South Africa",
    191: "Sri Lanka",
    208: "Turkey",
    214: "United Arab Emirates",
    215: "England",
    216: "United States of America",
}


def country_name(country_id):
    """Return the country name for a numeric country code.

    Parameters
    ----------
    country_id : int
        A key of ``COUNTRIES``.

    Returns
    -------
    str
        The matching name from ``COUNTRIES``, or ``'Unknown Country'`` when
        the code is not present in the table.
    """
    return COUNTRIES.get(country_id, 'Unknown Country')

In [3]:
# Translate the numeric price range into a descriptive price category.
def create_price_type(price_range):
    """Return the price category label for a price range value.

    1 -> "cheap", 2 -> "normal", 3 -> "expensive"; every other value
    yields "gourmet".
    """
    if price_range == 1:
        return "cheap"
    if price_range == 2:
        return "normal"
    if price_range == 3:
        return "expensive"
    return "gourmet"

In [4]:
# Colour names keyed by colour code; used to replace a colour code with
# the name of the colour.
COLORS = dict(
    [
        ("3F7E00", "darkgreen"),
        ("5BA829", "green"),
        ("9ACD32", "lightgreen"),
        ("CDD614", "orange"),
        ("FFBA00", "red"),
        ("CBCBC8", "darkred"),
        ("FF7800", "darkred"),
    ]
)


def color_name(color_code):
    """Look up the colour name for a colour code.

    Raises ``KeyError`` when ``color_code`` is not a key of ``COLORS``.
    """
    name = COLORS[color_code]
    return name

In [5]:
# Normalise the DataFrame's column labels to snake_case.
def rename_columns(dataframe):
    """Return a copy of ``dataframe`` whose column labels are snake_case.

    Each label is title-cased with ``inflection.titleize``, has its spaces
    removed, and is then converted with ``inflection.underscore``.
    The frame passed in is not modified.
    """
    renamed = dataframe.copy()
    renamed.columns = [
        inflection.underscore(inflection.titleize(label).replace(" ", ""))
        for label in renamed.columns
    ]
    return renamed

### Analisando o dataset

In [6]:
# Load the raw Zomato dataset from a relative path and print each column's
# dtype and non-null count.
df = pd.read_csv("../dataset/zomato.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7527 entries, 0 to 7526
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         7527 non-null   int64  
 1   Restaurant Name       7527 non-null   object 
 2   Country Code          7527 non-null   int64  
 3   City                  7527 non-null   object 
 4   Address               7527 non-null   object 
 5   Locality              7527 non-null   object 
 6   Locality Verbose      7527 non-null   object 
 7   Longitude             7527 non-null   float64
 8   Latitude              7527 non-null   float64
 9   Cuisines              7512 non-null   object 
 10  Average Cost for two  7527 non-null   int64  
 11  Currency              7527 non-null   object 
 12  Has Table booking     7527 non-null   int64  
 13  Has Online delivery   7527 non-null   int64  
 14  Is delivering now     7527 non-null   int64  
 15  Switch to order menu 

In [7]:
# Remove pandas' limit on displayed columns so wide frames show every column,
# then preview the first two rows of the raw data.
pd.set_option('display.max_columns', None)
df.head(2)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,Average Cost for two,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,0,3,4.6,3F7E00,Excellent,619
1,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,0,3,4.6,3F7E00,Excellent,619


In [8]:
# (rows, columns) of the raw dataset.
df.shape

(7527, 21)

In [9]:
# Work on a copy so the raw frame `df` stays untouched.
df1 = df.copy()

In [10]:
# Rename the dataset's columns to snake_case.
df1 = rename_columns(df1)
df1.head(2)

Unnamed: 0,restaurant_id,restaurant_name,country_code,city,address,locality,locality_verbose,longitude,latitude,cuisines,average_cost_for_two,currency,has_table_booking,has_online_delivery,is_delivering_now,switch_to_order_menu,price_range,aggregate_rating,rating_color,rating_text,votes
0,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,0,3,4.6,3F7E00,Excellent,619
1,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,0,3,4.6,3F7E00,Excellent,619


In [11]:
# Count rows per restaurant_id; a count above 1 means the restaurant
# appears more than once in the dataset.
df1['restaurant_id'].value_counts()

restaurant_id
3900267     3
18691230    3
21546       2
2900232     2
2800579     2
           ..
16540543    1
18578049    1
18579064    1
16541680    1
16540470    1
Name: count, Length: 6942, dtype: int64

In [12]:
# Keep a single row per restaurant_id.
df1_drop_duplicates = df1.drop_duplicates(subset=['restaurant_id'])

In [13]:
# Continue with an independent copy of the de-duplicated frame and
# check its new shape.
df1 = df1_drop_duplicates.copy()
df1.shape

(6942, 21)

In [14]:
# Drop the column that will not be used in the analysis.
df1 = df1.drop(columns=['switch_to_order_menu'])
df1.head(2)

Unnamed: 0,restaurant_id,restaurant_name,country_code,city,address,locality,locality_verbose,longitude,latitude,cuisines,average_cost_for_two,currency,has_table_booking,has_online_delivery,is_delivering_now,price_range,aggregate_rating,rating_color,rating_text,votes
0,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,3,4.6,3F7E00,Excellent,619
2,6314542,Blackbird,162,Makati City,"Nielson Tower, Ayala Triangle Gardens, Salcedo...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.024562,14.556042,"European, Asian",3100,Botswana Pula(P),0,0,0,4,4.7,3F7E00,Excellent,469


In [15]:
# Count missing values per column to see which rows need to be removed.
df1.isna().sum()

restaurant_id            0
restaurant_name          0
country_code             0
city                     0
address                  0
locality                 0
locality_verbose         0
longitude                0
latitude                 0
cuisines                13
average_cost_for_two     0
currency                 0
has_table_booking        0
has_online_delivery      0
is_delivering_now        0
price_range              0
aggregate_rating         0
rating_color             0
rating_text              0
votes                    0
dtype: int64

In [16]:
# Discard every row that still contains a missing value.
df1 = df1.dropna()

In [17]:
# Preview the cleaned rows; the index still has gaps left by the removed rows.
df1.head(10)

Unnamed: 0,restaurant_id,restaurant_name,country_code,city,address,locality,locality_verbose,longitude,latitude,cuisines,average_cost_for_two,currency,has_table_booking,has_online_delivery,is_delivering_now,price_range,aggregate_rating,rating_color,rating_text,votes
0,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,3,4.6,3F7E00,Excellent,619
2,6314542,Blackbird,162,Makati City,"Nielson Tower, Ayala Triangle Gardens, Salcedo...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.024562,14.556042,"European, Asian",3100,Botswana Pula(P),0,0,0,4,4.7,3F7E00,Excellent,469
3,6301293,Banapple,162,Makati City,"Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.023171,14.556196,"Filipino, American, Italian, Bakery",800,Botswana Pula(P),0,0,0,3,4.4,5BA829,Very Good,867
4,6315689,Bad Bird,162,Makati City,"Hole In The Wall, Floor 4, Century City Mall, ...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027708,14.565899,American,700,Botswana Pula(P),0,0,0,3,4.4,5BA829,Very Good,858
5,6304833,Manam,162,Makati City,"Level 1, Greenbelt 2, Ayala Center, Greenbelt,...","Greenbelt 2, San Lorenzo, Makati City","Greenbelt 2, San Lorenzo, Makati City, Makati ...",121.02038,14.552351,Filipino,700,Botswana Pula(P),0,0,0,3,4.7,3F7E00,Excellent,930
6,18409457,Soban K-Town Grill,162,Makati City,"Level 3, Greenbelt 3, Ayala Center, Greenbelt,...","Greenbelt 3, San Lorenzo, Makati City","Greenbelt 3, San Lorenzo, Makati City, Makati ...",121.021388,14.552248,"Korean, Grill",1300,Botswana Pula(P),0,0,0,3,4.7,3F7E00,Excellent,935
7,18607559,Bluesmith Coffee & Kitchen,162,Makati City,"Level 3, Greenbelt 3, Ayala Center, Greenbelt,...","Greenbelt 3, San Lorenzo, Makati City","Greenbelt 3, San Lorenzo, Makati City, Makati ...",121.021374,14.552044,"American, Filipino, Coffee",700,Botswana Pula(P),0,0,0,3,4.0,5BA829,Very Good,340
8,6314001,Motorino Pizzeria Napoletana,162,Makati City,"Level 2, Greenbelt 3, Ayala Center, Greenbelt,...","Greenbelt 3, San Lorenzo, Makati City","Greenbelt 3, San Lorenzo, Makati City, Makati ...",121.021772,14.551875,"Pizza, Italian",1000,Botswana Pula(P),0,1,1,3,4.3,5BA829,Very Good,449
9,18189398,Shi Lin,162,Makati City,"Level 3, Greenbelt 3, Ayala Center, Greenbelt,...","Greenbelt 3, San Lorenzo, Makati City","Greenbelt 3, San Lorenzo, Makati City, Makati ...",121.021653,14.552189,Taiwanese,1000,Botswana Pula(P),0,1,0,3,4.1,5BA829,Very Good,201
10,6318945,Nikkei,162,Makati City,"Unit GO3, Frabelle Business Center, 111 Rada S...",Legaspi Village,"Legaspi Village, Makati City",121.018108,14.555784,"Japanese, Latin American",1600,Botswana Pula(P),1,0,0,4,4.6,3F7E00,Excellent,394


In [18]:
# Rebuild a contiguous 0..n-1 index, discarding the old index values.
df1 = df1.reset_index(drop=True)

In [19]:
# Confirm the index is contiguous again after the reset.
df1.head(5)

Unnamed: 0,restaurant_id,restaurant_name,country_code,city,address,locality,locality_verbose,longitude,latitude,cuisines,average_cost_for_two,currency,has_table_booking,has_online_delivery,is_delivering_now,price_range,aggregate_rating,rating_color,rating_text,votes
0,6310675,Mama Lou's Italian Kitchen,162,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,3,4.6,3F7E00,Excellent,619
1,6314542,Blackbird,162,Makati City,"Nielson Tower, Ayala Triangle Gardens, Salcedo...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.024562,14.556042,"European, Asian",3100,Botswana Pula(P),0,0,0,4,4.7,3F7E00,Excellent,469
2,6301293,Banapple,162,Makati City,"Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.023171,14.556196,"Filipino, American, Italian, Bakery",800,Botswana Pula(P),0,0,0,3,4.4,5BA829,Very Good,867
3,6315689,Bad Bird,162,Makati City,"Hole In The Wall, Floor 4, Century City Mall, ...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027708,14.565899,American,700,Botswana Pula(P),0,0,0,3,4.4,5BA829,Very Good,858
4,6304833,Manam,162,Makati City,"Level 1, Greenbelt 2, Ayala Center, Greenbelt,...","Greenbelt 2, San Lorenzo, Makati City","Greenbelt 2, San Lorenzo, Makati City, Makati ...",121.02038,14.552351,Filipino,700,Botswana Pula(P),0,0,0,3,4.7,3F7E00,Excellent,930


In [22]:

# Report, column by column, how many missing values remain.
for col in df1.columns:
    print(f'{col} tem {df1[col].isna().sum()} NaN')

restaurant_id tem 0 NaN
restaurant_name tem 0 NaN
country_code tem 0 NaN
city tem 0 NaN
address tem 0 NaN
locality tem 0 NaN
locality_verbose tem 0 NaN
longitude tem 0 NaN
latitude tem 0 NaN
cuisines tem 0 NaN
average_cost_for_two tem 0 NaN
currency tem 0 NaN
has_table_booking tem 0 NaN
has_online_delivery tem 0 NaN
is_delivering_now tem 0 NaN
price_range tem 0 NaN
aggregate_rating tem 0 NaN
rating_color tem 0 NaN
rating_text tem 0 NaN
votes tem 0 NaN


In [23]:
# Inspect the cuisines column: entries are comma-separated lists of cuisines.
df1['cuisines']

0                                       Italian
1                               European, Asian
2           Filipino, American, Italian, Bakery
3                                      American
4                                      Filipino
                         ...                   
6924                 Italian, Pizza, Fresh Fish
6925    Fast Food, Izgara, Seafood, Tea, Coffee
6926                            Restaurant Cafe
6927                          Home-made, Izgara
6928      Restaurant Cafe, Kebab, Turkish Pizza
Name: cuisines, Length: 6929, dtype: object

In [25]:
# Keep only the first cuisine listed for each restaurant (the text before the
# first comma). The vectorised .str accessor replaces the per-row Python
# lambda; n=1 stops splitting after the first comma, since only the leading
# piece is used. Non-string entries become NaN instead of raising.
df1["cuisines"] = df1["cuisines"].str.split(",", n=1).str[0]

In [26]:
# Check that each entry now holds a single cuisine.
df1['cuisines']

0               Italian
1              European
2              Filipino
3              American
4              Filipino
             ...       
6924            Italian
6925          Fast Food
6926    Restaurant Cafe
6927          Home-made
6928    Restaurant Cafe
Name: cuisines, Length: 6929, dtype: object

In [30]:
# Derive the country name from the country code.
df1['country_name'] = df1['country_code'].apply(country_name)

# Reorder the columns so country_name sits immediately after country_code.
ordered_cols = [label for label in df1.columns if label != 'country_name']
ordered_cols.insert(ordered_cols.index('country_code') + 1, 'country_name')
df1 = df1[ordered_cols]
df1.head()

Unnamed: 0,restaurant_id,restaurant_name,country_code,country_name,city,address,locality,locality_verbose,longitude,latitude,cuisines,average_cost_for_two,currency,has_table_booking,has_online_delivery,is_delivering_now,price_range,aggregate_rating,rating_color,rating_text,votes
0,6310675,Mama Lou's Italian Kitchen,162,Philippines,Las Piñas City,"Block 1, Lot 36, Tropical Avenue Corner Tropic...",BF International,"BF International, Las Piñas City",121.009787,14.447615,Italian,1100,Botswana Pula(P),1,0,0,3,4.6,3F7E00,Excellent,619
1,6314542,Blackbird,162,Philippines,Makati City,"Nielson Tower, Ayala Triangle Gardens, Salcedo...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.024562,14.556042,European,3100,Botswana Pula(P),0,0,0,4,4.7,3F7E00,Excellent,469
2,6301293,Banapple,162,Philippines,Makati City,"Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...","Ayala Triangle Gardens, Salcedo Village, Makat...",121.023171,14.556196,Filipino,800,Botswana Pula(P),0,0,0,3,4.4,5BA829,Very Good,867
3,6315689,Bad Bird,162,Philippines,Makati City,"Hole In The Wall, Floor 4, Century City Mall, ...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027708,14.565899,American,700,Botswana Pula(P),0,0,0,3,4.4,5BA829,Very Good,858
4,6304833,Manam,162,Philippines,Makati City,"Level 1, Greenbelt 2, Ayala Center, Greenbelt,...","Greenbelt 2, San Lorenzo, Makati City","Greenbelt 2, San Lorenzo, Makati City, Makati ...",121.02038,14.552351,Filipino,700,Botswana Pula(P),0,0,0,3,4.7,3F7E00,Excellent,930


In [32]:
# Write the curated frame to CSV without the index column.
# DataFrame.to_csv returns None when it is given a path, so the result is not
# bound to a name (the previous `curated_data` variable was always None).
df1.to_csv('../dataset/zomato_curated.csv', index=False)