## In this notebook, I clean the dataset of Michelin Star restaurants in Spain.

## Import libraries

In [1]:
import pandas as pd
import numpy as np

## 1. Importing the dataset and exploring the data

In [2]:
# Load the raw restaurant data: semicolon-separated, ISO-8859-1 encoded.
# `on_bad_lines="skip"` silently drops malformed rows. It replaces the
# `error_bad_lines=False, warn_bad_lines=False` pair, which was deprecated in
# pandas 1.3 and removed in pandas 2.0 (where passing it raises a TypeError).
restaurants = pd.read_csv(
    "../data/restaurants.csv",
    encoding="ISO-8859-1",
    sep=";",
    engine="python",
    on_bad_lines="skip",
)

In [3]:
# Dimensions of the loaded frame as (rows, columns).
restaurants.shape

(220, 14)

In [4]:
# Preview the first rows to check that the file was parsed as expected.
restaurants.head()

Unnamed: 0,restaurant_name,michelin_stars,city,province,CCAA,coast_province,coordenates,price_menu_1,price_menu_2,price_average,type_of_cooking1,type_of_cooking2,International_cooking,regional_cooking
0,Mantua,1,Jerez de la Frontera,Cadiz,Andalucia,Yes,"36.6874279,-6.1380431",75,90.0,82.5,Moderna,,,
1,Lu Cocina y Alma,1,Jerez de la Frontera,Cadiz,Andalucia,Yes,"36.6876237,-6.1376777",90,150.0,120.0,Moderna,,,
2,Aponiente,3,El Puerto de Santa Maria,Cadiz,Andalucia,Yes,"36.605854,-6.2157528",215,,215.0,Creativa,Innovadora,,
3,Alevante,1,Novo Sancti Petri,Cadiz,Andalucia,Yes,"36.3442398,-6.1616944",115,135.0,125.0,Creativa,Innovadora,,
4,Abantal,1,Sevilla,Sevilla,Andalucia,No,"37.3879127,-5.98587",80,100.0,90.0,Creativa,,,


In [5]:
# List the column labels (several are renamed in section 2.2).
restaurants.columns

Index(['restaurant_name', 'michelin_stars', 'city', 'province', 'CCAA',
       'coast_province', 'coordenates', 'price_menu_1', 'price_menu_2',
       'price_average', 'type_of_cooking1', 'type_of_cooking2',
       'International_cooking', 'regional_cooking'],
      dtype='object')

In [6]:
# Inspect the dtype pandas inferred for each column.
restaurants.dtypes

restaurant_name           object
michelin_stars             int64
city                      object
province                  object
CCAA                      object
coast_province            object
coordenates               object
price_menu_1               int64
price_menu_2             float64
price_average            float64
type_of_cooking1          object
type_of_cooking2          object
International_cooking     object
regional_cooking          object
dtype: object

## 2. Cleaning

### 2.1. Deleting blank rows

In [7]:
# Drop the rows whose CCAA value is missing (the blank rows of the source file).
# The filtered frame is reassigned instead of using `inplace=True`: an in-place
# `drop` returns None, so assigning its result only creates a useless None variable.
restaurants = restaurants.dropna(subset=["CCAA"])

In [8]:
# Re-check the dimensions after dropping the rows with a missing CCAA.
restaurants.shape

(220, 14)

### 2.2. Renaming columns

In [9]:
# Rename the three columns in a single pass: lower-case the capitalised
# `International_cooking` label and use shorter names for the region and
# restaurant columns.
restaurants = restaurants.rename(
    columns={
        "International_cooking": "international_cooking",
        "CCAA": "region",
        "restaurant_name": "restaurant",
    }
)

### 2.3. Replacing values

Replace the place names that contain the letter "ñ" with ASCII spellings ("Cataluna", "La Coruna") in our dataset.

In [10]:
# Each entry is (column, value as stored in the data, ASCII replacement).
# NOTE(review): the stored spellings below carry no "ñ" at all -- presumably the
# character was lost when the file was decoded; confirm against the raw CSV.
ascii_fixes = [
    ("region", "Catalua", "Cataluna"),
    ("province", "La Corua", "La Coruna"),
    ("city", "La Corua", "La Coruna"),
]
for column, stored_value, ascii_value in ascii_fixes:
    # Overwrite only the cells that match the stored spelling exactly.
    restaurants.loc[restaurants[column] == stored_value, column] = ascii_value

### 2.4. Splitting the `coordenates` column into latitude and longitude

In [11]:
# Split each coordinate string at the first comma only (n = 1);
# expand = True returns a DataFrame with the two pieces in columns 0 and 1.
coordenates = restaurants["coordenates"].str.split(",", n = 1, expand = True)

In [12]:
# Column 0 of the split holds the latitude, column 1 the longitude.
# `str.split` yields strings, so convert them to numbers; otherwise both new
# columns would be stored as text (object dtype). `pd.to_numeric` raises on a
# value it cannot parse, so a malformed coordinate surfaces here instead of
# reaching the exported file.
restaurants["latitude"] = pd.to_numeric(coordenates[0])
restaurants["longitude"] = pd.to_numeric(coordenates[1])

In [13]:
# The frame should now have two more columns: latitude and longitude.
restaurants.shape

(220, 16)

### 2.5. Reordering columns

In [14]:
# Final column layout: latitude and longitude sit right after the raw
# `coordenates` column, followed by the price and cooking-type columns.
column_order = [
    'restaurant',
    'michelin_stars',
    'city',
    'province',
    'region',
    'coast_province',
    'coordenates',
    'latitude',
    'longitude',
    'price_menu_1',
    'price_menu_2',
    'price_average',
    'type_of_cooking1',
    'type_of_cooking2',
    'international_cooking',
    'regional_cooking',
]
restaurants = restaurants.reindex(columns=column_order)

## 3. Exporting the cleaned data

In [15]:
# Renumber the rows from 0; drop=True discards the old index labels instead of
# keeping them as a new column.
restaurants = restaurants.reset_index(drop=True)

In [16]:
# Preview the first rows of the cleaned frame before exporting it.
restaurants.head()

Unnamed: 0,restaurant,michelin_stars,city,province,region,coast_province,coordenates,latitude,longitude,price_menu_1,price_menu_2,price_average,type_of_cooking1,type_of_cooking2,international_cooking,regional_cooking
0,Mantua,1,Jerez de la Frontera,Cadiz,Andalucia,Yes,"36.6874279,-6.1380431",36.6874279,-6.1380431,75,90.0,82.5,Moderna,,,
1,Lu Cocina y Alma,1,Jerez de la Frontera,Cadiz,Andalucia,Yes,"36.6876237,-6.1376777",36.6876237,-6.1376777,90,150.0,120.0,Moderna,,,
2,Aponiente,3,El Puerto de Santa Maria,Cadiz,Andalucia,Yes,"36.605854,-6.2157528",36.605854,-6.2157528,215,,215.0,Creativa,Innovadora,,
3,Alevante,1,Novo Sancti Petri,Cadiz,Andalucia,Yes,"36.3442398,-6.1616944",36.3442398,-6.1616944,115,135.0,125.0,Creativa,Innovadora,,
4,Abantal,1,Sevilla,Sevilla,Andalucia,No,"37.3879127,-5.98587",37.3879127,-5.98587,80,100.0,90.0,Creativa,,,


In [17]:
# Write the cleaned frame to CSV; index = False leaves the row index out of the file.
restaurants.to_csv("../output/restaurants.csv", index = False)