# Pizza Lovers
## Your data analysis of pizzerias in Paris

### Import of the data

In [1]:
import pandas as pd
import numpy as np

# Load the raw pizzeria listing; the file uses '|' as its field separator.
raw_csv_path = 'basic_data.csv'
df = pd.read_csv(raw_csv_path, sep='|')

### Split of the postal code and the city

In [2]:
# Preview the frame exactly as it was loaded, before any cleaning.
df

Unnamed: 0,name,average_rate,nb_of_reviews,address
0,Pizza Wawa,45,1535,"35 Rue Saint-Honoré, 75001 Paris"
1,Pizzeria Iovine's,45,2591,"7bis Rue du Colonel Driant, 75001 Paris"
2,Milo,46,579,"44 Rue Saint-Honoré, 75001 Paris"
3,La Tavola Calda,45,513,"39 Rue des Bourdonnais, 75001 Paris"
4,Liberto,47,1697,"23 Rue Berger, 75001 Paris"
...,...,...,...,...
467,La Trattoria di Bellagio,41,422,"101 Av. des Ternes, 75017 Paris"
468,La Monella Pizzeria,42,86,"2 Rue Lemercier, 75017 Paris"
469,Captain's Pizza,45,46,"13 Rue Brey, 75017 Paris"
470,CIRO E I SUOI FRATELLI,48,154,"34 Rue Saint-Ferdinand, 75017 Paris"


### Cleaning the data
#### Remove duplicates

In [3]:
# Drop rows that are exact copies of an earlier row (the first occurrence wins).
# The index is deliberately not reset: later cells address rows by their
# original labels (306, 411, 369).
df = df.drop_duplicates(subset=None, keep='first')

#### Show the restaurants that do not have Paris in their address

In [4]:
# Index labels of the two listings that are dropped because they are not in Paris
# (found by inspecting the filter output below).
not_in_paris = [306, 411]

# Temporarily lift the row limit so the filter result is shown in full.
pd.set_option('display.max_rows', None)

# Only the last expression of a cell is rendered automatically, so display()
# (the notebook built-in) is used to actually show the rows before the drop.
display(df[~df['address'].str.contains('Paris')])

# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the labels have already been removed.
df = df.drop(not_in_paris, errors='ignore')

# Rows that still lack 'Paris' in their address after the drop.
display(df[~df['address'].str.contains('Paris')])

pd.set_option('display.max_rows', 10)

In [5]:
# Look up the CAMPISI listing
is_campisi = df['name'].str.contains('CAMPISI')
df[is_campisi]

# Overwrite the address of row 369 with a version that ends in "75016 Paris"
full_address = 'Angle rue de Boulanvilliers, 1 Rue des Bauches, 75016 Paris'
df.at[369, 'address'] = full_address

# Check: list every row whose address still does not mention Paris
mentions_paris = df['address'].str.contains('Paris')
df[~mentions_paris]

Unnamed: 0,name,average_rate,nb_of_reviews,address


#### Look for missing values in each column

In [6]:
# Number of missing values in each column
missing_per_column = df.isnull().sum()
missing_per_column

name             0
average_rate     0
nb_of_reviews    0
address          0
dtype: int64

#### Display the type of each column and change the type of some columns

In [7]:
# Column types before the conversion. display() is required here because only
# the last expression of a cell is rendered automatically.
display(df.dtypes)

# average_rate is stored as text with a decimal comma, so the comma is swapped
# for a dot before parsing. Casting to str first keeps the cell re-runnable:
# once the column is numeric, the .str accessor alone would raise AttributeError.
# regex=False makes the replacement a plain literal substitution.
df['average_rate'] = pd.to_numeric(
    df['average_rate'].astype(str).str.replace(',', '.', regex=False)
)

# name and address are left as object columns; they are handled through the
# .str accessor in the following cells.

# Column types after the conversion
df.dtypes

name              object
average_rate     float64
nb_of_reviews      int64
address           object
dtype: object

#### Display unique values of average_rate and nb_of_reviews columns

In [8]:
# df.average_rate.unique()
# df.nb_of_reviews.unique()

### Transform the data

#### Add a "city" column

In [9]:
# The city is the last whitespace-separated token of the address.
# The vectorised .str accessor replaces the explicit Python loop and the
# temporary list; it yields NaN instead of raising IndexError when an
# address has no tokens.
df['city'] = df['address'].str.split().str[-1]

#### Add a "postal_code" column

In [10]:
# The postal code is the second-to-last whitespace-separated token of the
# address (the token right before the city). The vectorised .str accessor
# replaces the explicit Python loop and yields NaN instead of raising
# IndexError when an address has fewer than two tokens.
df['postal_code'] = df['address'].str.split().str[-2]

# To check the extracted values: df['postal_code'].unique()

In [11]:
# Rewrite postal code 75116 as 75016 so that both are counted as the same
# district in the per-postal-code statistics.
postal_code_fixes = {'75116': '75016'}
df['postal_code'] = df['postal_code'].replace(postal_code_fixes)

In [12]:
# Persist the cleaned frame (index included), using '|' as the separator
# just like the input file.
cleaned_csv_path = 'basic_data_cleaned.csv'
df.to_csv(cleaned_csv_path, sep='|')

## Analysis
### Statistics by district

#### First three districts by number of pizzerias

In [13]:
# Count the rows for each postal code, then keep the three largest counts.
pizzerias_per_district = df.groupby('postal_code')['postal_code'].count()
pizzerias_per_district.nlargest(3)

postal_code
75016    103
75005     68
75001     62
Name: postal_code, dtype: int64

#### First three districts by average rating

In [14]:
# Average the ratings within each postal code, then keep the three highest means.
mean_rate_per_district = df.groupby('postal_code')['average_rate'].mean()
mean_rate_per_district.nlargest(3)

postal_code
75009    4.688889
75010    4.600000
75020    4.550000
Name: average_rate, dtype: float64

#### First three pizzerias of each district by average rate

In [15]:
# val = input("Entrer the district you want :")

# df_3 = df.loc[df['postal_code'] == val].sort_values(by=['average_rate'], ascending=False).head(3)

In [16]:
# Rank every pizzeria by rating, best first, and leave out the address and
# city columns, which are not needed for the ranking table.
df_3 = (
    df.sort_values(by='average_rate', ascending=False)
      .drop(columns=['address', 'city'])
)
df_3

Unnamed: 0,name,average_rate,nb_of_reviews,postal_code
118,Univers Pizza,5.0,80,75005
168,Panuozzo,5.0,1,75005
48,Pizza Democrazia,5.0,1,75001
91,PALLA PIZZA,5.0,2,75010
379,Chez Achille,4.9,175,75016
...,...,...,...,...
281,Domino's Pizza Paris 13 - Tolbiac,2.6,297,75013
176,Les Balkans,2.5,82,75005
39,ACDS Restauration,2.5,2,75001
358,Pizzeria Tradition,2.0,8,75016


#### First three pizzerias of each district by number of reviews

In [17]:
# val = input("Entrer the district you want :")

# df.loc[df['postal_code'] == val].sort_values(by=['nb_of_reviews'], ascending=False).head(3)

### Example for the graph

In [18]:
# Frame used by the dashboard below: all pizzerias ordered by rating
# (highest first), without the address and city columns.
df_3 = (
    df.sort_values(by='average_rate', ascending=False)
      .drop(columns=['address', 'city'])
)
df_3

Unnamed: 0,name,average_rate,nb_of_reviews,postal_code
118,Univers Pizza,5.0,80,75005
168,Panuozzo,5.0,1,75005
48,Pizza Democrazia,5.0,1,75001
91,PALLA PIZZA,5.0,2,75010
379,Chez Achille,4.9,175,75016
...,...,...,...,...
281,Domino's Pizza Paris 13 - Tolbiac,2.6,297,75013
176,Les Balkans,2.5,82,75005
39,ACDS Restauration,2.5,2,75001
358,Pizzeria Tradition,2.0,8,75016


## Visualization with Dash

In [19]:
import jupyter_dash
from jupyter_dash import JupyterDash
import plotly.express as px

import dash
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
from dash import dcc, html


# Create the Dash application, styled with the CYBORG Bootstrap theme.
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.CYBORG])

# Postal codes offered in the dropdown, sorted and converted to a plain
# Python list so the component receives a JSON-friendly value.
district_options = df_3['postal_code'].sort_values().unique().tolist()

# Page layout: title, short description, district selector and the bar chart
# that the callback fills in.
app.layout = html.Div(children=
    [
    html.H1(children='Hello Dash'),

    html.Div(children='''
        Dash: A web application framework for your data.
    '''),
    html.Div(
        dcc.Dropdown(
            options=district_options,
            id="district-filter",
            # Pre-select the smallest postal code
            value=df_3['postal_code'].min(),
        )),

    dcc.Graph(id='graph-with-dropdown'),
    ]
)

@app.callback(
    Output('graph-with-dropdown', 'figure'),
    Input('district-filter', 'value')
)
def update_figure(selected_district):
    """Build the bar chart for the postal code chosen in the dropdown.

    Takes the first five rows of ``df_3`` whose ``postal_code`` equals
    ``selected_district``. ``df_3`` is sorted by ``average_rate`` in
    descending order, so these are the five highest-rated entries.

    Args:
        selected_district: current value of the ``district-filter`` dropdown.

    Returns:
        A plotly bar figure with one bar per pizzeria (x: name, y: average_rate).
    """
    in_district = df_3.postal_code == selected_district
    top_rated = df_3[in_district].head(5)

    # The y-axis starts 0.2 below the lowest displayed rating and ends at 5.
    y_floor = top_rated['average_rate'].min() - 0.2

    fig = px.bar(
        top_rated,
        x='name',
        y='average_rate',
        barmode="group",
        template='plotly_dark',
    )
    fig.update_layout(transition_duration=500, yaxis_range=[y_floor, 5])

    return fig
    
# Start the Dash server in debug mode. The recorded cell output shows the app
# being served on http://127.0.0.1:8050/ when this cell is executed.
if __name__ == '__main__':
    app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/
