# File preprocessing notebook
This notebook aim to apply some cleaning and filters for future data analysis operations

## First step : Importation of pandas library and creation of the dataframe

In [8]:
import pandas as pd

In [9]:
df = pd.read_csv('mapstr.csv')

In [10]:
df

Unnamed: 0,name,address,icon,userComment,tags
0,Barrio Meshica,"15 Rue de la Villette, 75019 Paris, France",restaurant,,💸💸#Hispanique#TheFork 🍴
1,Mile Time,"Baquerizo Moreno E7-70, Quito, Ecuador",restaurant,🇹🇼,💸#Asiat#Vege/Vegan 🌱
2,Chifa Fuzhao (Chifa Fu Zhao),"Avenida 6 de Diciembre, N25-23, 170524 Quito, ...",restaurant,🇨🇳,💸#Asiat
3,Fairuz Café Restaurante / Shawarma Show,"Avenida 6 de Diciembre, N25-23, 170524 Quito, ...",restaurant,,💸#Libanais 🇱🇧
4,Sant Just,"Calle 16A 2-73, 111711 Bogotá, D.C., Colombia",restaurant,,💸💸#Viande#Restaurant
...,...,...,...,...,...
829,O'Cheese Naans,"20 Rue Pablo Neruda, 77200 Torcy, France",restaurant,,Burger 🍔🍟#Grec 🥙#💸#À faire
830,Broadway Coffee,"4 Boulevard Carnot, 93250 Villemomble, France",fastfood,,💸#Burger 🍔🍟#À faire
831,Mamagaya,24 Avenue Henri Barbusse 93700 Drancy France,restaurant,,💸#Grec 🥙#Pâtes 🍝#À faire
832,La Maison Du Tacos,"23 Rue Saint-Spire, 91100 Corbeil-Essonnes, Fr...",restaurant,Meilleur endroit pour prendre un tacos FR . 🖕🏾...,💸#Halal#Tacos 🇫🇷#Validé ✅


## Second step : Filtering out the Private addresses, replacing the NaN values for comments to empty string and desc ordering to have the oldests addresses first 

In [11]:
privateless_df = df[~df['tags'].str.contains("Private")]

In [12]:
privateless_df['userComment'] = privateless_df['userComment'].fillna("")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  privateless_df['userComment'] = privateless_df['userComment'].fillna("")


In [13]:
privateless_df

Unnamed: 0,name,address,icon,userComment,tags
0,Barrio Meshica,"15 Rue de la Villette, 75019 Paris, France",restaurant,,💸💸#Hispanique#TheFork 🍴
1,Mile Time,"Baquerizo Moreno E7-70, Quito, Ecuador",restaurant,🇹🇼,💸#Asiat#Vege/Vegan 🌱
2,Chifa Fuzhao (Chifa Fu Zhao),"Avenida 6 de Diciembre, N25-23, 170524 Quito, ...",restaurant,🇨🇳,💸#Asiat
3,Fairuz Café Restaurante / Shawarma Show,"Avenida 6 de Diciembre, N25-23, 170524 Quito, ...",restaurant,,💸#Libanais 🇱🇧
4,Sant Just,"Calle 16A 2-73, 111711 Bogotá, D.C., Colombia",restaurant,,💸💸#Viande#Restaurant
...,...,...,...,...,...
829,O'Cheese Naans,"20 Rue Pablo Neruda, 77200 Torcy, France",restaurant,,Burger 🍔🍟#Grec 🥙#💸#À faire
830,Broadway Coffee,"4 Boulevard Carnot, 93250 Villemomble, France",fastfood,,💸#Burger 🍔🍟#À faire
831,Mamagaya,24 Avenue Henri Barbusse 93700 Drancy France,restaurant,,💸#Grec 🥙#Pâtes 🍝#À faire
832,La Maison Du Tacos,"23 Rue Saint-Spire, 91100 Corbeil-Essonnes, Fr...",restaurant,Meilleur endroit pour prendre un tacos FR . 🖕🏾...,💸#Halal#Tacos 🇫🇷#Validé ✅


In [14]:
privateless_df = privateless_df.iloc[::-1]

In [15]:
privateless_df = privateless_df.reset_index(drop=True)

In [16]:
privateless_df

Unnamed: 0,name,address,icon,userComment,tags
0,French Cantine,"65 Avenue Gambetta, 93170 Bagnolet, France",restaurant,Meilleur sandwich que j’ai mangé. Les plats de...,💸#Grec 🥙#Halal#Pâtes 🍝#Validé ✅
1,La Maison Du Tacos,"23 Rue Saint-Spire, 91100 Corbeil-Essonnes, Fr...",restaurant,Meilleur endroit pour prendre un tacos FR . 🖕🏾...,💸#Halal#Tacos 🇫🇷#Validé ✅
2,Mamagaya,24 Avenue Henri Barbusse 93700 Drancy France,restaurant,,💸#Grec 🥙#Pâtes 🍝#À faire
3,Broadway Coffee,"4 Boulevard Carnot, 93250 Villemomble, France",fastfood,,💸#Burger 🍔🍟#À faire
4,O'Cheese Naans,"20 Rue Pablo Neruda, 77200 Torcy, France",restaurant,,Burger 🍔🍟#Grec 🥙#💸#À faire
...,...,...,...,...,...
807,Sant Just,"Calle 16A 2-73, 111711 Bogotá, D.C., Colombia",restaurant,,💸💸#Viande#Restaurant
808,Fairuz Café Restaurante / Shawarma Show,"Avenida 6 de Diciembre, N25-23, 170524 Quito, ...",restaurant,,💸#Libanais 🇱🇧
809,Chifa Fuzhao (Chifa Fu Zhao),"Avenida 6 de Diciembre, N25-23, 170524 Quito, ...",restaurant,🇨🇳,💸#Asiat
810,Mile Time,"Baquerizo Moreno E7-70, Quito, Ecuador",restaurant,🇹🇼,💸#Asiat#Vege/Vegan 🌱


##

## Final Step : Export of the preprocessed dataframe into csv and parquet formats

In [18]:
privateless_df.to_parquet('addresses-notebook.parquet')

In [20]:
privateless_df.to_csv('addresses-notebook.csv')