# Cleaning Data For Food Recommendation

# Joel Parker

# Data from: https://www.mavenanalytics.io/data-playground?page=2&pageSize=5 (Restaurant Ratings)

# Import Packages

In [11]:
import pandas as pd
import numpy as np

In [22]:
# Load the restaurant master table (one row per restaurant) from the working
# directory and show it as the cell output.
rest = pd.read_csv(filepath_or_buffer='restaurants.csv')
rest

Unnamed: 0,Restaurant_ID,Name,City,State,Country,Zip_Code,Latitude,Longitude,Alcohol_Service,Smoking_Allowed,Price,Franchise,Area,Parking
0,132560,Puesto de Gorditas,Ciudad Victoria,Tamaulipas,Mexico,,23.752304,-99.166913,,Yes,Low,No,Open,Public
1,132561,Cafe Ambar,Ciudad Victoria,Tamaulipas,Mexico,,23.726819,-99.126506,,No,Low,No,Closed,
2,132564,Church's,Ciudad Victoria,Tamaulipas,Mexico,,23.730925,-99.145185,,No,Low,No,Closed,
3,132572,Cafe Chaires,San Luis Potosi,San Luis Potosi,Mexico,,22.141647,-100.992712,,No,Low,No,Closed,Yes
4,132583,McDonalds Centro,Cuernavaca,Morelos,Mexico,62000.0,18.922290,-99.234332,,No,Low,Yes,Closed,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,135088,Cafeteria Cenidet,Cuernavaca,Morelos,Mexico,,18.876011,-99.219890,,No,Low,No,Closed,Public
126,135104,Vips,Ciudad Victoria,Tamaulipas,Mexico,,23.752982,-99.168434,Full Bar,No,Medium,Yes,Closed,Yes
127,135106,El Rincón De San Francisco,San Luis Potosi,San Luis Potosi,Mexico,78000.0,22.149709,-100.976093,Wine & Beer,Bar Only,Medium,No,Open,
128,135108,Potzocalli,San Luis Potosi,San Luis Potosi,Mexico,,22.136253,-100.933585,,No,Low,No,Closed,


In [23]:
# Load the restaurant-to-cuisine lookup table from the working directory and
# show it as the cell output.
rest_cuisines = pd.read_csv(filepath_or_buffer='restaurant_cuisines.csv')
rest_cuisines

Unnamed: 0,Restaurant_ID,Cuisine
0,132560,Regional
1,132572,Cafeteria
2,132583,American
3,132584,Mexican
4,132594,Mexican
...,...,...
107,135086,Fast Food
108,135088,Cafeteria
109,135104,Mexican
110,135106,Mexican


# Merge DataFrames

In [24]:
# Attach each cuisine label to its restaurant record, matching on Restaurant_ID.
# This is pandas' default inner join (spelled out here), so restaurants with no
# entry in rest_cuisines are left out of the result.
restaurant_df = rest.merge(rest_cuisines, how='inner', on='Restaurant_ID')

In [25]:
# Display the merged frame to inspect the result of the join.
restaurant_df

Unnamed: 0,Restaurant_ID,Name,City,State,Country,Zip_Code,Latitude,Longitude,Alcohol_Service,Smoking_Allowed,Price,Franchise,Area,Parking,Cuisine
0,132560,Puesto de Gorditas,Ciudad Victoria,Tamaulipas,Mexico,,23.752304,-99.166913,,Yes,Low,No,Open,Public,Regional
1,132572,Cafe Chaires,San Luis Potosi,San Luis Potosi,Mexico,,22.141647,-100.992712,,No,Low,No,Closed,Yes,Cafeteria
2,132583,McDonalds Centro,Cuernavaca,Morelos,Mexico,62000.0,18.922290,-99.234332,,No,Low,Yes,Closed,,American
3,132584,Gorditas Doña Tota,Ciudad Victoria,Tamaulipas,Mexico,,23.752365,-99.165288,,No,Medium,Yes,Closed,Yes,Mexican
4,132594,Tacos De Barbacoa Enfrente Del Tec,Ciudad Victoria,Tamaulipas,Mexico,,23.752168,-99.165709,,No,Low,No,Open,Public,Mexican
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,135086,McDonalds Parque Tangamanga,San Luis Potosi,San Luis Potosi,Mexico,78290.0,22.141421,-101.013955,,No,Medium,Yes,Closed,Yes,Fast Food
108,135088,Cafeteria Cenidet,Cuernavaca,Morelos,Mexico,,18.876011,-99.219890,,No,Low,No,Closed,Public,Cafeteria
109,135104,Vips,Ciudad Victoria,Tamaulipas,Mexico,,23.752982,-99.168434,Full Bar,No,Medium,Yes,Closed,Yes,Mexican
110,135106,El Rincón De San Francisco,San Luis Potosi,San Luis Potosi,Mexico,78000.0,22.149709,-100.976093,Wine & Beer,Bar Only,Medium,No,Open,,Mexican


# Remove Unnecessary Columns 

In [26]:
# List the merged frame's column labels before deciding which ones to remove.
restaurant_df.columns

Index(['Restaurant_ID', 'Name', 'City', 'State', 'Country', 'Zip_Code',
       'Latitude', 'Longitude', 'Alcohol_Service', 'Smoking_Allowed', 'Price',
       'Franchise', 'Area', 'Parking', 'Cuisine'],
      dtype='object')

In [27]:
# Keep only the identifier, name and cuisine columns; every other column of the
# merged frame (location, services, price, parking, ...) is discarded.
# Selecting the columns to keep, instead of dropping with inplace=True, leaves
# this cell safe to re-run: an in-place drop raises KeyError on a second run
# because the listed columns are already gone.
restaurant_df = restaurant_df[['Restaurant_ID', 'Name', 'Cuisine']]

In [28]:
# Display the trimmed frame to confirm which columns remain.
restaurant_df

Unnamed: 0,Restaurant_ID,Name,Cuisine
0,132560,Puesto de Gorditas,Regional
1,132572,Cafe Chaires,Cafeteria
2,132583,McDonalds Centro,American
3,132584,Gorditas Doña Tota,Mexican
4,132594,Tacos De Barbacoa Enfrente Del Tec,Mexican
...,...,...,...
107,135086,McDonalds Parque Tangamanga,Fast Food
108,135088,Cafeteria Cenidet,Cafeteria
109,135104,Vips,Mexican
110,135106,El Rincón De San Francisco,Mexican


# Export CSV

In [29]:
# Persist the trimmed restaurant table to the working directory.
# index=True is the pandas default, spelled out here: the row index is written
# as an unnamed first column of the file.
# NOTE(review): pass index=False instead if downstream readers do not expect
# that extra column — confirm against the consumers of this file.
restaurant_df.to_csv('restaurant_data.csv', index=True)

# Remove Columns from Ratings DataFrame

In [35]:
# Load the consumer ratings table from the working directory and show it as
# the cell output.
restaurant_ratings = pd.read_csv(filepath_or_buffer='ratings.csv')
restaurant_ratings

Unnamed: 0,Consumer_ID,Restaurant_ID,Overall_Rating,Food_Rating,Service_Rating
0,U1077,135085,2,2,2
1,U1077,135038,2,2,1
2,U1077,132825,2,2,2
3,U1077,135060,1,2,2
4,U1068,135104,1,1,2
...,...,...,...,...,...
1156,U1043,132630,1,1,1
1157,U1011,132715,1,1,0
1158,U1068,132733,1,1,0
1159,U1068,132594,1,1,1


In [37]:
# Keep only the consumer, restaurant and overall-rating columns; the separate
# Food_Rating and Service_Rating columns are discarded.
# Selecting the columns to keep, instead of dropping with inplace=True, leaves
# this cell safe to re-run: an in-place drop raises KeyError on a second run
# because the listed columns are already gone.
restaurant_ratings = restaurant_ratings[['Consumer_ID', 'Restaurant_ID', 'Overall_Rating']]

In [38]:
# Display the trimmed ratings frame to confirm which columns remain.
restaurant_ratings

Unnamed: 0,Consumer_ID,Restaurant_ID,Overall_Rating
0,U1077,135085,2
1,U1077,135038,2
2,U1077,132825,2
3,U1077,135060,1
4,U1068,135104,1
...,...,...,...
1156,U1043,132630,1
1157,U1011,132715,1
1158,U1068,132733,1
1159,U1068,132594,1


In [39]:
# Persist the trimmed ratings table to the working directory.
# index=True is the pandas default, spelled out here: the row index is written
# as an unnamed first column of the file.
# NOTE(review): pass index=False instead if downstream readers do not expect
# that extra column — confirm against the consumers of this file.
restaurant_ratings.to_csv('restaurant_ratings.csv', index=True)