# ETL archivos Yelp
El objetivo es poder entender la naturaleza, formato y estructura que conteinen los archivos de Yelp para luego realizar el proceso de ETL.

* business.pkl
* review.json
* user.parquet
* checkin.json
* tip.json


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyspark.sql import SparkSession
import pyarrow.parquet as pq
import json
import seaborn as sns

## checkin

In [2]:
def abrir_Archivo_json(archivo):
    merged_data = []  # Lista para almacenar los objetos JSON combinados

    with open(archivo) as file:
        for line in file:
            try:
                obj = json.loads(line)
                merged_data.append(obj)
            except json.JSONDecodeError as e:
                print(f"Error al decodificar JSON en el archivo {archivo}: {str(e)}")

    df = pd.DataFrame(merged_data)  # Crear DataFrame a partir de los objetos JSON
    return df


In [3]:
archivo = 'data/Yelp/checkin.json' 
df_checkin_yelp = abrir_Archivo_json(archivo)

In [4]:
#Revisamos si tiene nulos
df_checkin_yelp.isnull().sum()

business_id    0
date           0
dtype: int64

In [5]:
#Revisamos si hay duplicados
df_checkin_yelp.duplicated().sum()

0

Desanidado de Fecha y Hora

In [6]:
fecha = []
for _, row in df_checkin_yelp.iterrows():
    business_id = row['business_id']
    dates = row['date'].split(', ')
    for date in dates:
        time, date = date.split(' ')
        fecha.append([business_id, time, date])

df_desanidado = pd.DataFrame(fecha,columns =['business_id','hour','date'])

In [7]:
df_desanidado

Unnamed: 0,business_id,hour,date
0,---kPU91CF4Lq2-WlRu9Lw,2020-03-13,21:10:56
1,---kPU91CF4Lq2-WlRu9Lw,2020-06-02,22:18:06
2,---kPU91CF4Lq2-WlRu9Lw,2020-07-24,22:42:27
3,---kPU91CF4Lq2-WlRu9Lw,2020-10-24,21:36:13
4,---kPU91CF4Lq2-WlRu9Lw,2020-12-09,21:23:33
...,...,...,...
13356870,zzu6_r3DxBJuXcjnOYVdTw,2013-12-11,00:52:49
13356871,zzu6_r3DxBJuXcjnOYVdTw,2013-12-13,00:58:14
13356872,zzw66H6hVjXQEt0Js3Mo4A,2016-12-03,23:33:26
13356873,zzw66H6hVjXQEt0Js3Mo4A,2018-12-02,19:08:45


In [8]:
#Se almacena en un csv
df_desanidado.to_csv('data/Yelp/checkin.csv',index = False)

## user.parquet

## business.pkl

In [9]:
#se lee el archivo
df_business= pd.read_pickle('data/Yelp/business.pkl')

In [10]:
df_business

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,...,state.1,postal_code.1,latitude.1,longitude.1,stars.1,review_count.1,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,,93101,34.426679,-119.711197,5.0,7,...,,,,,,,,,,
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,,63123,38.551126,-90.335695,3.0,15,...,,,,,,,,,,
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,,85711,32.223236,-110.880452,3.5,22,...,,,,,,,,,,
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,80,...,,,,,,,,,,
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,MO,18054,40.338183,-75.471659,4.5,13,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150341,IUQopTMmYQG-qRtBk-8QnA,Binh's Nails,3388 Gateway Blvd,Edmonton,IN,T6J 5H2,53.468419,-113.492054,3.0,13,...,,,,,,,,,,
150342,c8GjPIOTGVmIemT7j5_SyQ,Wild Birds Unlimited,2813 Bransford Ave,Nashville,DE,37204,36.115118,-86.766925,4.0,5,...,,,,,,,,,,
150343,_QAMST-NrQobXduilWEqSw,Claire's Boutique,"6020 E 82nd St, Ste 46",Indianapolis,AB,46250,39.908707,-86.065088,3.5,8,...,,,,,,,,,,
150344,mtGm22y5c2UHNXDFAjaPNw,Cyclery & Fitness Center,2472 Troy Rd,Edwardsville,AB,62025,38.782351,-89.950558,4.0,24,...,,,,,,,,,,


In [11]:
#Se agregan nuevas columnas en el Dataframe Vacias
df_business['NAME']=None
df_business['REVIEW_COUNT']=None
df_business['POSTAL_CODE']=None
df_business['CITY']=None
df_business['STATE']=None
df_business['BUSINESS_ID']=None
df_business['ADDRESS']=None
df_business['LATITUDE']=None
df_business['LONGITUDE']=None
df_business['STARS']=None
df_business['IS_OPEN']=None
df_business['ATTRIBUTES']=None
df_business['CATEGORIES']=None
df_business['HOURS']=None

In [12]:
#Se actualiza la columna NAME del Dataframe donde se fusionan las cadenas de texto, eliminando los caracteres no Alfabeticos
for index,i in enumerate(df_business.name.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['NAME'][index]=''.join(arr)


In [13]:

# Configurar pandas para mostrar todas las columnas
pd.set_option('display.max_columns', None)


In [14]:
#Se actualiza la columna CITY del Dataframe donde se fusionan las cadenas de texto, eliminando los caracteres no Alfabeticos
for index,i in enumerate(df_business.city.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['CITY'][index]=''.join(arr)

In [15]:
#Se actualiza la columna STATE del Dataframe donde se fusionan las cadenas de texto, eliminando los caracteres no Alfabeticos
for index,i in enumerate(df_business.state.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['STATE'][index]=''.join(arr)


In [16]:
#Se actualiza la columna REVIEW_COUNT del Dataframe donde se fusionan las cadenas de texto, eliminando los caracteres no Alfabeticos
for index,i in enumerate(df_business.review_count.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['REVIEW_COUNT'][index]=''.join(arr)


In [17]:
#Se actualiza la columna BUSINESS_ID del Dataframe donde se fusionan las cadenas de texto, eliminando los caracteres no Alfabeticos
for index,i in enumerate(df_business.business_id.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['BUSINESS_ID'][index]=''.join(arr)


In [18]:
#Se actualiza la columna ADDRESS del Dataframe donde se fusionan las cadenas de texto, eliminando los caracteres no Alfabeticos
for index,i in enumerate(df_business.address.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['ADDRESS'][index]=''.join(arr)


In [19]:
#Se actualiza la columna POSTAL_CODE del Dataframe donde se fusionan las cadenas de texto, eliminando los caracteres no Alfabeticos
for index,i in enumerate(df_business.postal_code.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['POSTAL_CODE'][index]=''.join(arr)

In [20]:
#Se filtran los elementos numéricos mayores que 1 en cada valor de la columna 'latitude' del DataFrame 'df_business'.
# A continuación, se asigna el primer elemento filtrado a la columna 'LATITUDE' en el DataFrame 'df_business' en la fila
#correspondiente al índice actual.
for index,i in enumerate(df_business.latitude.values):
    arr=[]
    for e in i:
       if e>1:
         arr.append(e)
    df_business['LATITUDE'][index]=arr[0]


In [21]:
#Se filtran los elementos numéricos menores que -1 en cada valor de la columna 'longitude' del DataFrame 'df_business'.
#Luego, se asigna el primer elemento filtrado a la columna 'LONGITUDE' en el DataFrame 'df_business' en la fila correspondiente al
# índice actual.
for index,i in enumerate(df_business.longitude.values):
    arr=[]
    for e in i:
        if e<-1:
            arr.append(e)
    df_business['LONGITUDE'][index]=arr[0]

In [22]:
#Se filtran los elementos numéricos mayores que 0.1 en cada valor de la columna 'stars' del DataFrame 'df_business'.
# Después, se asigna el primer elemento filtrado a la columna 'STARS' en el DataFrame 'df_business' en la fila correspondiente 
#al índice actual.
for index,i in enumerate(df_business.stars.values):
    arr=[]
    for e in i:
       if e>0.1:
         arr.append(e)
    df_business['STARS'][index]=arr[0]


In [23]:
#Se filtran los elementos numéricos mayores o iguales a 0 en cada valor de la columna 'is_open' del DataFrame 'df_business'. 
#A continuación, se asigna el primer elemento filtrado a la columna 'IS_OPEN' en el DataFrame 'df_business' en la fila correspondiente al índice actual

for index,i in enumerate(df_business.is_open.values):
    arr=[]
    for e in i:
       if e >=0:
         arr.append(e)
    df_business['IS_OPEN'][index]=arr[0]

In [24]:
#Este código busca y guarda el primer diccionario encontrado en la columna 'attributes' del dataframe 'df_business' en la columna 'ATTRIBUTES'.
for index,i in enumerate(df_business.attributes.values):
    arr=[]
    for e in i:
        if isinstance(e,dict):
         arr.append(e)
    if len(arr)>0:
     df_business['ATTRIBUTES'][index]=arr[0]


In [25]:
#Este código filtra los elementos de tipo cadena en la columna 'categories' del dataframe 'df_business'. 
#Luego, fusiona todas las cadenas filtradas en una sola cadena y la asigna a la columna 'CATEGORIES' en el dataframe 'df_business'.
for index,i in enumerate(df_business.categories.values):
    arr=[]
    for e in i:
        if isinstance(e,str):
         arr.append(e)
    df_business['CATEGORIES'][index]=''.join(arr)

In [26]:
#En este código, se recorren los valores de la columna 'hours' en el dataframe 'df_business' y se filtran los elementos que son diccionarios. 
#Luego, se asigna el primer diccionario encontrado a la columna 'HOURS' en el dataframe 'df_business' en la fila correspondiente.
#En resumen, el código extrae y guarda el primer diccionario encontrado en la columna 'hours' del dataframe 'df_business' en la columna 'HOURS'.
for index,i in enumerate(df_business.hours.values):
    arr=[]
    for e in i:
        if isinstance(e,dict):
         arr.append(e)
    if len(arr)>0:     
     df_business['HOURS'][index]=arr[0]


In [28]:
df_business.sample(10)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,business_id.1,name.1,address.1,city.1,state.1,postal_code.1,latitude.1,longitude.1,stars.1,review_count.1,is_open.1,attributes.1,categories.1,hours.1,NAME,REVIEW_COUNT,POSTAL_CODE,CITY,STATE,BUSINESS_ID,ADDRESS,LATITUDE,LONGITUDE,STARS,IS_OPEN,ATTRIBUTES,CATEGORIES,HOURS
6481,-RzbRfPnKcwTixtokN-p6A,Cosi,215 Lombard St,Philadelphia,FL,19147,39.942257,-75.145956,3.5,5,0,,"Food, Coffee & Tea",,,,,,,,,,,,,,,,Cosi,,19147,Philadelphia,FL,-RzbRfPnKcwTixtokN-p6A,215 Lombard St,39.942257,-75.145956,3.5,0,,"Food, Coffee & Tea",
144944,v2GbFKx4gxfyk_rSQK0hjQ,A Discount Mobile Convertible Tops and Headliners,,Palm Harbor,NJ,34682,28.07463,-82.766556,3.5,5,1,"{'BusinessAcceptsCreditCards': 'False', 'WiFi'...","Auto Parts & Supplies, Auto Detailing, Furnitu...","{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",,,,,,,,,,,,,,,A Discount Mobile Convertible Tops and Headliners,,34682,Palm Harbor,NJ,v2GbFKx4gxfyk_rSQK0hjQ,,28.07463,-82.766556,3.5,1,"{'BusinessAcceptsCreditCards': 'False', 'WiFi'...","Auto Parts & Supplies, Auto Detailing, Furnitu...","{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ..."
86177,yTeH-Xqp2qob651GU6aE7Q,Oggi Pizzeria,218 E Davis Blvd,Tampa,LA,33606,27.92683,-82.45381,4.5,55,0,"{'Ambience': '{'touristy': False, 'hipster': F...","Food, Pizza, Restaurants, Salad, International...",{'Monday': '0:0-0:0'},,,,,,,,,,,,,,,Oggi Pizzeria,,33606,Tampa,LA,yTeH-Xqp2qob651GU6aE7Q,218 E Davis Blvd,27.92683,-82.45381,4.5,0,"{'Ambience': '{'touristy': False, 'hipster': F...","Food, Pizza, Restaurants, Salad, International...",{'Monday': '0:0-0:0'}
112285,5HY-o1nDuR0calZpsw5qiA,The Sensory Playce,1471 S Vinnell Way,Boise,TN,83709,43.591135,-116.286784,4.0,8,1,"{'WheelchairAccessible': 'True', 'BusinessAcce...","Kids Activities, Active Life, Playgrounds, Ind...","{'Monday': '0:0-0:0', 'Tuesday': '10:0-17:0', ...",,,,,,,,,,,,,,,The Sensory Playce,,83709,Boise,TN,5HY-o1nDuR0calZpsw5qiA,1471 S Vinnell Way,43.591135,-116.286784,4.0,1,"{'WheelchairAccessible': 'True', 'BusinessAcce...","Kids Activities, Active Life, Playgrounds, Ind...","{'Monday': '0:0-0:0', 'Tuesday': '10:0-17:0', ..."
12321,YAT0yzBf4o96xJJiWVsgNg,American Male,37 S 16th St,Philadelphia,FL,19102,39.951625,-75.167058,3.0,11,0,"{'ByAppointmentOnly': 'True', 'RestaurantsPric...","Hair Salons, Beauty & Spas, Skin Care",,,,,,,,,,,,,,,,American Male,,19102,Philadelphia,FL,YAT0yzBf4o96xJJiWVsgNg,37 S 16th St,39.951625,-75.167058,3.0,0,"{'ByAppointmentOnly': 'True', 'RestaurantsPric...","Hair Salons, Beauty & Spas, Skin Care",
145444,s8MTunfjxZslRLcjlTiCJg,North Star Bar,2639 Poplar St,Philadelphia,PA,19130,39.973016,-75.180598,3.5,134,0,"{'BusinessAcceptsCreditCards': 'True', 'GoodFo...","Food, Arts & Entertainment, Beer, Wine & Spiri...","{'Monday': '17:0-2:0', 'Tuesday': '17:0-2:0', ...",,,,,,,,,,,,,,,North Star Bar,,19130,Philadelphia,PA,s8MTunfjxZslRLcjlTiCJg,2639 Poplar St,39.973016,-75.180598,3.5,0,"{'BusinessAcceptsCreditCards': 'True', 'GoodFo...","Food, Arts & Entertainment, Beer, Wine & Spiri...","{'Monday': '17:0-2:0', 'Tuesday': '17:0-2:0', ..."
32553,_hBFNHhw8MHIlyNT3tuAKw,Wendy's,5250 South East Street,Indianapolis,PA,46227,39.68981,-86.149301,2.5,8,1,"{'GoodForKids': 'True', 'RestaurantsReservatio...","Fast Food, Restaurants, Burgers","{'Monday': '10:0-1:0', 'Tuesday': '10:0-1:0', ...",,,,,,,,,,,,,,,Wendy's,,46227,Indianapolis,PA,_hBFNHhw8MHIlyNT3tuAKw,5250 South East Street,39.68981,-86.149301,2.5,1,"{'GoodForKids': 'True', 'RestaurantsReservatio...","Fast Food, Restaurants, Burgers","{'Monday': '10:0-1:0', 'Tuesday': '10:0-1:0', ..."
127300,zgZuBaWSo7wmDoHzvKNTRQ,Erlich's Four Hour Cleaner and Shirt Laundry,1400 Washington Ave,Saint Louis,DE,63103,38.632264,-90.199838,1.0,9,0,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...","Dry Cleaning & Laundry, Local Services, Laundr...",,,,,,,,,,,,,,,,Erlich's Four Hour Cleaner and Shirt Laundry,,63103,Saint Louis,DE,zgZuBaWSo7wmDoHzvKNTRQ,1400 Washington Ave,38.632264,-90.199838,1.0,0,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...","Dry Cleaning & Laundry, Local Services, Laundr...",
78360,IeE2U3wFI1Wa0HbH4jJ2Iw,Esporta Fitness,2020 County Line Rd,Huntingdon Valley,MO,19006,40.169642,-75.070149,2.0,36,1,"{'ByAppointmentOnly': 'False', 'GoodForKids': ...","Yoga, Active Life, Fitness & Instruction, Gyms...","{'Monday': '0:0-0:0', 'Tuesday': '5:0-22:0', '...",,,,,,,,,,,,,,,Esporta Fitness,,19006,Huntingdon Valley,MO,IeE2U3wFI1Wa0HbH4jJ2Iw,2020 County Line Rd,40.169642,-75.070149,2.0,1,"{'ByAppointmentOnly': 'False', 'GoodForKids': ...","Yoga, Active Life, Fitness & Instruction, Gyms...","{'Monday': '0:0-0:0', 'Tuesday': '5:0-22:0', '..."
7845,Birks4csk8zknL5a5QCiow,Mello Freeze,1437 Bond Ave,East Saint Louis,NJ,62201,38.612051,-90.154869,5.0,5,1,"{'BusinessAcceptsCreditCards': 'True', 'Busine...","Ice Cream & Frozen Yogurt, Food","{'Monday': '12:0-21:0', 'Tuesday': '12:0-21:0'...",,,,,,,,,,,,,,,Mello Freeze,,62201,East Saint Louis,NJ,Birks4csk8zknL5a5QCiow,1437 Bond Ave,38.612051,-90.154869,5.0,1,"{'BusinessAcceptsCreditCards': 'True', 'Busine...","Ice Cream & Frozen Yogurt, Food","{'Monday': '12:0-21:0', 'Tuesday': '12:0-21:0'..."


In [29]:
df_business.to_csv('data/Yelp/business.csv',index = False)

In [3]:
dtypes = {'col14': str, 'col15': str, 'col16': str, 'col17': str, 'col18': str, 'col19': str, 'col25': str, 'col26': str, 'col27': str}
df_business = pd.read_csv('data/Yelp/business.csv', dtype=dtypes)

In [31]:
#Filtramos el dataframe solo a las columnas nuevas generadas con los bucles, con los nombres iniciales


In [32]:
# Obtén la lista de todas las columnas
all_columns = df_business.columns

# Itera sobre todas las columnas y aplica el método .loc
for col in all_columns:
    df_business.loc[:, col] = df_business[col]


In [4]:
df_business = df_business[['BUSINESS_ID', 'NAME', 'REVIEW_COUNT', 'CITY', 'STATE', 'ADDRESS', 'POSTAL_CODE', 'LATITUDE', 'LONGITUDE', 'STARS', 'IS_OPEN', 'ATTRIBUTES', 'CATEGORIES', 'HOURS']]

df_business.loc[:, 'business_id'] = df_business['BUSINESS_ID']
df_business.loc[:, 'name'] = df_business['NAME']
df_business.loc[:, 'address'] = df_business['ADDRESS']
df_business.loc[:, 'city'] = df_business['CITY']
df_business.loc[:, 'state'] = df_business['STATE']
df_business.loc[:, 'postal_code'] = df_business['POSTAL_CODE']
df_business.loc[:, 'latitude'] = df_business['LATITUDE']
df_business.loc[:, 'longitude'] = df_business['LONGITUDE']
df_business.loc[:, 'stars'] = df_business['STARS']
df_business.loc[:, 'review_count'] = df_business['REVIEW_COUNT']
df_business.loc[:, 'is_open'] = df_business['IS_OPEN']
df_business.loc[:, 'attributes'] = df_business['ATTRIBUTES']
df_business.loc[:, 'categories'] = df_business['CATEGORIES']
df_business.loc[:, 'hours'] = df_business['HOURS']

df_business = df_business[['business_id', 'name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'stars', 'review_count', 'is_open', 'attributes', 'categories', 'hours']]


KeyError: "None of [Index(['BUSINESS_ID', 'NAME', 'REVIEW_COUNT', 'CITY', 'STATE', 'ADDRESS',\n       'POSTAL_CODE', 'LATITUDE', 'LONGITUDE', 'STARS', 'IS_OPEN',\n       'ATTRIBUTES', 'CATEGORIES', 'HOURS'],\n      dtype='object')] are in the [columns]"

In [5]:
df_business

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,,93101,34.426679,-119.711197,5.0,,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,,63123,38.551126,-90.335695,3.0,,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,,85711,32.223236,-110.880452,3.5,,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,MO,18054,40.338183,-75.471659,4.5,,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150341,IUQopTMmYQG-qRtBk-8QnA,Binh's Nails,3388 Gateway Blvd,Edmonton,IN,T6J 5H2,53.468419,-113.492054,3.0,,1,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","Nail Salons, Beauty & Spas","{'Monday': '10:0-19:30', 'Tuesday': '10:0-19:3..."
150342,c8GjPIOTGVmIemT7j5_SyQ,Wild Birds Unlimited,2813 Bransford Ave,Nashville,DE,37204,36.115118,-86.766925,4.0,,1,"{'BusinessAcceptsCreditCards': 'True', 'Restau...","Pets, Nurseries & Gardening, Pet Stores, Hobby...","{'Monday': '9:30-17:30', 'Tuesday': '9:30-17:3..."
150343,_QAMST-NrQobXduilWEqSw,Claire's Boutique,"6020 E 82nd St, Ste 46",Indianapolis,AB,46250,39.908707,-86.065088,3.5,,1,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...","Shopping, Jewelry, Piercing, Toy Stores, Beaut...",
150344,mtGm22y5c2UHNXDFAjaPNw,Cyclery & Fitness Center,2472 Troy Rd,Edwardsville,AB,62025,38.782351,-89.950558,4.0,,1,"{'BusinessParking': ""{'garage': False, 'street...","Fitness/Exercise Equipment, Eyewear & Optician...","{'Monday': '9:0-20:0', 'Tuesday': '9:0-20:0', ..."


In [7]:
filtro = ['Restaurant']

# Filtrar el DataFrame solo para las filas donde 'categories' no es NaN
df_filtered = df_business.dropna(subset=['categories'])

# Luego, aplicar el filtro
restaurante = df_filtered[df_filtered['categories'].str.contains('|'.join(filtro), regex=True)]


In [8]:
restaurante

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
5,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,615 S Main St,Ashland City,AZ,37015,36.269593,-87.058943,2.0,,1,"{'BusinessParking': 'None', 'BusinessAcceptsCr...","Burgers, Fast Food, Sandwiches, Food, Ice Crea...","{'Monday': '0:0-0:0', 'Tuesday': '6:0-22:0', '..."
8,k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,8025 Mackenzie Rd,Affton,TN,63123,38.565165,-90.321087,3.0,,0,"{'Caters': 'True', 'Alcohol': ""u'full_bar'"", '...","Pubs, Restaurants, Italian, Bars, American (Tr...",
9,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,2312 Dickerson Pike,Nashville,MO,37207,36.208102,-86.768170,1.5,,1,"{'RestaurantsAttire': ""'casual'"", 'Restaurants...","Ice Cream & Frozen Yogurt, Fast Food, Burgers,...","{'Monday': '0:0-0:0', 'Tuesday': '6:0-21:0', '..."
11,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,,Tampa Bay,MO,33602,27.955269,-82.456320,4.0,,1,"{'Alcohol': ""'none'"", 'OutdoorSeating': 'None'...","Vietnamese, Food, Restaurants, Food Trucks","{'Monday': '11:0-14:0', 'Tuesday': '11:0-14:0'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150325,l9eLGG9ZKpLJzboZq-9LRQ,Wawa,19 N Bishop Ave,Clifton Heights,TN,19018,39.925656,-75.310344,3.0,,1,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Restaurants, Sandwiches, Convenience Stores, C...","{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', 'W..."
150327,cM6V90ExQD6KMSU3rRB5ZA,Dutch Bros Coffee,1181 N Milwaukee St,Boise,ID,83704,43.615401,-116.284689,4.0,,1,"{'WiFi': ""'free'"", 'RestaurantsGoodForGroups':...","Cafes, Juice Bars & Smoothies, Coffee & Tea, R...","{'Monday': '0:0-0:0', 'Tuesday': '0:0-17:0', '..."
150336,WnT9NIzQgLlILjPT0kEcsQ,Adelita Taqueria & Restaurant,1108 S 9th St,Philadelphia,MO,19147,39.935982,-75.158665,4.5,,1,"{'WheelchairAccessible': 'False', 'Restaurants...","Restaurants, Mexican","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'..."
150339,2O2K6SXPWv56amqxCECd4w,The Plum Pit,4405 Pennell Rd,Aston,PA,19014,39.856185,-75.427725,4.5,,1,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...","{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', 'W..."


In [37]:
restaurante.to_csv('data/Yelp/restaurante.csv')

In [38]:
df_business.to_csv('data/Yelp/business.csv',index = False)

## tip.json

In [39]:
json_objects=[]

with open('data/Yelp/tip.json', 'r',encoding='utf-8') as f:
    for line in f:
        json_objects.append(json.loads(line))
df_tip = pd.DataFrame(json_objects)

In [40]:
df_tip

Unnamed: 0,user_id,business_id,text,date,compliment_count
0,AGNUgVwnZUey3gcPCJ76iw,3uLgwr0qeCNMjKenHJwPGQ,Avengers time with the ladies.,2012-05-18 02:17:21,0
1,NBN4MgHP9D3cw--SnauTkA,QoezRbYQncpRqyrLH6Iqjg,They have lots of good deserts and tasty cuban...,2013-02-05 18:35:10,0
2,-copOvldyKh1qr-vzkDEvw,MYoRNLb5chwjQe3c_k37Gg,It's open even when you think it isn't,2013-08-18 00:56:08,0
3,FjMQVZjSqY8syIO-53KFKw,hV-bABTK-glh5wj31ps_Jw,Very decent fried chicken,2017-06-27 23:05:38,0
4,ld0AperBXk1h6UbqmM80zw,_uN0OudeJ3Zl_tf6nxg5ww,Appetizers.. platter special for lunch,2012-10-06 19:43:09,0
...,...,...,...,...,...
908910,eYodOTF8pkqKPzHkcxZs-Q,3lHTewuKFt5IImbXJoFeDQ,Disappointed in one of your managers.,2021-09-11 19:18:57,0
908911,1uxtQAuJ2T5Xwa_wp7kUnA,OaGf0Dp56ARhQwIDT90w_g,Great food and service.,2021-10-30 11:54:36,0
908912,v48Spe6WEpqehsF2xQADpg,hYnMeAO77RGyTtIzUSKYzQ,Love their Cubans!!,2021-11-05 13:18:56,0
908913,ckqKGM2hl7I9Chp5IpAhkw,s2eyoTuJrcP7I_XyjdhUHQ,Great pizza great price,2021-11-20 16:11:44,0


## review.json

In [9]:
def abrir_Archivo_json(archivo):
    merged_data = []  # Lista para almacenar los objetos JSON combinados

    with open(archivo,encoding = 'utf-8') as file:
        for line in file:
            try:
                obj = json.loads(line)
                merged_data.append(obj)
            except json.JSONDecodeError as e:
                print(f"Error al decodificar JSON en el archivo {archivo}: {str(e)}")

    df = pd.DataFrame(merged_data)  # Crear DataFrame a partir de los objetos JSON
    return df

In [10]:
archivo = ('data/Yelp/review.json')
review=abrir_Archivo_json(archivo)

In [None]:
review

Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is...",2018-07-07 22:09:11
1,BiTunyQ73aT9WBnpR9DZGw,OyoGAe7OKpv6SyGZT5g77Q,7ATYjTIgM3jUlt4UM3IypQ,5.0,1,0,1,I've taken a lot of spin classes over the year...,2012-01-03 15:28:18
2,saUsX_uimxRlCVr67Z4Jig,8g_iMtfSiwikVnbP2etR0A,YjUWPpI6HXG530lwP-fb2A,3.0,0,0,0,Family diner. Had the buffet. Eclectic assortm...,2014-02-05 20:30:30
3,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0,1,0,1,"Wow! Yummy, different, delicious. Our favo...",2015-01-04 00:01:03
4,Sx8TMOWLNuJBWer-0pcmoA,bcjbaE6dDog4jkNY91ncLQ,e4Vwtrqf-wpJfwesgvdgxQ,4.0,1,0,1,Cute interior and owner (?) gave us tour of up...,2017-01-14 20:54:15
...,...,...,...,...,...,...,...,...,...
6990275,H0RIamZu0B0Ei0P4aeh3sQ,qskILQ3k0I_qcCMI-k6_QQ,jals67o91gcrD4DC81Vk6w,5.0,1,2,1,Latest addition to services from ICCU is Apple...,2014-12-17 21:45:20
6990276,shTPgbgdwTHSuU67mGCmZQ,Zo0th2m8Ez4gLSbHftiQvg,2vLksaMmSEcGbjI5gywpZA,5.0,2,1,2,"This spot offers a great, affordable east week...",2021-03-31 16:55:10
6990277,YNfNhgZlaaCO5Q_YJR4rEw,mm6E4FbCMwJmb7kPDZ5v2Q,R1khUUxidqfaJmcpmGd4aw,4.0,1,0,0,This Home Depot won me over when I needed to g...,2019-12-30 03:56:30
6990278,i-I4ZOhoX70Nw5H0FwrQUA,YwAMC-jvZ1fvEUum6QkEkw,Rr9kKArrMhSLVE9a53q-aA,5.0,1,0,0,For when I'm feeling like ignoring my calorie-...,2022-01-19 18:59:27


In [11]:
restaurante_review = pd.merge(restaurante,review, on = 'business_id',how='inner')

In [12]:
restaurante_review

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,...,categories,hours,review_id,user_id,stars_y,useful,funny,cool,text,date
0,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,,...,"Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",BXQcBN0iAi1lAUxibGLFzA,6_SpY41LIHZuIaiDs5FMKA,4.0,0,0,1,This is nice little Chinese bakery in the hear...,2014-05-26 01:09:53
1,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,,...,"Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",uduvUCvi9w3T2bSGivCfXg,tCXElwhzekJEH6QJe3xs7Q,4.0,3,1,2,This is the bakery I usually go to in Chinatow...,2013-10-05 15:19:06
2,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,,...,"Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",a0vwPOqDXXZuJkbBW2356g,WqfKtI-aGMmvbA9pPUxNQQ,5.0,0,0,0,"A delightful find in Chinatown! Very clean, an...",2013-10-25 01:34:57
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,,...,"Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",MKNp_CdR2k2202-c8GN5Dw,3-1va0IQfK-9tUMzfHWfTA,5.0,5,0,5,I ordered a graduation cake for my niece and i...,2018-05-20 17:58:57
4,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,CA,19107,39.955505,-75.155564,4.0,,...,"Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ...",D1GisLDPe84Rrk_R4X2brQ,EouCKoDfzaVG0klEgdDvCQ,4.0,2,1,1,HK-STYLE MILK TEA: FOUR STARS\n\nNot quite su...,2013-10-25 02:31:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4724679,hn9Toz3s-Ei3uZPt7esExA,West Side Kebab House,2470 Guardian Road NW,Edmonton,NJ,T5T 1K8,53.509649,-113.675999,4.5,,...,"Middle Eastern, Restaurants","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",Lu5jvEjtPFUa-z9IzsMv7w,CnzkseV3WTE5W8o5Pa7fJA,5.0,2,0,2,YES.\n\nI have been to 2 other comparable Midd...,2017-03-13 19:47:08
4724680,hn9Toz3s-Ei3uZPt7esExA,West Side Kebab House,2470 Guardian Road NW,Edmonton,NJ,T5T 1K8,53.509649,-113.675999,4.5,,...,"Middle Eastern, Restaurants","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",WbpiPPmiZYfDutO46AOJrA,ZZQ0g2DnXw3iXv5LgGeY7Q,5.0,0,0,0,We had the beef plate medium rare that was mou...,2018-04-14 23:41:03
4724681,hn9Toz3s-Ei3uZPt7esExA,West Side Kebab House,2470 Guardian Road NW,Edmonton,NJ,T5T 1K8,53.509649,-113.675999,4.5,,...,"Middle Eastern, Restaurants","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",cdwE0S1ONp1ypsTN1e3mLg,C6jwPqUk8VB0FuZO7y2MHA,5.0,1,1,1,Excellent Mediterranean food! Always cooked to...,2018-06-19 02:02:41
4724682,hn9Toz3s-Ei3uZPt7esExA,West Side Kebab House,2470 Guardian Road NW,Edmonton,NJ,T5T 1K8,53.509649,-113.675999,4.5,,...,"Middle Eastern, Restaurants","{'Monday': '11:0-22:0', 'Tuesday': '11:0-22:0'...",uXnne-Ui2pvObTtT3MUYxg,9b0pmwxxGIuXKXIJh__mlQ,4.0,0,0,0,This was my first visit to West Side Kebab Hou...,2017-03-24 01:46:31


In [15]:
#Se crea un filtro de estados
'''
Florida = FL
Georgia = GA
Alabama = AL
Carolina del sur = SC
Carolina del Norte = NC

'''

estados = ['FL','GA','AL','NC','SC']


# Filtrar el DataFrame solo para las filas donde 'state' no es NaN
df_estados= restaurante_review.dropna(subset=['state'])

# Luego, aplicar el filtro
df_restaurante_review = df_estados[df_estados['state'].str.contains('|'.join(estados), regex=True)]

In [16]:
df_restaurante_review

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,...,categories,hours,review_id,user_id,stars_y,useful,funny,cool,text,date
162,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771,27.916116,-82.760461,4.5,,...,"Food, Delis, Italian, Bakeries, Restaurants","{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'...",oTTuahWNWzX_018P6O6_2g,z1Dfj8kz3KCArkXaIyaBIA,1.0,6,1,0,The worst Chicken Parm. Sandwich I've ever eat...,2014-05-25 21:52:30
163,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771,27.916116,-82.760461,4.5,,...,"Food, Delis, Italian, Bakeries, Restaurants","{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'...",R7DC4sHDcklrk1s1K93FDA,HvgKiuV36e9SzNqeA5zOfA,4.0,0,0,0,"Zio's, previously known as Cesarina's is a lar...",2018-07-26 16:25:04
164,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771,27.916116,-82.760461,4.5,,...,"Food, Delis, Italian, Bakeries, Restaurants","{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'...",8kDLAf-muASQfs5zDXpiyw,7BhiY0D84Lj04kjEWn5fIQ,5.0,0,0,0,"As an update to my previous review, we had tri...",2018-04-28 00:32:12
165,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771,27.916116,-82.760461,4.5,,...,"Food, Delis, Italian, Bakeries, Restaurants","{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'...",KupYGAYqAKVLP9cspQ-9TQ,0EjWviHaYwdaMaD8VBOHWA,5.0,1,1,0,The little deli belongs in little Italy. Wonde...,2015-08-30 18:56:08
166,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771,27.916116,-82.760461,4.5,,...,"Food, Delis, Italian, Bakeries, Restaurants","{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'...",hl4dIQIKphmMWH59WrO5-g,E47ejL3krT1wG8NvgtJDgw,4.0,2,1,1,Very good food for very good prices. I had the...,2011-10-28 20:06:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4723205,esBGrrmuZzSiECyRBoKvvA,Colony Grill - St. Petersburg,670 Central Ave,St. Petersburg,FL,33701,27.770872,-82.643069,4.5,,...,"Bars, Beer Bar, Nightlife, Wine Bars, Pizza, R...","{'Monday': '11:30-23:0', 'Tuesday': '11:30-23:...",wrqWvAdWD9YpB-1C2Xnp4w,E2uJ62_uEUu5wz2EnZ9CgA,2.0,0,4,0,Very small menu. Pizza is the ONLY food item ...,2021-11-04 02:43:27
4723206,esBGrrmuZzSiECyRBoKvvA,Colony Grill - St. Petersburg,670 Central Ave,St. Petersburg,FL,33701,27.770872,-82.643069,4.5,,...,"Bars, Beer Bar, Nightlife, Wine Bars, Pizza, R...","{'Monday': '11:30-23:0', 'Tuesday': '11:30-23:...",TvXYbjdP1yiVqV4ixgKWsQ,8aE275qBmEVUjb_nrE65CQ,5.0,1,0,0,This place is great! The space is big with a n...,2021-11-04 13:28:42
4723207,esBGrrmuZzSiECyRBoKvvA,Colony Grill - St. Petersburg,670 Central Ave,St. Petersburg,FL,33701,27.770872,-82.643069,4.5,,...,"Bars, Beer Bar, Nightlife, Wine Bars, Pizza, R...","{'Monday': '11:30-23:0', 'Tuesday': '11:30-23:...",dMhZvWaJAB957YIHjwKXWA,4wMvgdEVpFLCIhFANNBvGA,5.0,1,0,0,So first looking at the menu for this place yo...,2022-01-02 03:35:40
4723208,esBGrrmuZzSiECyRBoKvvA,Colony Grill - St. Petersburg,670 Central Ave,St. Petersburg,FL,33701,27.770872,-82.643069,4.5,,...,"Bars, Beer Bar, Nightlife, Wine Bars, Pizza, R...","{'Monday': '11:30-23:0', 'Tuesday': '11:30-23:...",mTgQG-wCDdAW8ahnukggJg,mbIemu2trEjtn8viGHD3dA,5.0,0,0,0,I lived in CT for 10 years and CT style pizza ...,2021-11-26 22:55:22


In [None]:
restaurante_review.to_csv('data/Yelp/restaurante_review.csv',index = False)

# Restaurante_review
Se trabaja sobre el DataFrame que almacena las review de los restaurantes

In [3]:
df_restaurante_review = pd.read_csv('data/Yelp/df_restaurante_review')

In [4]:
df_restaurante_review.sample(5)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,...,stars_y,useful,funny,cool,text,date,year,month,day,hour
63033,u7_3L1NBWgxhBM_B-cmmnA,Pizzeria Vetri,1939 Callowhill St,Philadelphia,FL,19130,39.960888,-75.171213,4.0,,...,4.0,1,0,1,One would think with Pizzeria Vetri being so c...,2014-08-12 16:55:31,2014,8,12,16
657455,y7WPOc2_vCoBkwK3xPJ8XA,Unit Su Vege,"2000 Hamilton St, Ste 106",Philadelphia,FL,19130,39.962183,-75.17219,4.5,,...,5.0,1,0,1,Excellent vegan/vegetarian food. We ordered t...,2020-03-01 01:05:35,2020,3,1,1
227478,0oSqhCkK1qCY3I7-o0ZR1A,Garbanzo Mediterranean Fresh,8143 Maryland Ave,Clayton,FL,63105,38.652585,-90.343073,4.0,,...,5.0,1,0,0,I told the staff I had never been in before a...,2021-08-08 17:26:06,2021,8,8,17
598863,bCZFb9v6OQaNTgupVw9WEg,Fulin's Asian Cuisine,206 N Anderson Ln,Hendersonville,FL,37075,36.322611,-86.59773,3.5,,...,1.0,0,0,0,I've eaten at Fulins for many years and the fo...,2016-04-01 23:49:02,2016,4,1,23
695373,dgyKoIYEYrmRK_hktWBZdA,The Getaway,13090 Gandy Blvd N,Saint Petersburg,FL,33702,27.871061,-82.611473,3.0,,...,1.0,0,0,0,"This is a great location , right on the water ...",2017-12-24 18:26:50,2017,12,24,18


In [8]:
# Convertir la columna 'date' al formato de fecha
df_restaurante_review['date'] = pd.to_datetime(df_restaurante_review['date'])

# Agregar una nueva columna llamada 'year' que muestra el año de la columna 'date'
df_restaurante_review['year'] = df_restaurante_review['date'].dt.year
# Agregar una nueva columna llamada 'month' que muestra el mes de la columna 'date'
df_restaurante_review['month'] = df_restaurante_review['date'].dt.month
## Agregar una nueva columna llamada 'day' que muestra el dia de la columna 'date'
df_restaurante_review['day'] = df_restaurante_review['date'].dt.day


In [10]:
#agregar una nueva columa llamada 'hopur' que muestra la hora de la columna 'date'
df_restaurante_review['hour'] = df_restaurante_review['date'].dt.hour


In [13]:
df_restaurante_review.sample(10)

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,...,stars_y,useful,funny,cool,text,date,year,month,day,hour
133012,vOXoM9W8-CpHbZUiGbL3dw,Pho Queen,29000 US Highway 19 N,Clearwater,FL,33761,28.035875,-82.739051,3.5,,...,5.0,0,0,0,Awesome authentic Vietnamese restaurant in the...,2018-02-16 18:17:12,2018,2,16,18
572601,LwX2vbzttWYAdoBoVtfvRg,City Tap House,"3925 Walnut St, Fl 2, Radian Balcony",Philadelphia,FL,19104,39.954309,-75.201352,3.5,,...,1.0,0,0,0,I was so offended by my recent experience at c...,2016-01-23 20:34:02,2016,1,23,20
631476,LfaaI92M2UtiT3JriY_FWQ,El Paso Mexican Grill,601 Veterans Blvd,Metairie,FL,70005,30.001731,-90.128735,3.0,,...,3.0,1,0,0,We have been going on average every two weeks....,2016-07-22 00:18:32,2016,7,22,0
500283,8P8HIysyyGbG8_94cG1Jyg,Entree BYOB,1608 South St,Philadelphia,FL,19146,39.944011,-75.16914,4.5,,...,5.0,0,0,0,We had a party of eight for Restuarant week. O...,2015-01-21 03:39:56,2015,1,21,3
509560,cRvoGf7krkQKF85UvOQo_A,Chihuahua's Cantina & Grill,"7111 S Virginia St, Ste C",Reno,FL,89511,39.461741,-119.782225,4.0,,...,5.0,0,0,0,"My favorite Mexican restuarant by a long way, ...",2018-02-18 05:11:29,2018,2,18,5
130052,AVf4RO3bh2AAoZeCiPTQNg,Hattie B's Hot Chicken - Melrose,2222 8th Ave S,Nashville,FL,37204,36.129008,-86.777565,4.5,,...,5.0,1,0,1,We were on a cross country road trip from Pens...,2019-04-17 22:16:24,2019,4,17,22
732356,vRle3eQO4T767azL09rFnA,Lodge On The Desert,306 N Alvernon Way,Tucson,FL,85711,32.226041,-110.908959,3.5,,...,4.0,0,0,0,"Excellent choice of fresh, local ingredients a...",2011-09-15 17:48:41,2011,9,15,17
614536,dGeXdSMah56gEHwZNaRQKA,Juan's Flying Burrito,4724 S Carrollton Ave,New Orleans,FL,70119,29.973816,-90.100877,3.5,,...,3.0,1,0,0,"Juan's Flying Burrito: great food, slow servic...",2010-03-29 19:32:28,2010,3,29,19
551713,Ah0ZcMgzzigx-qLXznni3A,Bighorn Tavern,"1325 W 7th St, Ste G",Reno,FL,89503,39.531079,-119.834014,4.5,,...,5.0,0,0,0,Food was tasty. Space is unassuming. Service...,2019-07-28 22:31:13,2019,7,28,22
329573,Qo14LZITJ4fIhSdqOVsm6w,Pat O'Brien's Courtyard Restaurant,624 Bourbon St,New Orleans,FL,70130,29.958188,-90.065854,4.0,,...,3.0,0,0,0,The Hurricane tasted like a refreshing cool ai...,2015-08-27 21:01:23,2015,8,27,21


In [12]:
df_restaurante_review.to_csv('data/Yelp/df_restaurante_review',index = False)