## Data files Preparation

In [1]:
!pip install kaggle
!pip install opendatasets









### Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import json
from sqlite3 import dbapi2 as sq3
from pathlib import Path
from collections import OrderedDict

import os
import opendatasets as od

from time import time
from IPython.display import clear_output

import warnings
warnings.filterwarnings('ignore')

### Loading Data

In [3]:
# Download the Yelp dataset from Kaggle.
# The recorded output shows that an existing ".\yelp-dataset" folder is reused
# unless force=True is passed.
od.download(
    "https://www.kaggle.com/datasets/yelp-dataset/yelp-dataset")

Skipping, found downloaded files in ".\yelp-dataset" (use force=True to force download)


In [4]:
# Absolute path of the dataset folder, resolved against the current working directory.
download_path = os.path.abspath('yelp-dataset')

In [5]:
# Data paths
# os.path.join is used instead of concatenating '\...' literals: '\y' is not a
# valid escape sequence (newer Python versions warn about it) and a hard-coded
# backslash separator only works on Windows.

# Full review text, including the user_id that wrote the review and the
# business_id the review is written for.
reviews = os.path.join(download_path, 'yelp_academic_dataset_review.json')
# Business data including location data, attributes, and categories.
business = os.path.join(download_path, 'yelp_academic_dataset_business.json')
# User data including the user's friend mapping and all associated metadata.
users = os.path.join(download_path, 'yelp_academic_dataset_user.json')

In [6]:
def load_rows(file_path, nrows=None, only_return_count=False, verbose=True):
    """
    Read a JSON Lines file (one JSON object per line) into a DataFrame.

    Parameters
    ----------
    file_path : str or os.PathLike
        Location of the file to read.
    nrows : int or None
        Maximum number of lines to read; None reads the whole file.
    only_return_count : bool
        When True, lines are only counted (not parsed) and the count is returned.
    verbose : bool
        When True, print the file name, the number of lines read and the elapsed time.

    Returns
    -------
    pandas.DataFrame with one row per parsed line, or an int line count when
    only_return_count is True.
    """
    tic = time()
    count = 0
    objs = []
    # Explicit UTF-8, consistent with the other loaders in this notebook;
    # without it open() falls back to the platform default encoding.
    with open(file_path, encoding="utf-8") as json_file:
        for line in json_file:
            if nrows is not None and count >= nrows:
                break
            count += 1
            if not only_return_count:
                objs.append(json.loads(line))
    toc = time()

    if verbose:
        # basename handles the platform's path separators (split('/') left
        # Windows paths untouched) and also accepts pathlib.Path objects.
        print(os.path.basename(file_path), 'loaded. Count =', count, ', Time =', round(toc-tic,2), 'secs.')

    if only_return_count:
        return count

    return pd.DataFrame(objs)

In [7]:
#data generator to load data in chunks
def load_rows_gen(file_path, nrows=1e6, verbose=True):
    """
    Yield the contents of a JSON Lines file as successive DataFrames.

    Parameters
    ----------
    file_path : str or os.PathLike
        Location of the file to read (opened as UTF-8).
    nrows : int or float
        Maximum number of lines per yielded chunk; must be positive.
    verbose : bool
        When True, print the size and load time of every chunk.

    Yields
    ------
    pandas.DataFrame holding up to `nrows` parsed lines; the last chunk may be smaller.

    Raises
    ------
    ValueError
        If `nrows` is not positive (the chunk loop could never make progress).
    """
    if nrows <= 0:
        raise ValueError("nrows must be positive")

    with open(file_path, encoding="utf-8") as json_file:
        line = json_file.readline()
        while line:
            count = 0
            objs = []
            tic = time()
            # Fill one chunk; `line` always holds the next unread line.
            while count < nrows and line:
                count += 1
                objs.append(json.loads(line))
                line = json_file.readline()
            toc = time()
            if verbose:
                print('Loaded chunk of size:', count, ", Time =", round(toc-tic,2), 'secs.')
            yield pd.DataFrame(objs)

In [8]:
def read_json(file):
    """
    Load a whole JSON Lines file (one JSON object per line) into a DataFrame.

    Parameters
    ----------
    file : str or os.PathLike
        Location of the file to read (opened as UTF-8).

    Returns
    -------
    pandas.DataFrame with one row per line of the file.
    """
    # The with-block closes the handle even when a line fails to parse.
    with open(file, encoding="utf-8") as data_file:
        data = [json.loads(line) for line in data_file]

    return pd.DataFrame(data)


### Overview of the data

In [9]:
# Preview the first 5 records of the user file.
df_reviewer = load_rows(users,5)
df_reviewer

C:\Users\isabe\Downloads\yelp-dataset\yelp_academic_dataset_user.json loaded. Count = 5 , Time = 0.0 secs.


Unnamed: 0,user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,...,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos
0,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007-01-25 16:47:26,7217,1259,5994,2007,"NSCy54eWehBJyZdG2iE84w, pe42u7DcCH2QmI81NX-8qA...",267,...,65,55,56,18,232,844,467,467,239,180
1,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,2009-01-25 04:35:42,43091,13066,27281,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...","ueRPE0CX75ePGMqOFVj6IQ, 52oH4DrRvzzl8wh5UXyU0A...",3138,...,264,184,157,251,1847,7054,3131,3131,1521,1946
2,2WnXYQFK0hXEoTxPtV2zvg,Steph,665,2008-07-25 10:41:00,2086,1010,1003,20092010201120122013,"LuO3Bn4f3rlhyHIaNfTlnA, j9B4XdHUhDfTKVecyWQgyA...",52,...,13,10,17,3,66,96,119,119,35,18
3,SZDeASXq7o05mMNLshsdIA,Gwen,224,2005-11-29 04:38:33,512,330,299,200920102011,"enx1vVPnfdNUdPho6PH_wg, 4wOcvMLtU6a9Lslggq74Vg...",28,...,4,1,6,2,12,16,26,26,10,9
4,hA5lMy-EnncsH4JoR-hFGQ,Karen,79,2007-01-05 19:40:59,29,15,7,,"PBK4q9KEEBHhFvSXCUirIw, 3FWPpM7KU1gXeOM_ZbYMbA...",1,...,1,0,0,0,1,1,0,0,0,0


In [10]:
# Preview the first 5 records of the review file.
df_reviews = load_rows(reviews, 5)
df_reviews

C:\Users\isabe\Downloads\yelp-dataset\yelp_academic_dataset_review.json loaded. Count = 5 , Time = 0.0 secs.


Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is...",2018-07-07 22:09:11
1,BiTunyQ73aT9WBnpR9DZGw,OyoGAe7OKpv6SyGZT5g77Q,7ATYjTIgM3jUlt4UM3IypQ,5.0,1,0,1,I've taken a lot of spin classes over the year...,2012-01-03 15:28:18
2,saUsX_uimxRlCVr67Z4Jig,8g_iMtfSiwikVnbP2etR0A,YjUWPpI6HXG530lwP-fb2A,3.0,0,0,0,Family diner. Had the buffet. Eclectic assortm...,2014-02-05 20:30:30
3,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0,1,0,1,"Wow! Yummy, different, delicious. Our favo...",2015-01-04 00:01:03
4,Sx8TMOWLNuJBWer-0pcmoA,bcjbaE6dDog4jkNY91ncLQ,e4Vwtrqf-wpJfwesgvdgxQ,4.0,1,0,1,Cute interior and owner (?) gave us tour of up...,2017-01-14 20:54:15


In [11]:
# Preview the first 5 records of the business file.
df_business = load_rows(business, 5)
df_business

C:\Users\isabe\Downloads\yelp-dataset\yelp_academic_dataset_business.json loaded. Count = 5 , Time = 0.0 secs.


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."


### Create SQLite database

In [12]:
# Helper functions for working with sqlite3's dbapi2 module

PATHSTART = "."
def get_db(dbfile):
    """Return a sqlite3 connection to `dbfile`, resolved relative to PATHSTART."""
    db_location = Path(PATHSTART) / dbfile
    return sq3.connect(db_location)

def init_db(dbfile, schema):
    """
    Open the database `dbfile` and run the SQL script `schema` against it.

    Returns the open connection after committing.
    """
    connection = get_db(dbfile)

    # executescript runs every statement contained in the schema string
    connection.cursor().executescript(schema)

    connection.commit()
    return connection

def make_query(sel, conn=None):
    """
    Execute the SQL statement `sel` and return all result rows.

    Parameters
    ----------
    sel : str
        SQL statement to execute.
    conn : sqlite3 connection, optional
        Connection to run the statement on. When omitted, the notebook-global
        `db` is used, which is what this function always did before.

    Returns
    -------
    list of tuples, as returned by cursor.fetchall().
    """
    connection = db if conn is None else conn
    return connection.cursor().execute(sel).fetchall()

from collections import OrderedDict
def make_frame(list_of_tuples, legend):
    """
    Build a DataFrame from a list of row tuples.

    `legend` gives the column names; the i-th name labels the i-th element of
    every tuple. Column order follows `legend`.
    """
    columns = OrderedDict()
    for position, column_name in enumerate(legend):
        columns[column_name] = [row[position] for row in list_of_tuples]
    return pd.DataFrame.from_dict(columns)

In [13]:
#Table Schema for tables in our SQLite database

# Schema for the "users" table. Every column except user_id carries the
# "reviewer_" prefix, matching the column names produced by preprocess_user_df.
# The script drops any existing "users" table first, so running it resets the table.
users_schema = """
DROP TABLE IF EXISTS "users";

CREATE TABLE "users" (
    "user_id" VARCHAR PRIMARY KEY NOT NULL,
    "reviewer_name" VARCHAR,
    "reviewer_review_count" INTEGER,
    "reviewer_yelping_since" TIMESTAMP,
    "reviewer_useful" INTEGER,
    "reviewer_funny" INTEGER,
    "reviewer_cool" INTEGER,
    "reviewer_elite" VARCHAR,
    "reviewer_friends" VARCHAR,
    "reviewer_fans" INTEGER,
    "reviewer_average_stars" FLOAT,
    "reviewer_compliment_hot" INTEGER,
    "reviewer_compliment_more" INTEGER, 
    "reviewer_compliment_profile" INTEGER,
    "reviewer_compliment_cute" INTEGER,
    "reviewer_compliment_list" INTEGER,
    "reviewer_compliment_note" INTEGER,
    "reviewer_compliment_plain" INTEGER,
    "reviewer_compliment_cool" INTEGER,
    "reviewer_compliment_funny" INTEGER,
    "reviewer_compliment_writer" INTEGER,
    "reviewer_compliment_photos" INTEGER
);
"""
# Schema for the "business" table (the table name is singular although the
# variable is called businesses_schema). Column names match the
# "business_"-prefixed columns produced by preprocess_business_df; there are no
# columns for "hours" or "attributes" because that function drops them.
businesses_schema="""
DROP TABLE IF EXISTS "business";

CREATE TABLE "business" (
    "business_id" VARCHAR PRIMARY KEY NOT NULL,
    "business_name" VARCHAR,
    "business_full_address" VARCHAR,
    "business_city" VARCHAR,
    "business_state" VARCHAR,
    "business_postal_code" VARCHAR,
    "business_latitude" FLOAT,
    "business_longitude" FLOAT,
    "business_stars" FLOAT,
    "business_review_count" INTEGER,
    "business_is_open" BOOLEAN,
    "business_categories" VARCHAR
);
"""
# Schema for the "reviews" table; column names are the raw field names of the
# review JSON. The business foreign key references "business" (singular), the
# table created by businesses_schema. It previously pointed at a non-existent
# "businesses" table, which makes inserts fail as soon as foreign-key
# enforcement is switched on (PRAGMA foreign_keys = ON).
reviews_schema = """
DROP TABLE IF EXISTS "reviews";

CREATE TABLE "reviews" (
    "review_id" VARCHAR PRIMARY KEY,
    "user_id" VARCHAR,
    "business_id" VARCHAR,
    "stars" FLOAT,
    "useful" INTEGER,
    "funny" INTEGER,
    "cool" INTEGER,
    "text"  VARCHAR,
    "date" TIMESTAMP,

    FOREIGN KEY (user_id) REFERENCES users(user_id),
    FOREIGN KEY (business_id) REFERENCES business(business_id)
);
"""
# Not referenced by any cell shown in this notebook; kept in case other code uses it.
schema_close = ");"

In [14]:
# NOTE(review): read_json is also defined in an earlier cell of this notebook;
# this later definition is the one in effect from here on.
def read_json(file):
    """
    Load a whole JSON Lines file (one JSON object per line) into a DataFrame.

    Parameters
    ----------
    file : str or os.PathLike
        Location of the file to read (opened as UTF-8).

    Returns
    -------
    pandas.DataFrame with one row per line of the file.
    """
    # The with-block closes the handle even when a line fails to parse.
    with open(file, encoding="utf-8") as data_file:
        data = [json.loads(line) for line in data_file]

    return pd.DataFrame(data)

In [15]:
#reading data from json to dataframe
# Loads the complete business file, replacing the 5-row preview that was bound
# to df_business earlier.
df_business = read_json(business)
df_business.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."


In [16]:
%%time

#Here we preprocess our businesses data
def preprocess_business_df(df):
    """
    Filter and reshape the raw business data for loading into the database.

    Steps:
      1. keep only rows whose `categories` text contains 'Restaurant' or 'Food'
         (rows with missing categories are excluded);
      2. drop the `hours` and `attributes` columns;
      3. convert `is_open` to bool;
      4. rename the remaining columns to their "business_"-prefixed names
         (`address` becomes `business_full_address`; `business_id` is unchanged).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw business records; must contain `categories`, `hours`, `attributes`
        and `is_open`.

    Returns
    -------
    A new pandas.DataFrame; `df` itself is not modified and the original index
    labels of the kept rows are preserved.
    """
    categories = df['categories']
    #mask to only select businesses which are either Restaurant or serve Food
    is_food_related = (
        categories.str.contains('Restaurant', na=False)
        | categories.str.contains('Food', na=False)
    )

    # Create a dictionary to specify the column name changes
    column_mapping = {
        'name': 'business_name',
        'address': 'business_full_address',
        'city': 'business_city',
        'state': 'business_state',
        'postal_code': 'business_postal_code',
        'latitude': 'business_latitude',
        'longitude': 'business_longitude',
        'stars': 'business_stars',
        'review_count': 'business_review_count',
        'is_open': 'business_is_open',
        'categories': 'business_categories'
    }

    # A single non-mutating chain replaces the in-place drop and attribute
    # assignment on a filtered slice, which trigger SettingWithCopyWarning.
    return (
        df.loc[is_food_related]
        .drop(columns=['hours', 'attributes'])
        .assign(is_open=lambda frame: frame['is_open'].astype(bool))
        .rename(columns=column_mapping)
    )

# NOTE(review): this overwrites the raw df_business with the processed frame, so
# re-running this cell without re-running the load cell would call the function
# on a frame whose `categories` column has already been renamed.
df_business = preprocess_business_df(df_business)
df_business.head()

CPU times: total: 109 ms
Wall time: 678 ms


Unnamed: 0,business_id,business_name,business_full_address,business_city,business_state,business_postal_code,business_latitude,business_longitude,business_stars,business_review_count,business_is_open,business_categories
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,True,"Restaurants, Food, Bubble Tea, Coffee & Tea, B..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,True,"Brewpubs, Breweries, Food"
5,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,615 S Main St,Ashland City,TN,37015,36.269593,-87.058943,2.0,6,True,"Burgers, Fast Food, Sandwiches, Food, Ice Crea..."
8,k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,8025 Mackenzie Rd,Affton,MO,63123,38.565165,-90.321087,3.0,19,False,"Pubs, Restaurants, Italian, Bars, American (Tr..."
9,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,2312 Dickerson Pike,Nashville,TN,37207,36.208102,-86.76817,1.5,10,True,"Ice Cream & Frozen Yogurt, Fast Food, Burgers,..."


In [17]:
%%time
#Create db
# Runs the three schema scripts in one call. Each begins with DROP TABLE IF EXISTS,
# so any existing tables in yelp_database.db are recreated empty.
db = init_db("yelp_database.db", users_schema+businesses_schema+reviews_schema)

#business data to sql
# if_exists='append' inserts into the "business" table that the schema just created.
df_business.to_sql('business', db, if_exists='append', index=False)

#release memory
del df_business

CPU times: total: 7.52 s
Wall time: 2min 51s


In [18]:
#reading data from json to dataframe
# The whole user file is loaded at once here, unlike the reviews, which are
# read in chunks further down.
df_users = read_json(users)
df_users.head()

Unnamed: 0,user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,...,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos
0,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007-01-25 16:47:26,7217,1259,5994,2007,"NSCy54eWehBJyZdG2iE84w, pe42u7DcCH2QmI81NX-8qA...",267,...,65,55,56,18,232,844,467,467,239,180
1,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,2009-01-25 04:35:42,43091,13066,27281,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...","ueRPE0CX75ePGMqOFVj6IQ, 52oH4DrRvzzl8wh5UXyU0A...",3138,...,264,184,157,251,1847,7054,3131,3131,1521,1946
2,2WnXYQFK0hXEoTxPtV2zvg,Steph,665,2008-07-25 10:41:00,2086,1010,1003,20092010201120122013,"LuO3Bn4f3rlhyHIaNfTlnA, j9B4XdHUhDfTKVecyWQgyA...",52,...,13,10,17,3,66,96,119,119,35,18
3,SZDeASXq7o05mMNLshsdIA,Gwen,224,2005-11-29 04:38:33,512,330,299,200920102011,"enx1vVPnfdNUdPho6PH_wg, 4wOcvMLtU6a9Lslggq74Vg...",28,...,4,1,6,2,12,16,26,26,10,9
4,hA5lMy-EnncsH4JoR-hFGQ,Karen,79,2007-01-05 19:40:59,29,15,7,,"PBK4q9KEEBHhFvSXCUirIw, 3FWPpM7KU1gXeOM_ZbYMbA...",1,...,1,0,0,0,1,1,0,0,0,0


In [19]:
%%time
#Preprocess user data
def preprocess_user_df(df):
    """
    Prepare the raw user data for loading into the database.

    Renames the columns to their "reviewer_"-prefixed names (`user_id` is
    unchanged), parses `yelping_since` into datetimes and replaces empty
    `elite` strings with NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw user records; must contain `yelping_since` and `elite`.

    Returns
    -------
    A new pandas.DataFrame; `df` itself is not modified.
    """
    # Create a dictionary to specify the column name changes
    column_mapping = {
        'name': 'reviewer_name',
        'review_count': 'reviewer_review_count',
        'yelping_since': 'reviewer_yelping_since',
        'useful': 'reviewer_useful',
        'funny': 'reviewer_funny',
        'cool': 'reviewer_cool',
        'elite': 'reviewer_elite',
        'fans': 'reviewer_fans',
        'friends': 'reviewer_friends',
        'average_stars': 'reviewer_average_stars',
        'compliment_hot': 'reviewer_compliment_hot',
        'compliment_more': 'reviewer_compliment_more',
        'compliment_profile': 'reviewer_compliment_profile',
        'compliment_cute': 'reviewer_compliment_cute',
        'compliment_list': 'reviewer_compliment_list',
        'compliment_note': 'reviewer_compliment_note',
        'compliment_plain': 'reviewer_compliment_plain',
        'compliment_cool': 'reviewer_compliment_cool',
        'compliment_funny': 'reviewer_compliment_funny',
        'compliment_writer': 'reviewer_compliment_writer',
        'compliment_photos': 'reviewer_compliment_photos'
    }

    # Rename first: rename returns a new frame, so the conversions below are
    # applied to that result and never touch the caller's DataFrame.
    renamed = df.rename(columns=column_mapping)
    renamed['reviewer_yelping_since'] = pd.to_datetime(renamed['reviewer_yelping_since'])
    renamed['reviewer_elite'] = renamed['reviewer_elite'].replace('', np.nan)
    return renamed

# Rebind df_users to the processed frame with "reviewer_"-prefixed column names.
df_users = preprocess_user_df(df_users)

CPU times: total: 1.77 s
Wall time: 1.87 s


In [20]:
# Check the column names and dtypes after preprocessing
# (reviewer_yelping_since should now be datetime64).
df_users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1987897 entries, 0 to 1987896
Data columns (total 22 columns):
 #   Column                       Dtype         
---  ------                       -----         
 0   user_id                      object        
 1   reviewer_name                object        
 2   reviewer_review_count        int64         
 3   reviewer_yelping_since       datetime64[ns]
 4   reviewer_useful              int64         
 5   reviewer_funny               int64         
 6   reviewer_cool                int64         
 7   reviewer_elite               object        
 8   reviewer_friends             object        
 9   reviewer_fans                int64         
 10  reviewer_average_stars       float64       
 11  reviewer_compliment_hot      int64         
 12  reviewer_compliment_more     int64         
 13  reviewer_compliment_profile  int64         
 14  reviewer_compliment_cute     int64         
 15  reviewer_compliment_list     int64         
 16  

In [21]:
%%time
#Sending user data to SQL db
# if_exists='append' inserts into the "users" table already created by init_db.
df_users.to_sql('users', db, if_exists='append', index=False)
#Release memory
del df_users

CPU times: total: 30.2 s
Wall time: 57 s


In [22]:
%%time
#Here we load data from reviews

#size of review.json overpowers RAM hence we'll process data in chunks and store it to table
# load_rows_gen yields one DataFrame per chunk (up to 1e6 rows each by default).
for data in load_rows_gen(reviews):
    #transformations
    data.date = pd.to_datetime(data.date)
    # Drops every review row that has a missing value in any column.
    data.dropna(inplace=True)
    #sending chunk to sql
    data.to_sql('reviews', db, if_exists='append', index=False)
# Release the last chunk.
# NOTE(review): raises NameError if the generator yielded nothing (empty file).
del data

Loaded chunk of size: 1000000 , Time = 4.74 secs.
Loaded chunk of size: 1000000 , Time = 4.76 secs.
Loaded chunk of size: 1000000 , Time = 4.76 secs.
Loaded chunk of size: 1000000 , Time = 4.82 secs.
Loaded chunk of size: 1000000 , Time = 4.89 secs.
Loaded chunk of size: 1000000 , Time = 4.81 secs.
Loaded chunk of size: 990280 , Time = 4.89 secs.
CPU times: total: 2min 59s
Wall time: 4min


In [23]:
# Connect to database
# NOTE(review): this rebinds `db` without closing the connection that init_db
# returned earlier in the notebook.
db = sq3.connect('yelp_database.db')

In [24]:
# List the tables present in the database by querying the sqlite_master catalog.
cursor = db.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

# Each fetched row is a 1-tuple; keep only the table name.
table_names = [table[0] for table in tables]
print(table_names)


['users', 'business', 'reviews']


In [25]:
# Define your SQL join query
# Joins every review to its author (users) and to its business. Both joins are
# inner joins and the business table only holds the rows kept by
# preprocess_business_df, so reviews of other businesses are not returned.
# The unqualified columns (cool, date, funny, review_id, stars, text, useful)
# come from reviews; the other two tables use reviewer_/business_ prefixes.
join_query = '''
SELECT business.business_id,
business.business_categories,
business.business_city,
business.business_full_address,
business.business_latitude,
business.business_longitude,
business.business_name,
business.business_is_open,
business.business_review_count,
business.business_stars,
business.business_state,
cool,
date,
funny,
review_id,
users.reviewer_average_stars,
users.reviewer_cool,
users.reviewer_funny,
users.reviewer_name,
users.reviewer_review_count,
users.reviewer_useful,
stars,
text,
useful,
reviews.user_id 

FROM reviews 
JOIN users ON reviews.user_id = users.user_id 
JOIN business ON reviews.business_id = business.business_id 
;
'''

In [26]:
# Load the full join result into a DataFrame and preview the first rows.
df = pd.read_sql_query(join_query, db)
df.head(5)
# The connection is left open here; it is closed in the last cell of the notebook.

Unnamed: 0,business_id,business_categories,business_city,business_full_address,business_latitude,business_longitude,business_name,business_is_open,business_review_count,business_stars,...,reviewer_average_stars,reviewer_cool,reviewer_funny,reviewer_name,reviewer_review_count,reviewer_useful,stars,text,useful,user_id
0,XQfwVwDr-v0ZS3_CbbE5Xw,"Restaurants, Breakfast & Brunch, Food, Juice B...",North Wales,1460 Bethlehem Pike,40.210196,-75.223639,Turning Point of North Wales,1,169,3.0,...,4.06,8,3,Melanie,33,32,3.0,"If you decide to eat here, just be aware it is...",0,mh_-eMZ6K5RLWhZyISBhwA
1,YjUWPpI6HXG530lwP-fb2A,"Restaurants, Breakfast & Brunch",Tucson,748 W Starr Pass Blvd,32.207233,-110.980864,Kettle Restaurant,1,47,3.5,...,4.69,1300,675,Debra,1332,1660,3.0,Family diner. Had the buffet. Eclectic assortm...,0,8g_iMtfSiwikVnbP2etR0A
2,kxX2SOes4o-D3ZQBkiMRfA,"Halal, Pakistani, Restaurants, Indian",Philadelphia,2481 Grant Ave,40.079848,-75.02508,Zaika,1,181,4.0,...,4.78,1,0,Kyle,9,1,5.0,"Wow! Yummy, different, delicious. Our favo...",1,_7bHUi9Uuf5__HHc_Q8guQ
3,e4Vwtrqf-wpJfwesgvdgxQ,"Sandwiches, Beer, Wine & Spirits, Bars, Food, ...",New Orleans,2549 Banks St,29.962102,-90.087958,Melt,0,32,4.0,...,2.97,12,20,Sophia,126,74,4.0,Cute interior and owner (?) gave us tour of up...,1,bcjbaE6dDog4jkNY91ncLQ
4,04UD14gamNjLY0IDYVhHJg,"Mediterranean, Restaurants, Seafood, Greek",Philadelphia,795 S 3rd St,39.938013,-75.148131,Dmitri's,0,273,4.0,...,2.0,1,7,Q,4,5,1.0,I am a long term frequent customer of this est...,1,eUta8W_HdHMXPzLBBZhL1A


In [27]:
# Export the joined review/user/business DataFrame `df` to a CSV file stored in
# the dataset folder.

# os.path.join avoids the '\y' literal (an invalid escape sequence) and the
# Windows-only backslash separator.
csv_file_path = os.path.join(download_path, 'yelp.csv')

# Save the DataFrame to a CSV file
df.to_csv(csv_file_path, index=False)


In [28]:
# Close the SQLite connection now that the joined data has been exported.
db.close()