#### review.json   : Contains full review text data including the user_id that wrote the review and the business_id the review is written for.

## Subsetting:

In [1]:
import json
import csv
import pandas as pd
import numpy as np
import gc
import re

#plotting
import matplotlib.pyplot as plt 
import seaborn as sns

def ReadJSONitems(file_name, within_list=False):
    items = []
    with open(file_name, encoding='utf8') as read_file:
        if not within_list:
            for row in read_file:
                items.append(json.loads(row))
        else:
            items = json.load(read_file)
    return items

def WriteJSON(file_name, data):
    with open(file_name, "w") as out_file:
        json.dump(data, out_file)

def PropFilter(items, prop, approved_values):
    new_list = []
    for item in items:
        if item[prop] in approved_values:
            new_list.append(item)
    return new_list

def GetPropList(items, prop):
    return [item[prop] for item in items]

def binary_search(x, lyst, l, r):
    if l > r:
        return False
    while l <= r:
        mid = (l+r) >> 1
        if lyst[mid] == x:
            return True
        elif lyst[mid] < x:
            l = mid + 1
        else:
            r = mid - 1
    return False

In [2]:
reviews = ReadJSONitems('yelp_academic_dataset_review.json')
reviews

[{'review_id': 'KU_O5udG6zpxOg-VcAEodg',
  'user_id': 'mh_-eMZ6K5RLWhZyISBhwA',
  'business_id': 'XQfwVwDr-v0ZS3_CbbE5Xw',
  'stars': 3.0,
  'useful': 0,
  'funny': 0,
  'cool': 0,
  'text': "If you decide to eat here, just be aware it is going to take about 2 hours from beginning to end. We have tried it multiple times, because I want to like it! I have been to it's other locations in NJ and never had a bad experience. \n\nThe food is good, but it takes a very long time to come out. The waitstaff is very young, but usually pleasant. We have just had too many experiences where we spent way too long waiting. We usually opt for another diner or restaurant on the weekends, in order to be done quicker.",
  'date': '2018-07-07 22:09:11'},
 {'review_id': 'BiTunyQ73aT9WBnpR9DZGw',
  'user_id': 'OyoGAe7OKpv6SyGZT5g77Q',
  'business_id': '7ATYjTIgM3jUlt4UM3IypQ',
  'stars': 5.0,
  'useful': 1,
  'funny': 0,
  'cool': 1,
  'text': "I've taken a lot of spin classes over the years, and nothing com

In [3]:
restaurants_reduced_list = ReadJSONitems('processed_businesses.json')[0]
restaurants_reduced_list

[{'business_id': 'MUTTqe8uqyMdBl186RmNeA',
  'business_name': 'Tuna Bar',
  'city': 'Philadelphia',
  'state': 'PA',
  'postal_code': '19106',
  'stars': '4.0',
  'categories': 'Sushi Bars, Restaurants, Japanese',
  'review_count': '245',
  'total_hh': '7625',
  'avg_income': '162885',
  'RestaurantsDelivery': 'True',
  'OutdoorSeating': 'True',
  'BusinessAcceptsCreditCards': 'True',
  'BikeParking': 'False',
  'RestaurantsTakeOut': 'True',
  'Caters': 'False',
  'RestaurantsReservations': 'True',
  'RestaurantsGoodForGroups': 'True',
  'HasTV': 'False',
  'GoodForKids': 'False',
  'BusinessParking_garage': 'False',
  'BusinessParking_street': 'True',
  'BusinessParking_validated': 'False',
  'BusinessParking_lot': 'False',
  'BusinessParking_valet': 'False',
  'Ambience_touristy': 'False',
  'Ambience_hipster': 'False',
  'Ambience_romantic': 'False',
  'Ambience_divey': 'False',
  'Ambience_intimate': 'False',
  'Ambience_trendy': 'False',
  'Ambience_upscale': 'False',
  'Ambience_

In [4]:
# Get business_id of restaurants in the chosen list
reduced_restaurants_ids = sorted(GetPropList(restaurants_reduced_list, 'business_id')) #sort ids
reduced_restaurants_ids

['-0TffRSXXIlBYVbb5AwfTg',
 '-1B9pP_CrRBJYPICE5WbRA',
 '-2YZIUArmrIMVVBi8CT47w',
 '-361Hc0tlxSYdrH_C3OgzA',
 '-3IOd5YntpkbK6RwT3HYtA',
 '-5Rah4ZvWsDu4oilUZxhtw',
 '-5psHqEISccHDdHaWF2-6Q',
 '-63ytt5vkWof-M9NDGTkng',
 '-6JdVK-DHB4_43PEksbg1A',
 '-8VpP01AKfnt4wpT-8vgnQ',
 '-9lL4yFBX_7XVqUr8cwytg',
 '-ATiAtTikuGuqvaW2O6tNA',
 '-AWclhh1_2VnqPylPgBU3g',
 '-AanHawaDlzWHQjrqRRWig',
 '-AgYybTLhispcYcoIV7E9A',
 '-B6XL-ZWsVHlAQyYcd3eEg',
 '-Bhoyo7LL97tgt9Hze0Saw',
 '-CRjqZ4GxZ6lIa_qMj7-yw',
 '-CvjeIt8CMUMBRnikhHCDg',
 '-DGsnMlRrR_tv8avrpQLQw',
 '-DcAq3aOYlMTM0vzOIAMMw',
 '-EHrU_DwtX2Jv4NbAMppTA',
 '-ExvxN1QHx4efMLm6PxJ4A',
 '-FIYPnxsg3kuHXx46zY18w',
 '-FasKNAqishyfjAarVgJtw',
 '-HPx2qKK5EDSltrtd1s2rg',
 '-HSWvV_IvXtMFzApwjF6YQ',
 '-HcABf2Y81fglisWWXPYdQ',
 '-IOWVdmqsfd02_G-nJACdQ',
 '-IvBAqkaQcDt-fj-KhOFpQ',
 '-J0tPzkwPnyulSBE_s863Q',
 '-JQ6tBgz-YUD5HjJXIRKYg',
 '-JStjL-8mRZq4ov4uI1FaQ',
 '-JpnQBGy8daXKvMvtjto2w',
 '-KQsS-zouGNa6qczrzO0Ew',
 '-KWel73upKHxstRCr9JGLw',
 '-Khy_h5lLuhzCts3c318_g',
 

In [5]:
len(reduced_restaurants_ids)

6265

In [6]:
# get all reviews for the restaurants in the chosen list:
reduced_reviews_list = []
for review in reviews:
    if binary_search(review['business_id'], reduced_restaurants_ids, 0, len(reduced_restaurants_ids)-1):
        reduced_reviews_list.append(review)

In [7]:
print(f'Nuber of reduced reviews: {len(reduced_reviews_list):,}'.format())

Nuber of reduced reviews: 1,194,951


In [8]:
# Write to new file:
WriteJSON("review_restaurants_reduced.json", reduced_reviews_list)

In [9]:
# # Write to a new file without text:
# reviews_copy = reviews.copy()
# for review in reviews_copy:
#     review.pop('text')

# WriteJSON("review_restaurants_PA_FL_RemovedText.json", reviews_copy)

In [10]:
# ReadJSONitems("review_restaurants_PA_FL_RemovedText.json")

## Reading the reduced review dataset and cleaning:

In [11]:
reduced_review_items = ReadJSONitems("review_restaurants_reduced.json")
reduced_review_items

[[{'review_id': 'KU_O5udG6zpxOg-VcAEodg',
   'user_id': 'mh_-eMZ6K5RLWhZyISBhwA',
   'business_id': 'XQfwVwDr-v0ZS3_CbbE5Xw',
   'stars': 3.0,
   'useful': 0,
   'funny': 0,
   'cool': 0,
   'text': "If you decide to eat here, just be aware it is going to take about 2 hours from beginning to end. We have tried it multiple times, because I want to like it! I have been to it's other locations in NJ and never had a bad experience. \n\nThe food is good, but it takes a very long time to come out. The waitstaff is very young, but usually pleasant. We have just had too many experiences where we spent way too long waiting. We usually opt for another diner or restaurant on the weekends, in order to be done quicker.",
   'date': '2018-07-07 22:09:11'},
  {'review_id': 'AqPFMleE6RsU23_auESxiA',
   'user_id': '_7bHUi9Uuf5__HHc_Q8guQ',
   'business_id': 'kxX2SOes4o-D3ZQBkiMRfA',
   'stars': 5.0,
   'useful': 1,
   'funny': 0,
   'cool': 1,
   'text': "Wow!  Yummy, different,  delicious.   Our favor

In [12]:
pd.set_option('display.max_colwidth', None)

reduced_review_df = pd.DataFrame(reduced_review_items[0])

In [13]:
reduced_review_df.shape

(1194951, 9)

In [14]:
reduced_review_df.head()

Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is going to take about 2 hours from beginning to end. We have tried it multiple times, because I want to like it! I have been to it's other locations in NJ and never had a bad experience. \n\nThe food is good, but it takes a very long time to come out. The waitstaff is very young, but usually pleasant. We have just had too many experiences where we spent way too long waiting. We usually opt for another diner or restaurant on the weekends, in order to be done quicker.",2018-07-07 22:09:11
1,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0,1,0,1,"Wow! Yummy, different, delicious. Our favorite is the lamb curry and korma. With 10 different kinds of naan!!! Don't let the outside deter you (because we almost changed our minds)...go in and try something new! You'll be glad you did!",2015-01-04 00:01:03
2,OAhBYw8IQ6wlfw1owXWRWw,1C2lxzUo1Hyye4RFIXly3g,BVndHaLihEYbr76Z0CMEGw,5.0,0,0,0,"Great place for breakfast! I had the waffle, which was fluffy and perfect, and home fries which were nice and smashed and crunchy. Friendly waitstaff. Will definitely be back!",2014-10-11 16:22:06
3,oyaMhzBSwfGgemSGuZCdwQ,Dd1jQj7S-BFGqRbApFzCFw,YtSqYv1Q_pOltsVPSx54SA,5.0,0,0,0,"Tremendous service (Big shout out to Douglas) that complemented the delicious food. Pretty expensive establishment (40-50$ avg for your main course), but its definitely backs that up with an atmosphere that's comparable with any of the top tier restaurants across the country.",2013-06-24 11:21:25
4,G_5UczbCBJriUAbxz3J7Tw,clWLI5OZP2ad25ugMVI8gg,x4XdNhp0Xn8lOivzc77J-g,5.0,0,0,0,Best thai food in the area. Everything was authentic and delicious. Will definitely be back again and again.,2013-08-15 15:27:51


In [15]:
reduced_review_df["date"] = pd.to_datetime(reduced_review_df["date"], format='%Y-%m-%d')
reduced_review_df['year'] = reduced_review_df.date.dt.year

In [16]:
reduced_review_df

Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date,year
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is going to take about 2 hours from beginning to end. We have tried it multiple times, because I want to like it! I have been to it's other locations in NJ and never had a bad experience. \n\nThe food is good, but it takes a very long time to come out. The waitstaff is very young, but usually pleasant. We have just had too many experiences where we spent way too long waiting. We usually opt for another diner or restaurant on the weekends, in order to be done quicker.",2018-07-07 22:09:11,2018
1,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0,1,0,1,"Wow! Yummy, different, delicious. Our favorite is the lamb curry and korma. With 10 different kinds of naan!!! Don't let the outside deter you (because we almost changed our minds)...go in and try something new! You'll be glad you did!",2015-01-04 00:01:03,2015
2,OAhBYw8IQ6wlfw1owXWRWw,1C2lxzUo1Hyye4RFIXly3g,BVndHaLihEYbr76Z0CMEGw,5.0,0,0,0,"Great place for breakfast! I had the waffle, which was fluffy and perfect, and home fries which were nice and smashed and crunchy. Friendly waitstaff. Will definitely be back!",2014-10-11 16:22:06,2014
3,oyaMhzBSwfGgemSGuZCdwQ,Dd1jQj7S-BFGqRbApFzCFw,YtSqYv1Q_pOltsVPSx54SA,5.0,0,0,0,"Tremendous service (Big shout out to Douglas) that complemented the delicious food. Pretty expensive establishment (40-50$ avg for your main course), but its definitely backs that up with an atmosphere that's comparable with any of the top tier restaurants across the country.",2013-06-24 11:21:25,2013
4,G_5UczbCBJriUAbxz3J7Tw,clWLI5OZP2ad25ugMVI8gg,x4XdNhp0Xn8lOivzc77J-g,5.0,0,0,0,Best thai food in the area. Everything was authentic and delicious. Will definitely be back again and again.,2013-08-15 15:27:51,2013
...,...,...,...,...,...,...,...,...,...,...
1194946,me7QTotYCOjWNVA8bzN1eg,bJ5FtCtZX3ZZacz2_2PJjA,wMQkdK2aNMvq2xoojC98Mw,4.0,3,1,3,"South Street Diner isn't the best of Philly Diners. but It's always the house of some good memories. Back when I used to be big on Industrial music my friends and I would come here for a 2 AM breakfast fix after a night at the club. all the locals would give us confused looks that were monumental. \nI've had family live on South Street for many years now. so going here has been a must 24/7. not for the food, not for the service (which is actually pretty damn good on most nights). but simply for the tradition of catching up with old friends and reliving some good moments over coffee and a warm plate of eggs. and when it comes to doing so this place is the best.",2007-07-27 20:12:11,2007
1194947,5n_oSwXspiiSsZgNwjp48g,bJ5FtCtZX3ZZacz2_2PJjA,SOsjW1JARmtHUFtpFlp8rw,4.0,5,2,5,"When I first heard that the Peace A Pizza (https://www.yelp.com/biz/peace-a-pizza-ardmore-2) were embracing the fast casual movement and making custom pies possible faster than ever before, I loved the idea. Especially when I found out that they were opening a Philly location in the spot that used to house my beloved burger joint 500 Degrees (https://www.yelp.com/biz/500-degrees-philadelphia-3). And after a couple of visits I can say that I'm happy with the end result overall.\n\nUltimately I give 4 stars because there's another nearby fast casual pizza joint that delivers pies slightly better (&Pizza review coming later). I also find the huge wooden serving blocks that they give you when you order in to be kind of awkward to handle, especially when you come in during a lunch or dinner rush. But when it comes down to it I'm impressed with the number of specialty pies, grain bowls, and salads that they're able to dish out, and liked the selection of New Hope fountain sodas that you were able to grab with free refills. \n\nFor my first visit I ordered the appropriately named Kennett Square, a delicious pie that didn't disappoint with its medley of local roasted mushrooms, crumbled goat cheese, herb butter, and shredded mozzarella. For my second visit I was craving meat, and got it with a trio of pepperoni, sweet Italian sausage, and veal meatballs on their Sweet n' Slow pie! Add in plenty of customization options, and a free glass of wine during their Wine Wednesdays, and you have a solid spot!",2017-02-23 19:11:04,2017
1194948,A5uNiAOGcU0M34sSGN6Wkg,JWhZs-vSggwN6WgxBHgIDw,B2xtWMBTyDtd-ndqqaIC1Q,1.0,0,0,0,"Sat at the bar for 10 minutes and didn't get served at all. There were only 4 other people at the bar. The bartender was no where to be found. For having 90 taps, they lack a good selection of IPA's.",2022-01-15 15:41:15,2022
1194949,MVg4YUQeEhCA7Z7RsBJSVg,7-7A0Avj47slLGV7yBFc8w,ytynqOUb3hjKeJfRj5Tshw,3.0,1,0,0,"I was so excited about all the food I saw, but unfortunately this place CLOSES SO EARLY!!!!!!!!!!!!!!!! After being a tourist and sightseeing, by the time I got there at 5 or 6, things started closing. the hours of operation is definitely the biggest CON EVER!!!",2013-07-25 21:00:15,2013


### Text Preprocessing:

In [17]:
import string
import nltk
import re
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from nltk.probability import FreqDist
from wordcloud import WordCloud, STOPWORDS
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/anhnguyen/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [18]:
def clean_text(text):

    text = text.lower() # convert to lower case

    text = text.replace('\n', ' ') 
    text = text.replace('wasn\'t', 'was not') 
    text = text.replace('weren\'t', 'were not') 
    text = text.replace('won\'t', 'will not') 
    text = text.replace('wouldn\'t', 'would not')  
    text = text.replace('can\'t', 'cannot')  
    text = text.replace('couldn\'t', 'could not')  
    text = text.replace('don\'t', 'do not')  
    text = text.replace('didn\'t', 'did not')   
    text = text.replace('doesn\'t', 'does not') 
    text = text.replace('isn\'t', 'is not')
    text = text.replace('hasn\'t', 'has not')
    text = text.replace('haven\'t', 'have not')
    text = text.replace('hadn\'t', 'had not')  
    text = text.replace('shouldn\'t', 'should not') 
    text = text.replace('aren\'t', 'are not') 
    text = text.replace('ain\'t', 'am not') 
    text = text.replace('\'s', '')
    text = text.replace('i\'m', 'i am')
    text = text.replace('i\'ve', 'i have')
    text = text.replace('i\'ll', 'i will')
    text = text.replace('we\'ve', 'we have')
    text = text.replace('i\'d', 'i would')
    text = text.replace('you\'re', 'you are')

    text = re.sub(r'http\S+', '', text) ## remove url
    text = re.sub('\w*\d\w*', '', str(text))  ## remove numbers 
    text = re.sub('[^\w\s]', '', text)  ## remove punctuation

    tokens = word_tokenize(text) ## tokenize words

    stop_words = stopwords.words('english')
    stop_words = set(stop_words)
    words = [w for w in tokens if not w in stop_words] ## remove stop words

    lemmatizer = WordNetLemmatizer()
    text = [lemmatizer.lemmatize(w) for w in words] ## lemmatize words

    text = " ".join(w for w in text)
    
    text = text.replace('wa ', ' ') 
    text = text.replace('ha ', ' ') 

    return text

In [19]:
reduced_review_df['new_text'] = reduced_review_df.text.apply(lambda x: clean_text(x))
reduced_review_df.head()

Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date,year,new_text
0,KU_O5udG6zpxOg-VcAEodg,mh_-eMZ6K5RLWhZyISBhwA,XQfwVwDr-v0ZS3_CbbE5Xw,3.0,0,0,0,"If you decide to eat here, just be aware it is going to take about 2 hours from beginning to end. We have tried it multiple times, because I want to like it! I have been to it's other locations in NJ and never had a bad experience. \n\nThe food is good, but it takes a very long time to come out. The waitstaff is very young, but usually pleasant. We have just had too many experiences where we spent way too long waiting. We usually opt for another diner or restaurant on the weekends, in order to be done quicker.",2018-07-07 22:09:11,2018,decide eat aware going take hour beginning end tried multiple time want like location nj never bad experience food good take long time come waitstaff young usually pleasant many experience spent way long waiting usually opt another diner restaurant weekend order done quicker
1,AqPFMleE6RsU23_auESxiA,_7bHUi9Uuf5__HHc_Q8guQ,kxX2SOes4o-D3ZQBkiMRfA,5.0,1,0,1,"Wow! Yummy, different, delicious. Our favorite is the lamb curry and korma. With 10 different kinds of naan!!! Don't let the outside deter you (because we almost changed our minds)...go in and try something new! You'll be glad you did!",2015-01-04 00:01:03,2015,wow yummy different delicious favorite lamb curry korma different kind naan let outside deter almost changed mindsgo try something new youll glad
2,OAhBYw8IQ6wlfw1owXWRWw,1C2lxzUo1Hyye4RFIXly3g,BVndHaLihEYbr76Z0CMEGw,5.0,0,0,0,"Great place for breakfast! I had the waffle, which was fluffy and perfect, and home fries which were nice and smashed and crunchy. Friendly waitstaff. Will definitely be back!",2014-10-11 16:22:06,2014,great place breakfast waffle fluffy perfect home fry nice smashed crunchy friendly waitstaff definitely back
3,oyaMhzBSwfGgemSGuZCdwQ,Dd1jQj7S-BFGqRbApFzCFw,YtSqYv1Q_pOltsVPSx54SA,5.0,0,0,0,"Tremendous service (Big shout out to Douglas) that complemented the delicious food. Pretty expensive establishment (40-50$ avg for your main course), but its definitely backs that up with an atmosphere that's comparable with any of the top tier restaurants across the country.",2013-06-24 11:21:25,2013,tremendous service big shout douglas complemented delicious food pretty expensive establishment avg main course definitely back atmosphere comparable top tier restaurant across country
4,G_5UczbCBJriUAbxz3J7Tw,clWLI5OZP2ad25ugMVI8gg,x4XdNhp0Xn8lOivzc77J-g,5.0,0,0,0,Best thai food in the area. Everything was authentic and delicious. Will definitely be back again and again.,2013-08-15 15:27:51,2013,best thai food area everything authentic delicious definitely back


In [20]:
# save DF as csv
reduced_review_df.to_csv('reviews_processed.csv',index=False) # to be used if kernel crashes (beware there will be nulls!)

### TF-IDF Vectorizer

In [1]:
import pandas as pd
processed_review_df = pd.read_csv('reviews_processed.csv')

In [2]:
processed_review_df.info() # check for nulls

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1194951 entries, 0 to 1194950
Data columns (total 11 columns):
 #   Column       Non-Null Count    Dtype  
---  ------       --------------    -----  
 0   review_id    1194951 non-null  object 
 1   user_id      1194951 non-null  object 
 2   business_id  1194951 non-null  object 
 3   stars        1194951 non-null  float64
 4   useful       1194951 non-null  int64  
 5   funny        1194951 non-null  int64  
 6   cool         1194951 non-null  int64  
 7   text         1194951 non-null  object 
 8   date         1194951 non-null  object 
 9   year         1194951 non-null  int64  
 10  new_text     1194942 non-null  object 
dtypes: float64(1), int64(4), object(6)
memory usage: 100.3+ MB


In [3]:
processed_review_df.dropna(axis=0, inplace=True) #drop rows with nulls

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer_tf = TfidfVectorizer(max_features=5000)
vectors = vectorizer_tf.fit_transform(processed_review_df['new_text'])

In [5]:
processed_review_df = processed_review_df[['business_id']]

In [6]:
vectors.shape

(1194942, 5000)

### [DO NOT RUN from this part otherwise kernel will crash!]

In [19]:
word_vectors = vectorizer_tf.get_feature_names_out()

In [7]:
matrix = vectors.todense()

In [21]:
del vectors

In [None]:
list_dense = matrix.tolist()

: 

: 

In [None]:
vectors_df = pd.DataFrame(list_dense,columns=word_vectors)

## Merge with business dataset:
We wanted to merge all the words' vectorization to the business dataset but failed to convert the matrix of vectorization to a DataFrame from the step above. Therefore, we no longer use word vectors in this project.

In [None]:
# businesses_df = pd.read_csv("final_businesses.csv", sep=',', header=0)
# businesses_df.columns

In [None]:
# businesses_df.shape

In [None]:
# businesses_df.rename({"stars": "business_stars"}, axis = "columns", inplace = True) 
# businesses_df.columns

In [None]:
# reduced_review_df.columns

In [None]:
# reduced_review_df.rename({"stars": "review_stars"}, axis = "columns", inplace = True) 

In [None]:
# business_with_reviews = pd.merge(businesses_df, reduced_review_df, how='inner', on='business_id')
# business_with_reviews

In [None]:
# business_with_reviews.to_csv("reviews_merged.csv", index=False)