# Introduction
This project focuses on scraping TripAdvisor reviews of Disneyland locations. The reviews for each location are saved to a separate CSV file. 

In [1]:
# import libraries to be used
import requests
from bs4 import BeautifulSoup
import re
from csv import writer
from geopy.geocoders import Nominatim
import pandas as pd
# Shared Nominatim client used by the country-conversion cells below.
# geopy's default request timeout is only 1 second, which makes slow lookups
# raise timeout errors; allow more time per request instead.
# NOTE(review): "geoapiExercises" is a generic user agent; Nominatim's usage
# policy asks for an application-specific one - consider changing it.
geolocater = Nominatim(user_agent="geoapiExercises", timeout=10)

## 1. Tokyo Disneyland

In [9]:
# URL of the first Tokyo Disneyland review page on TripAdvisor.
url = "https://www.tripadvisor.com/Attraction_Review-g14134868-d320634-Reviews-Tokyo_Disneyland-Maihama_Urayasu_Chiba_Prefecture_Kanto.html"
# A browser-like User-Agent header is sent so the request is not stopped
# (https://stackoverflow.com/questions/71937012/python-web-scraper-not-working-for-tripadvisor)
headers = {'User-Agent': 'Mozilla/5.0'}
# Without a timeout, requests waits forever if the server never answers.
page = requests.get(url, headers=headers, timeout=30)

In [10]:
# Parse the downloaded HTML with the standard-library based parser.
soup = BeautifulSoup(markup=page.content, features="html.parser")
# Collect every <div> whose CSS class is "_c" (the review cards).
results = soup.find_all("div", attrs={"class": "_c"})

In [12]:
# Write the review cards of the already-parsed page (`soup`) to a prototype CSV.
# The regex strips digits from the reviewer-location text.
pattern = r"[0-9]"
review_num = 0
with open("Disneyland_Tokyo_Reviews_proto.csv", "w", newline="", encoding="utf8") as f:
    thewriter = writer(f)
    header = ["Review #", "Rating", "Year/Month", "Reviewer Location", "Review Title", "Review Text"]
    thewriter.writerow(header)
    for result in soup.select('#tab-data-qa-reviews-0 [data-automation="reviewCard"]'):
        # Every field falls back to "N/A" when its node is missing, so one
        # incomplete card cannot abort the whole export.
        rating_icon = result.select_one("svg[aria-label]")
        # Keep only the first three characters of the label (the numeric part, as a string).
        rating = rating_icon["aria-label"][0:3] if rating_icon is not None else "N/A"

        date_node = result.find("div", class_="RpeCd")
        # Only the first eight characters are kept; the rest is unnecessary text.
        year_month = date_node.text[0:8] if date_node is not None else "N/A"

        location_node = result.find("div", class_="JINyA")
        if location_node is not None:
            # [:-13] drops trailing text that is not part of the location,
            # then the regex removes any remaining digits.
            reviewer_location = re.sub(pattern, "", location_node.text[:-13])
        else:
            reviewer_location = "N/A"
        if len(reviewer_location) < 3:  # some reviews don't have a location
            reviewer_location = "N/A"

        text_node = result.find("div", class_="biGQs _P pZUbB KxBGd")
        review_text = text_node.text if text_node is not None else "N/A"
        title_node = result.find("span", class_="yCeTE")
        review_title = title_node.text if title_node is not None else "N/A"

        review_num += 1
        each_review = [review_num, rating, year_month, reviewer_location, review_title, review_text]
        thewriter.writerow(each_review)

The CSV file is created and the code works, but it only captures the first 10 reviews on the website. I will therefore loop the code 325 times, as there were 3241 written reviews at the time (2022.10.21). The file containing the first 10 reviews is named "Disneyland_Tokyo_Reviews_proto.csv".

### 1.1 Tokyo Disneyland (final)

In [17]:
# Scrape every Tokyo Disneyland review page (offsets 0, 10, ..., 3240) into one CSV.
with open("Disneyland_Tokyo_Reviews.csv", "w", newline="", encoding="utf8") as f:
    thewriter = writer(f)
    header = ["Review #", "Rating", "Year/Month", "Reviewer Location", "Review Title", "Review Text"]
    thewriter.writerow(header)
    review_num = 0
    # The regex strips digits from the reviewer-location text.
    pattern = r"[0-9]"
    headers = {'User-Agent': 'Mozilla/5.0'}
    for offset in range(0, 3250, 10):
        # Compare the integer offset: the original compared a str to 0, so the
        # first-page URL (which has no "-or<offset>-" segment) was never used.
        if offset == 0:
            url = "https://www.tripadvisor.com/Attraction_Review-g14134868-d320634-Reviews-Tokyo_Disneyland-Maihama_Urayasu_Chiba_Prefecture_Kanto.html"
        else:
            url = "https://www.tripadvisor.com/Attraction_Review-g14134868-d320634-Reviews-or" + str(offset) + "-Tokyo_Disneyland-Maihama_Urayasu_Chiba_Prefecture_Kanto.html"
        # Without a timeout, requests waits forever if the server never answers.
        page = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(page.content, "html.parser")
        for result in soup.select('#tab-data-qa-reviews-0 [data-automation="reviewCard"]'):
            # Every field falls back to "N/A" when its node is missing, so one
            # incomplete card cannot abort the whole scrape.
            rating_icon = result.select_one("svg[aria-label]")
            # Keep only the first three characters of the label (still a string, not an int).
            rating = rating_icon["aria-label"][0:3] if rating_icon is not None else "N/A"

            date_node = result.find("div", class_="RpeCd")
            year_month = date_node.text[0:8] if date_node is not None else "N/A"

            location_node = result.find("div", class_="JINyA")
            if location_node is not None:
                # [:-13] drops trailing text that is not part of the location,
                # then the regex removes any remaining digits.
                reviewer_location = re.sub(pattern, "", location_node.text[:-13])
            else:
                reviewer_location = "N/A"
            if len(reviewer_location) < 3:  # some reviews don't have a location
                reviewer_location = "N/A"

            text_node = result.find("div", class_="biGQs _P pZUbB KxBGd")
            review_text = text_node.text if text_node is not None else "N/A"
            title_node = result.find("span", class_="yCeTE")
            review_title = title_node.text if title_node is not None else "N/A"

            review_num += 1
            each_review = [review_num, rating, year_month, reviewer_location, review_title, review_text]
            thewriter.writerow(each_review)
    

Now every written review as of 2022.10.21 is stored in a CSV file named "Disneyland_Tokyo_Reviews.csv". 

## 2. Disneyland Paris

The same process will be done with other locations.

Looped 1842 times (2022.10.21)

In [None]:
# Scrape every Disneyland Paris review page (offsets 0, 10, ..., 18410) into one CSV.
with open("Disneyland_Paris_Reviews.csv", "w", newline="", encoding="utf8") as f:
    thewriter = writer(f)
    header = ["Review #", "Rating", "Year/Month", "Reviewer Location", "Review Title", "Review Text"]
    thewriter.writerow(header)
    review_num = 0
    # The regex strips digits from the reviewer-location text.
    pattern = r"[0-9]"
    headers = {'User-Agent': 'Mozilla/5.0'}
    for offset in range(0, 18420, 10):
        # Compare the integer offset: the original compared a str to 0, so the
        # first-page URL (which has no "-or<offset>-" segment) was never used.
        if offset == 0:
            url = "https://www.tripadvisor.com/Attraction_Review-g226865-d189258-Reviews-Disneyland_Paris-Marne_la_Vallee_Seine_et_Marne_Ile_de_France.html"
        else:
            url = "https://www.tripadvisor.com/Attraction_Review-g226865-d189258-Reviews-or" + str(offset) + "-Disneyland_Paris-Marne_la_Vallee_Seine_et_Marne_Ile_de_France.html"
        # Without a timeout, requests waits forever if the server never answers.
        page = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(page.content, "html.parser")
        for result in soup.select('#tab-data-qa-reviews-0 [data-automation="reviewCard"]'):
            # Every field falls back to "N/A" when its node is missing, so one
            # incomplete card cannot abort the whole scrape.
            rating_icon = result.select_one("svg[aria-label]")
            # Keep only the first three characters of the label (still a string, not an int).
            rating = rating_icon["aria-label"][0:3] if rating_icon is not None else "N/A"

            date_node = result.find("div", class_="RpeCd")
            year_month = date_node.text[0:8] if date_node is not None else "N/A"

            location_node = result.find("div", class_="JINyA")
            if location_node is not None:
                # [:-13] drops trailing text that is not part of the location,
                # then the regex removes any remaining digits.
                reviewer_location = re.sub(pattern, "", location_node.text[:-13])
            else:
                reviewer_location = "N/A"
            if len(reviewer_location) < 3:  # some reviews don't have a location
                reviewer_location = "N/A"

            text_node = result.find("div", class_="biGQs _P pZUbB KxBGd")
            review_text = text_node.text if text_node is not None else "N/A"
            title_node = result.find("span", class_="yCeTE")
            review_title = title_node.text if title_node is not None else "N/A"

            review_num += 1
            each_review = [review_num, rating, year_month, reviewer_location, review_title, review_text]
            thewriter.writerow(each_review)

## 3. Hong Kong Disneyland

Looped 1233 times (2022.10.21)

In [None]:
# Scrape every Hong Kong Disneyland review page (offsets 0, 10, ..., 12320) into one CSV.
with open("Disneyland_HongKong_Reviews.csv", "w", newline="", encoding="utf8") as f:
    thewriter = writer(f)
    header = ["Review #", "Rating", "Year/Month", "Reviewer Location", "Review Title", "Review Text"]
    thewriter.writerow(header)
    review_num = 0
    # The regex strips digits from the reviewer-location text.
    pattern = r"[0-9]"
    headers = {'User-Agent': 'Mozilla/5.0'}
    for offset in range(0, 12330, 10):
        # Compare the integer offset: the original compared a str to 0, so the
        # first-page URL (which has no "-or<offset>-" segment) was never used.
        if offset == 0:
            url = "https://www.tripadvisor.com/Attraction_Review-g294217-d543602-Reviews-Hong_Kong_Disneyland-Hong_Kong.html"
        else:
            url = "https://www.tripadvisor.com/Attraction_Review-g294217-d543602-Reviews-or" + str(offset) + "-Hong_Kong_Disneyland-Hong_Kong.html"
        # Without a timeout, requests waits forever if the server never answers.
        page = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(page.content, "html.parser")
        for result in soup.select('#tab-data-qa-reviews-0 [data-automation="reviewCard"]'):
            # Every field falls back to "N/A" when its node is missing, so one
            # incomplete card cannot abort the whole scrape.
            rating_icon = result.select_one("svg[aria-label]")
            # Keep only the first three characters of the label (still a string, not an int).
            rating = rating_icon["aria-label"][0:3] if rating_icon is not None else "N/A"

            date_node = result.find("div", class_="RpeCd")
            year_month = date_node.text[0:8] if date_node is not None else "N/A"

            location_node = result.find("div", class_="JINyA")
            if location_node is not None:
                # [:-13] drops trailing text that is not part of the location,
                # then the regex removes any remaining digits.
                reviewer_location = re.sub(pattern, "", location_node.text[:-13])
            else:
                reviewer_location = "N/A"
            if len(reviewer_location) < 3:  # some reviews don't have a location
                reviewer_location = "N/A"

            text_node = result.find("div", class_="biGQs _P pZUbB KxBGd")
            review_text = text_node.text if text_node is not None else "N/A"
            title_node = result.find("span", class_="yCeTE")
            review_title = title_node.text if title_node is not None else "N/A"

            review_num += 1
            each_review = [review_num, rating, year_month, reviewer_location, review_title, review_text]
            thewriter.writerow(each_review)

## 4. Disney California Adventure Park

Looped 1315 times (2022.10.21)

In [None]:
# Scrape every Disney California Adventure Park review page (offsets 0, 10, ..., 13140) into one CSV.
with open("Disneyland_California_Adventure_Park_Reviews.csv", "w", newline="", encoding="utf8") as f:
    thewriter = writer(f)
    header = ["Review #", "Rating", "Year/Month", "Reviewer Location", "Review Title", "Review Text"]
    thewriter.writerow(header)
    review_num = 0
    # The regex strips digits from the reviewer-location text.
    pattern = r"[0-9]"
    headers = {'User-Agent': 'Mozilla/5.0'}
    for offset in range(0, 13150, 10):
        # Compare the integer offset: the original compared a str to 0, so the
        # first-page URL (which has no "-or<offset>-" segment) was never used.
        if offset == 0:
            url = "https://www.tripadvisor.com/Attraction_Review-g29092-d186690-Reviews-Disney_California_Adventure_Park-Anaheim_California.html"
        else:
            url = "https://www.tripadvisor.com/Attraction_Review-g29092-d186690-Reviews-or" + str(offset) + "-Disney_California_Adventure_Park-Anaheim_California.html"
        # Without a timeout, requests waits forever if the server never answers.
        page = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(page.content, "html.parser")
        for result in soup.select('#tab-data-qa-reviews-0 [data-automation="reviewCard"]'):
            # Every field falls back to "N/A" when its node is missing, so one
            # incomplete card cannot abort the whole scrape.
            rating_icon = result.select_one("svg[aria-label]")
            # Keep only the first three characters of the label (still a string, not an int).
            rating = rating_icon["aria-label"][0:3] if rating_icon is not None else "N/A"

            date_node = result.find("div", class_="RpeCd")
            year_month = date_node.text[0:8] if date_node is not None else "N/A"

            location_node = result.find("div", class_="JINyA")
            if location_node is not None:
                # [:-13] drops trailing text that is not part of the location,
                # then the regex removes any remaining digits.
                reviewer_location = re.sub(pattern, "", location_node.text[:-13])
            else:
                reviewer_location = "N/A"
            if len(reviewer_location) < 3:  # some reviews don't have a location
                reviewer_location = "N/A"

            text_node = result.find("div", class_="biGQs _P pZUbB KxBGd")
            review_text = text_node.text if text_node is not None else "N/A"
            title_node = result.find("span", class_="yCeTE")
            review_title = title_node.text if title_node is not None else "N/A"

            review_num += 1
            each_review = [review_num, rating, year_month, reviewer_location, review_title, review_text]
            thewriter.writerow(each_review)

## 5. Shanghai Disneyland
Looped 141 times (2022.10.21)

In [None]:
# Scrape every Shanghai Disneyland review page (offsets 0, 10, ..., 1400) into one CSV.
with open("Disneyland_Shanghai_Reviews.csv", "w", newline="", encoding="utf8") as f:
    thewriter = writer(f)
    header = ["Review #", "Rating", "Year/Month", "Reviewer Location", "Review Title", "Review Text"]
    thewriter.writerow(header)
    review_num = 0
    # The regex strips digits from the reviewer-location text.
    pattern = r"[0-9]"
    headers = {'User-Agent': 'Mozilla/5.0'}
    for offset in range(0, 1410, 10):
        # Compare the integer offset: the original compared a str to 0, so the
        # first-page URL (which has no "-or<offset>-" segment) was never used.
        if offset == 0:
            url = "https://www.tripadvisor.com/Attraction_Review-g308272-d10383031-Reviews-Shanghai_Disneyland-Shanghai.html"
        else:
            url = "https://www.tripadvisor.com/Attraction_Review-g308272-d10383031-Reviews-or" + str(offset) + "-Shanghai_Disneyland-Shanghai.html"
        # Without a timeout, requests waits forever if the server never answers.
        page = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(page.content, "html.parser")
        for result in soup.select('#tab-data-qa-reviews-0 [data-automation="reviewCard"]'):
            # Every field falls back to "N/A" when its node is missing, so one
            # incomplete card cannot abort the whole scrape.
            rating_icon = result.select_one("svg[aria-label]")
            # Keep only the first three characters of the label (still a string, not an int).
            rating = rating_icon["aria-label"][0:3] if rating_icon is not None else "N/A"

            date_node = result.find("div", class_="RpeCd")
            year_month = date_node.text[0:8] if date_node is not None else "N/A"

            location_node = result.find("div", class_="JINyA")
            if location_node is not None:
                # [:-13] drops trailing text that is not part of the location,
                # then the regex removes any remaining digits.
                reviewer_location = re.sub(pattern, "", location_node.text[:-13])
            else:
                reviewer_location = "N/A"
            if len(reviewer_location) < 3:  # some reviews don't have a location
                reviewer_location = "N/A"

            text_node = result.find("div", class_="biGQs _P pZUbB KxBGd")
            review_text = text_node.text if text_node is not None else "N/A"
            title_node = result.find("span", class_="yCeTE")
            review_title = title_node.text if title_node is not None else "N/A"

            review_num += 1
            each_review = [review_num, rating, year_month, reviewer_location, review_title, review_text]
            thewriter.writerow(each_review)

## 6. Changing location values as country
The location values are stored as a mix of cities, states, countries, and N/A values. Examples can be seen in "Disneyland_Tokyo_Reviews_proto.csv". The following code cells convert them into countries.

### 6.1 Tokyo Disneyland

Because I was experiencing timeout errors, I will split the work into several runs.
`keep_default_na` is set to `False` because, when it is `True`, the "N/A" values are read as NaN. 

In [18]:
# Convert the reviewer location of rows 0-499 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Tokyo = pd.read_csv("Disneyland_Tokyo_Reviews.csv", keep_default_na=False)
try:
    for i in range(0, 500):
        location_ = Tokyo.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Tokyo.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises.
    Tokyo.to_csv("Disneyland_Tokyo_Reviews.csv", index=False)

In [23]:
# Convert the reviewer location of rows 500-999 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Tokyo = pd.read_csv("Disneyland_Tokyo_Reviews.csv", keep_default_na=False)
try:
    for i in range(500, 1000):
        location_ = Tokyo.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Tokyo.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises.
    Tokyo.to_csv("Disneyland_Tokyo_Reviews.csv", index=False)

In rows 500 - 1000, row 842 raised a timeout error, so that value (Rhodes Town, Greece) was entered manually. 

In [26]:
# Convert the reviewer location of rows 1000-1499 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Tokyo = pd.read_csv("Disneyland_Tokyo_Reviews.csv", keep_default_na=False)
try:
    for i in range(1000, 1500):
        location_ = Tokyo.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Tokyo.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises.
    Tokyo.to_csv("Disneyland_Tokyo_Reviews.csv", index=False)

In rows 1000 - 1500, row 1370 raised a timeout error, so that value (San Jose, California) was entered manually. 

In [28]:
# Convert the reviewer location of rows 1500-1999 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Tokyo = pd.read_csv("Disneyland_Tokyo_Reviews.csv", keep_default_na=False)
try:
    for i in range(1500, 2000):
        location_ = Tokyo.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Tokyo.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises.
    Tokyo.to_csv("Disneyland_Tokyo_Reviews.csv", index=False)

In rows 1500 - 2000, row 1998 raised a timeout error, so that value (Shanghai, China) was entered manually. 

In [31]:
# Convert the reviewer location of rows 2000-2499 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Tokyo = pd.read_csv("Disneyland_Tokyo_Reviews.csv", keep_default_na=False)
try:
    for i in range(2000, 2500):
        location_ = Tokyo.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Tokyo.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises.
    Tokyo.to_csv("Disneyland_Tokyo_Reviews.csv", index=False)

In rows 2000 - 2500, row 2104 raised a timeout error, so that value (Capital Federal District, Argentina) was entered manually. 

In [32]:
# Convert the reviewer location of rows 2500-2999 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Tokyo = pd.read_csv("Disneyland_Tokyo_Reviews.csv", keep_default_na=False)
try:
    for i in range(2500, 3000):
        location_ = Tokyo.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Tokyo.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises.
    Tokyo.to_csv("Disneyland_Tokyo_Reviews.csv", index=False)

In [34]:
# Convert the reviewer location of rows 3000-3240 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Tokyo = pd.read_csv("Disneyland_Tokyo_Reviews.csv", keep_default_na=False)
try:
    for i in range(3000, 3241):
        location_ = Tokyo.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Tokyo.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises.
    Tokyo.to_csv("Disneyland_Tokyo_Reviews.csv", index=False)

In rows 3000 - 3241, row 3067 raised a timeout error, so that value (Tokyo Prefecture, Japan) was entered manually. 

### 6.2 Disneyland Paris

From the process above, I discovered that the timeout error occurs only on certain addresses, so the process from now will be done in one code block, restarting from the middle when a timeout error occurs. 

In [14]:
# Convert the reviewer location of rows 0-18406 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Paris = pd.read_csv("Disneyland_Paris_Reviews.csv", keep_default_na=False)
try:
    for i in range(0, 18407):
        location_ = Paris.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Paris.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises,
    # so the loop can be restarted from the failing row.
    Paris.to_csv("Disneyland_Paris_Reviews.csv", index=False)

Row and values with timeout error written down:

1215 Rhodes Town, Greece

1723 Province of Seville, Spain 

2825 North Brabant Province, The Netherlands 

6322 Bristol, UK 

7400 Knowsley Village, UK 

9711 southeast england 

10017 Washingon D.C. 

10119 Kos Town, Greece 

11951 san jose, California

14912 Springfield, MO 

15311 Chania Town, Greece 

15638 South of France

16465 Saint Wenn, UK 

16646 U.S.A. East Coast

18015 France , North East 

18189 Bucks, England 

### 6.3 Hong Kong Disneyland

In [20]:
# Convert the reviewer location of rows 0-12329 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
HK = pd.read_csv("Disneyland_HongKong_Reviews.csv", keep_default_na=False)
try:
    for i in range(0, 12330):
        location_ = HK.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        HK.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises,
    # so the loop can be restarted from the failing row.
    HK.to_csv("Disneyland_HongKong_Reviews.csv", index=False)

Row and values with timeout error written down:

86 Port de Sant Miguel, Spain 

4799 Province of Florence, Italy 

5491 Southern Norway, Norway 

8742 Labuan Town, Malaysia

9774 St. Albans, UK 

### 6.4 Disney California Adventure Park

In [28]:
# Convert the reviewer location of rows 0-13126 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
Cal = pd.read_csv("Disneyland_California_Adventure_Park_Reviews.csv", keep_default_na=False)
try:
    for i in range(0, 13127):
        location_ = Cal.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        Cal.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises,
    # so the loop can be restarted from the failing row.
    Cal.to_csv("Disneyland_California_Adventure_Park_Reviews.csv", index=False)

Row and values with timeout error written down:

1780 Washington, MO

3896 Springfield, MO 

5562 Southern Calif. 

9316 small town, mt 

10345 Hawke Bay Region, New Zealand

12665 North County San Diego 

12878 North Central Indiana 

### 6.5 Shanghai Disneyland

In [30]:
# Convert the reviewer location of rows 0-1409 into a country name.
# keep_default_na=False keeps the literal "N/A" strings instead of reading them as NaN.
SH = pd.read_csv("Disneyland_Shanghai_Reviews.csv", keep_default_na=False)
try:
    for i in range(0, 1410):
        location_ = SH.loc[i, "Reviewer Location"]
        # Start from "N/A" so a failed lookup never inherits the previous row's country.
        location = "N/A"
        if location_ != "N/A":
            # Query the geocoder once per row instead of repeating the same request.
            geocoded = geolocater.geocode(
                location_,
                exactly_one=True,
                language="en",
                namedetails=True,
                addressdetails=True,
            )
            if geocoded is not None:
                location = geocoded.raw["address"].get("country", "N/A")
        SH.loc[i, "Reviewer Location"] = location
finally:
    # Save once; `finally` keeps the rows already converted when a lookup raises,
    # so the loop can be restarted from the failing row.
    SH.to_csv("Disneyland_Shanghai_Reviews.csv", index=False)

 Row and values with timeout error written down:
 
 982 Colchester, UK 

### 6.6 Insights
A timeout error from geocode occurs when the provided input value is too vague or unusual for the geocoder to resolve (e.g. southwest california). 

# Limitations
- Cannot scrape the data fully automatically; the number of reviews has to be checked by hand first.
- There may be better ways of writing the code.
- The scraper crashes if a review is not written in English.
- Scraping takes a long time.
- Values that cannot be geocoded have to be located and cleaned manually.