In [2]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import types as T
from pyspark import SparkFiles
from textblob import TextBlob
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)

In [3]:
spark = SparkSession\
        .builder\
        .master("local")\
        .config("spark.files.overwrite", True)\
        .getOrCreate()

Let's Start by analyzing the boston neighbourhoods

In [23]:
boston_neighbourhoods_df = spark.read.csv("data/Boston/2020/June/neighbourhoods.csv", header=True)
boston_neighbourhoods_df.toPandas()

Unnamed: 0,neighbourhood_group,neighbourhood
0,,Allston
1,,Back Bay
2,,Bay Village
3,,Beacon Hill
4,,Brighton
5,,Charlestown
6,,Chinatown
7,,Dorchester
8,,Downtown
9,,East Boston


We can see that for Boston atleast, there are no neighbourhood_groups, so lets drop the column

In [24]:
boston_neighbourhoods_df = boston_neighbourhoods_df.drop('neighbourhood_group')
boston_neighbourhoods_df.toPandas()

Unnamed: 0,neighbourhood
0,Allston
1,Back Bay
2,Bay Village
3,Beacon Hill
4,Brighton
5,Charlestown
6,Chinatown
7,Dorchester
8,Downtown
9,East Boston


We will be running the analysis only on the top neighbourhoods that we selected from "http://insideairbnb.com/get-the-data.html" ,which had the highest YoY percent change.

In [25]:
neighbourhoods_list = ["Back Bay", 'South Boston', 'South End', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")
boston_neighbourhoods_df.createOrReplaceTempView('boston_neighbourhoods')
boston_neighbourhoods_df.toPandas()

Unnamed: 0,neighbourhood
0,Back Bay
1,South Boston
2,South End
3,Fenway
4,Allston
5,Dorchester
6,Downtown


Let's create a function to read in listings data, and provide neighbourhood data such as number of listings for a particular year and average price for a year

In [26]:
def neighbourhood_data_analysis(file_path, year):
    boston_listings_df = spark.read.csv(file_path, header=True)
    boston_listings_df.createOrReplaceTempView('boston_listings')
    boston_listings_df = spark.sql("""
        SELECT * FROM boston_listings L
        WHERE L.neighbourhood IN 
        (SELECT neighbourhood FROM boston_neighbourhoods)
    """)
    boston_listings_df.createOrReplaceTempView('boston_listings')
    neighbourhood_data = spark.sql("""
        SELECT neighbourhood, count(neighbourhood) as number_of_listings, ROUND(avg(price), 2) as average_price
        FROM boston_listings
        GROUP BY neighbourhood
        ORDER BY number_of_listings desc
    """)
    return neighbourhood_data.withColumn("year", F.lit(year))
    

2020: 

In [27]:
neighbourhood_data_2020 = neighbourhood_data_analysis("data/Boston/2020/June/listings_original.csv", 2020)
neighbourhood_data_2020.toPandas()

Unnamed: 0,neighbourhood,number_of_listings,average_price,year
0,Dorchester,420,159.15,2020
1,Downtown,277,304.44,2020
2,Back Bay,224,259.63,2020
3,South End,214,171.21,2020
4,Allston,197,107.5,2020
5,South Boston,168,191.32,2020
6,Fenway,167,224.37,2020


2019: 

In [28]:
neighbourhood_data_2019 = neighbourhood_data_analysis("data/Boston/2019/June/listings.csv", 2019)
neighbourhood_data_2019.toPandas()

Unnamed: 0,neighbourhood,number_of_listings,average_price,year
0,Dorchester,558,107.04,2019
1,Downtown,498,264.05,2019
2,Back Bay,471,332.59,2019
3,South End,404,227.28,2019
4,Fenway,400,235.58,2019
5,Allston,338,181.63,2019
6,South Boston,333,225.68,2019


2018:

In [29]:
neighbourhood_data_2018 = neighbourhood_data_analysis("data/Boston/2018/July/listings.csv", 2018)
neighbourhood_data_2018.toPandas()

Unnamed: 0,neighbourhood,number_of_listings,average_price,year
0,Dorchester,519,99.11,2018
1,Fenway,498,237.59,2018
2,Back Bay,464,248.84,2018
3,South End,406,212.26,2018
4,Allston,393,156.82,2018
5,Downtown,361,252.73,2018
6,South Boston,312,211.29,2018


In [30]:
neighbourhood_data_2017 = neighbourhood_data_analysis("data/Boston/2017/October/listings.csv", 2017)
neighbourhood_data_2017.toPandas()

Unnamed: 0,neighbourhood,number_of_listings,average_price,year
0,Back Bay,410,239.88,2017
1,Dorchester,398,98.26,2017
2,Fenway,357,210.86,2017
3,South End,354,214.52,2017
4,Allston,297,99.63,2017
5,Downtown,295,252.32,2017
6,South Boston,264,197.83,2017


In [31]:
neighbourhood_data_2016 = neighbourhood_data_analysis("data/Boston/2016/September/listings.csv", 2016)
neighbourhood_data_2016.toPandas()

Unnamed: 0,neighbourhood,number_of_listings,average_price,year
0,South End,326,204.35,2016
1,Back Bay,302,240.95,2016
2,Fenway,290,220.39,2016
3,Dorchester,269,91.64,2016
4,Allston,260,112.31,2016
5,South Boston,174,187.61,2016
6,Downtown,172,236.46,2016


In [32]:
neighbourhood_data_2015 = neighbourhood_data_analysis("data/Boston/2015/October/listings.csv", 2015)
neighbourhood_data_2015.toPandas()

Unnamed: 0,neighbourhood,number_of_listings,average_price,year
0,South End,251,216.96,2015
1,Allston,223,128.91,2015
2,Back Bay,206,248.8,2015
3,Fenway,185,249.91,2015
4,Dorchester,166,104.14,2015
5,South Boston,114,191.62,2015
6,Downtown,103,243.82,2015


Let's combine all the dataframes

In [33]:
neighbourhood_data = neighbourhood_data_2020.union(neighbourhood_data_2019)
neighbourhood_data = neighbourhood_data.union(neighbourhood_data_2018)
neighbourhood_data = neighbourhood_data.union(neighbourhood_data_2017)
neighbourhood_data = neighbourhood_data.union(neighbourhood_data_2016)
neighbourhood_data = neighbourhood_data.union(neighbourhood_data_2015)

In [34]:
neighbourhood_data = neighbourhood_data.withColumn("number_of_listings", neighbourhood_data["number_of_listings"].cast(IntegerType()))

Let's plot it

In [16]:
import plotly.express as px

In [17]:
init_notebook_mode(connected = True)
fig = px.line(neighbourhood_data.toPandas(), x="year", y=["number_of_listings"], color='neighbourhood')
fig.show()

In [18]:
fig = px.line(neighbourhood_data.toPandas(), x="year", y=["average_price"], color='neighbourhood')
fig.show()

For average_price, we decided that we are going about it in the wrong way, since the listings had to be further divided into room_types to get a better estimate of how each neighbourhood has been affected

For further analysis, we decided to incorporate review data for each listings, and gather the sentiments as well for each review.

We decided to take the top 3 neighbourhoods with the highest poverty rate in 2014(Ref: 'http://www.bostonplans.org/getattachment/f1ecaf8a-d529-40b6-a9bc-8b4419587b86')

In [4]:
def get_complete_boston_data(listings_url, reviews_url, year):
    spark.sparkContext.addFile(reviews_url)
    spark.sparkContext.addFile(listings_url)
    boston_reviews_df = spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz"))
    boston_listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
    boston_reviews_df = boston_reviews_df.where("comments != 'None'")
    boston_reviews_df = boston_reviews_df.withColumnRenamed("id", "review_id")
    boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
    boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == boston_reviews_df["listing_id"], "inner")
    return boston_top_neighbourhood.withColumn("year", F.lit(year))

In [5]:
neighbourhoods_list = ['Fenway', 'Allston', 'Dorchester']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [6]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"

boston_neighbourhood_2020 = get_complete_boston_data(listings_url, reviews_url, 2020)
boston_neighbourhood_2020.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Dorchester,18711,The Dorset Redline | 3BR 1BA | Walk to Redline...,71783,Lance,,42.32212,-71.06096,Entire home/apt,128,...,0.42,39,330,18711,41856,2010-05-12,104064,Tanya,"Awesome experience. Great place, close to eve...",2020
1,Dorchester,18711,The Dorset Redline | 3BR 1BA | Walk to Redline...,71783,Lance,,42.32212,-71.06096,Entire home/apt,128,...,0.42,39,330,18711,48734,2010-06-01,89310,Debi,Great place to stay in Boston - plenty of spac...,2020
2,Dorchester,18711,The Dorset Redline | 3BR 1BA | Walk to Redline...,71783,Lance,,42.32212,-71.06096,Entire home/apt,128,...,0.42,39,330,18711,70408,2010-07-29,91247,Amy,We stayed in the Penthouse in Boston. The hous...,2020
3,Dorchester,18711,The Dorset Redline | 3BR 1BA | Walk to Redline...,71783,Lance,,42.32212,-71.06096,Entire home/apt,128,...,0.42,39,330,18711,118858,2010-10-14,201485,Bonnie,Check in and check out was very easy. We stay...,2020
4,Dorchester,18711,The Dorset Redline | 3BR 1BA | Walk to Redline...,71783,Lance,,42.32212,-71.06096,Entire home/apt,128,...,0.42,39,330,18711,297982,2011-06-05,347925,Roderick,"Wow what a great place to stay, very convenien...",2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29085,Dorchester,42830725,Upscale Duplex | Near T,161724560,David,,42.32112,-71.0567,Entire home/apt,960,...,1.05,4,318,42830725,619667236,2020-03-18,278948310,Mani,It was truly a pleasure to stay at David's hom...,2020
29086,Dorchester,42830725,Upscale Duplex | Near T,161724560,David,,42.32112,-71.0567,Entire home/apt,960,...,1.05,4,318,42830725,619847024,2020-03-19,318923611,Mike,Awesome home!! David is the perfect host!,2020
29087,Dorchester,42831086,Lovely Home | near T,161724560,David,,42.31952,-71.05529,Entire home/apt,740,...,1.06,4,333,42831086,619674889,2020-03-18,333711103,Nathan,David's home is so great! He was very helpful ...,2020
29088,Dorchester,42831086,Lovely Home | near T,161724560,David,,42.31952,-71.05529,Entire home/apt,740,...,1.06,4,333,42831086,620071225,2020-03-20,314497309,Patty,Super clean and wonderful home! My grand kids ...,2020


In [7]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/visualisations/listings.csv"
boston_neighbourhood_2019 = get_complete_boston_data(listings_url, reviews_url, 2019)
boston_neighbourhood_2019.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Dorchester,18711,The Dorset | 3BR 1BA | 1st Floor,71783,Lance,,42.32329,-71.0621,Entire home/apt,126,...,0.45,25,347,18711,41856,2010-05-12,104064,Tanya,"Awesome experience. Great place, close to eve...",2019
1,Dorchester,18711,The Dorset | 3BR 1BA | 1st Floor,71783,Lance,,42.32329,-71.0621,Entire home/apt,126,...,0.45,25,347,18711,48734,2010-06-01,89310,Debi,Great place to stay in Boston - plenty of spac...,2019
2,Dorchester,18711,The Dorset | 3BR 1BA | 1st Floor,71783,Lance,,42.32329,-71.0621,Entire home/apt,126,...,0.45,25,347,18711,70408,2010-07-29,91247,Amy,We stayed in the Penthouse in Boston. The hous...,2019
3,Dorchester,18711,The Dorset | 3BR 1BA | 1st Floor,71783,Lance,,42.32329,-71.0621,Entire home/apt,126,...,0.45,25,347,18711,118858,2010-10-14,201485,Bonnie,Check in and check out was very easy. We stay...,2019
4,Dorchester,18711,The Dorset | 3BR 1BA | 1st Floor,71783,Lance,,42.32329,-71.0621,Entire home/apt,126,...,0.45,25,347,18711,297982,2011-06-05,347925,Roderick,"Wow what a great place to stay, very convenien...",2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39657,Fenway,35307358,"Perfect location! Fenway, Newbury st, MFA & More",61151010,Ben,,42.34525,-71.08966,Entire home/apt,154,...,1,1,18,35307358,465925677,2019-06-08,178836810,Andy,This apartment was very close to fenway and wa...,2019
39658,Dorchester,35386839,Cozy and Spacious #1,266400490,Gil,,42.30034,-71.06938,Private room,71,...,1,5,71,35386839,468935333,2019-06-13,88151830,Johannes,10 minutes walk to T station. The room was spa...,2019
39659,Dorchester,35424411,Cozy and Spacious #3,266400490,Gil,,42.30135,-71.06941,Private room,45,...,1,5,134,35424411,467661375,2019-06-10,254158418,Jose R.,Great place I recommended I am looking forward...,2019
39660,Allston,35513504,"Allston & Brighton, Cozy Room Next to Green Li...",266658174,Derya,,42.34935,-71.13401,Private room,49,...,1,3,355,35513504,467537965,2019-06-10,78433706,Andy,Good clean bed near some fun bars! Would recom...,2019


In [8]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/visualisations/listings.csv"
boston_neighbourhood_2018 = get_complete_boston_data(listings_url, reviews_url, 2018)
boston_neighbourhood_2018.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Allston,10758,Budget Single - share bath,38440,The Farrington Inn,,42.35344916645469,-71.13179683452269,Private room,115,...,0.04,4,0,10758,21727349,2014-10-22,7365249,David,If you're looking for a place that's convenien...,2018
1,Allston,10758,Budget Single - share bath,38440,The Farrington Inn,,42.35344916645469,-71.13179683452269,Private room,115,...,0.04,4,0,10758,49371703,2015-10-03,43901704,Joseph,Check-in was very easy and the host was attent...,2018
2,Allston,10807,Budget Double - share bath,38440,The Farrington Inn,,42.35412285944836,-71.1304857316918,Private room,80,...,0.02,4,0,10807,21271656,2014-10-13,22372886,Noemie,Chambre double correcte avec salle de bain (pa...,2018
3,Allston,10809,Private Room with 2 Beds share Bath,38440,The Farrington Inn,,42.35478799360237,-71.13208846148751,Private room,100,...,0.27,4,0,10809,12160763,2014-04-24,13526964,Kristopher,"We love it. It was vert comfy, private enough....",2018
4,Allston,10809,Private Room with 2 Beds share Bath,38440,The Farrington Inn,,42.35478799360237,-71.13208846148751,Private room,100,...,0.27,4,0,10809,22386764,2014-11-04,22502466,Mark,"The host was very welcoming, on time and effic...",2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31407,Fenway,26858288,Boston Multi-unit building,34768507,Maryam,,42.34569614086391,-71.10741727310668,Private room,42,...,4,2,56,26858288,291227663,2018-07-15,37111942,Aaron,Maryam's place was a great value! Location is ...,2018
31408,Fenway,26858288,Boston Multi-unit building,34768507,Maryam,,42.34569614086391,-71.10741727310668,Private room,42,...,4,2,56,26858288,291883167,2018-07-16,158076976,Jinfeng,"Maryam is a easygoing and friendly person, she...",2018
31409,Allston,26874729,Temporary Accommodation For August Available.,202133335,Goutham,,42.35328008480706,-71.12747794944214,Shared room,25,...,2,1,291,26874729,291456361,2018-07-15,202577445,David,The host canceled this reservation 2 days befo...,2018
31410,Allston,26874729,Temporary Accommodation For August Available.,202133335,Goutham,,42.35328008480706,-71.12747794944214,Shared room,25,...,2,1,291,26874729,291460341,2018-07-15,202598002,Maciej,The host canceled this reservation 6 days befo...,2018


In [9]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/visualisations/listings.csv"
boston_neighbourhood_2017 = get_complete_boston_data(listings_url, reviews_url, 2017)
boston_neighbourhood_2017.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Allston,12233830,Modern Boston 2+ Bdrm Harvard/BU/BC/Boston Lan...,6933825,Estee,,42.356740941803814,-71.13651560421027,Entire home/apt,139,...,4.17,1,144,12233830,70018232,2016-04-15,2134782,Shlomi,Couldn't have been happier. The apartment was ...,2017
1,Allston,12233830,Modern Boston 2+ Bdrm Harvard/BU/BC/Boston Lan...,6933825,Estee,,42.356740941803814,-71.13651560421027,Entire home/apt,139,...,4.17,1,144,12233830,70649304,2016-04-19,37567875,Jessica,The apartment is very nice- as described and v...,2017
2,Allston,12233830,Modern Boston 2+ Bdrm Harvard/BU/BC/Boston Lan...,6933825,Estee,,42.356740941803814,-71.13651560421027,Entire home/apt,139,...,4.17,1,144,12233830,70882891,2016-04-21,68058568,Khileigh,"This is a brand new gorgeous place, very clean...",2017
3,Allston,12233830,Modern Boston 2+ Bdrm Harvard/BU/BC/Boston Lan...,6933825,Estee,,42.356740941803814,-71.13651560421027,Entire home/apt,139,...,4.17,1,144,12233830,71725720,2016-04-28,43639502,Rajesh,Estee and Josh are great hosts. Very welcoming...,2017
4,Allston,12233830,Modern Boston 2+ Bdrm Harvard/BU/BC/Boston Lan...,6933825,Estee,,42.356740941803814,-71.13651560421027,Entire home/apt,139,...,4.17,1,144,12233830,74530572,2016-05-15,58310922,Stephenie,Estee was super sweet and so very accommodatin...,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20692,Dorchester,533114,The Sawyer | 2BR 2.5BA | 1st Floor,71783,Lance,,42.31167121627835,-71.06168214898854,Entire home/apt,168,...,0.37,10,89,533114,160746370,2017-06-15,17983462,Joanna,Lance's place was swanky and spacious and we w...,2017
20693,Dorchester,533114,The Sawyer | 2BR 2.5BA | 1st Floor,71783,Lance,,42.31167121627835,-71.06168214898854,Entire home/apt,168,...,0.37,10,89,533114,163317404,2017-06-24,68398294,Alex,Very nice place that is close to public transi...,2017
20694,Dorchester,533114,The Sawyer | 2BR 2.5BA | 1st Floor,71783,Lance,,42.31167121627835,-71.06168214898854,Entire home/apt,168,...,0.37,10,89,533114,169370218,2017-07-12,22203613,Abigail,Beautiful place!,2017
20695,Dorchester,533114,The Sawyer | 2BR 2.5BA | 1st Floor,71783,Lance,,42.31167121627835,-71.06168214898854,Entire home/apt,168,...,0.37,10,89,533114,173040677,2017-07-22,109045241,Karen,Lance's house is very nice - he offers plenty ...,2017


In [10]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/visualisations/listings.csv"
boston_neighbourhood_2016 = get_complete_boston_data(listings_url, reviews_url, 2016)
boston_neighbourhood_2016.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Dorchester,7246272,Stylish Bedroom Suite with Bathroom - Near Subway,19250407,Alex (And Bob),,42.3177560295494,-71.06167666341562,Private room,108,...,3.23,1,138,7246272,49011639,2015-09-30,41266959,Juliano,My wife and I had a great experience while sta...,2016
1,Dorchester,7246272,Stylish Bedroom Suite with Bathroom - Near Subway,19250407,Alex (And Bob),,42.3177560295494,-71.06167666341562,Private room,108,...,3.23,1,138,7246272,49287275,2015-10-02,8942041,Hannah,Had a great time at Robert and Alex's place. T...,2016
2,Dorchester,7246272,Stylish Bedroom Suite with Bathroom - Near Subway,19250407,Alex (And Bob),,42.3177560295494,-71.06167666341562,Private room,108,...,3.23,1,138,7246272,50085405,2015-10-09,30324147,Leith,We were welcomed by Rob on early arrival and h...,2016
3,Dorchester,7246272,Stylish Bedroom Suite with Bathroom - Near Subway,19250407,Alex (And Bob),,42.3177560295494,-71.06167666341562,Private room,108,...,3.23,1,138,7246272,50367275,2015-10-11,45325384,Warren,It was a first AirBnB experience for my girlfr...,2016
4,Dorchester,7246272,Stylish Bedroom Suite with Bathroom - Near Subway,19250407,Alex (And Bob),,42.3177560295494,-71.06167666341562,Private room,108,...,3.23,1,138,7246272,50468704,2015-10-12,41772558,Benjamin,Had a great experience!! Alex was a great host...,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13178,Allston,7462268,"Allston, close to Harvard Business School + BU D",19548932,Joe,,42.35819787180854,-71.12901562046952,Private room,75,...,2.32,8,290,7462268,80537457,2016-06-18,22034145,Antonio,Joe y su mujer son encantadores. La habitación...,2016
13179,Allston,7462268,"Allston, close to Harvard Business School + BU D",19548932,Joe,,42.35819787180854,-71.12901562046952,Private room,75,...,2.32,8,290,7462268,83640094,2016-07-03,40052513,Steve,Joe was on his way to Jamaica to be married! o...,2016
13180,Allston,7462268,"Allston, close to Harvard Business School + BU D",19548932,Joe,,42.35819787180854,-71.12901562046952,Private room,75,...,2.32,8,290,7462268,85797088,2016-07-13,77129134,Nick,The room was very clean as were the bathrooms ...,2016
13181,Allston,7462268,"Allston, close to Harvard Business School + BU D",19548932,Joe,,42.35819787180854,-71.12901562046952,Private room,75,...,2.32,8,290,7462268,97264637,2016-08-26,15799803,Vid,Staying in Lower Allston at Joe and Nancy's pl...,2016


In [11]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/visualisations/listings.csv"
boston_neighbourhood_2015 = get_complete_boston_data(listings_url, reviews_url, 2015)
boston_neighbourhood_2015.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Allston,4278873,STUNNING ROOM PRIVATE BATH ALLSTON,9189449,Cindy,,42.35192949192535,-71.14205607147838,Private room,85,...,2.18,6,335,4278873,23590000,2014-12-05,20261863,Sunny,Great place...super warm home in a super cold ...,2015
1,Allston,4278873,STUNNING ROOM PRIVATE BATH ALLSTON,9189449,Cindy,,42.35192949192535,-71.14205607147838,Private room,85,...,2.18,6,335,4278873,25103537,2015-01-06,24847473,Louis,"""I stayed here for about 2 weeks and really ca...",2015
2,Allston,4278873,STUNNING ROOM PRIVATE BATH ALLSTON,9189449,Cindy,,42.35192949192535,-71.14205607147838,Private room,85,...,2.18,6,335,4278873,28495244,2015-03-24,29219963,Keith,Staying at Cindy's was great!! I had a comfor...,2015
3,Allston,4278873,STUNNING ROOM PRIVATE BATH ALLSTON,9189449,Cindy,,42.35192949192535,-71.14205607147838,Private room,85,...,2.18,6,335,4278873,28682269,2015-03-28,28805399,Peter,Cindy is great and the accommodations were per...,2015
4,Allston,4278873,STUNNING ROOM PRIVATE BATH ALLSTON,9189449,Cindy,,42.35192949192535,-71.14205607147838,Private room,85,...,2.18,6,335,4278873,28740213,2015-03-29,29169681,Gregory,The house was nice and Cindy was lovely!,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8365,Dorchester,3394437,Beautiful Single-Family Boston Home,16912440,Meaghan,,42.313955247481694,-71.06142190679236,Entire home/apt,300,...,0.59,1,329,3394437,41966302,2015-08-10,35193653,Simone,I enjoyed really a lot our period in Boston. U...,2015
8366,Dorchester,3394437,Beautiful Single-Family Boston Home,16912440,Meaghan,,42.313955247481694,-71.06142190679236,Entire home/apt,300,...,0.59,1,329,3394437,42541015,2015-08-14,11939439,Isabelle,"Very nice stay in this confortable house, perf...",2015
8367,Dorchester,533152,The Penthouse | 2BR 1BA | 4th Floor,71783,Lance,,42.321889409331035,-71.0618292667287,Entire home/apt,126,...,0.17,10,209,533152,11737165,2014-04-14,11403568,Spencer,I had a fantastic experience with this apartme...,2015
8368,Dorchester,533152,The Penthouse | 2BR 1BA | 4th Floor,71783,Lance,,42.321889409331035,-71.0618292667287,Entire home/apt,126,...,0.17,10,209,533152,21080959,2014-10-11,20269964,Philipp,We had a very pleasant stay in this very comfo...,2015


Now that we have the combined data for all 5 years for the required neighbourhoods, we can go into a deepdive on each of the neighbourhoods

Fenway

Let's combine Fenway data from all the years

In [12]:
fenway_neighbourhood = boston_neighbourhood_2015.where("neighbourhood == 'Fenway'").union(boston_neighbourhood_2016.where("neighbourhood == 'Fenway'"))
fenway_neighbourhood = fenway_neighbourhood.union(boston_neighbourhood_2017.where("neighbourhood == 'Fenway'"))
fenway_neighbourhood = fenway_neighbourhood.union(boston_neighbourhood_2018.where("neighbourhood == 'Fenway'"))
fenway_neighbourhood = fenway_neighbourhood.union(boston_neighbourhood_2019.where("neighbourhood == 'Fenway'"))
fenway_neighbourhood = fenway_neighbourhood.union(boston_neighbourhood_2020.where("neighbourhood == 'Fenway'"))
fenway_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Fenway,5825829,Brownstone Luxury Penthouse Condo,30239904,Andrew,,42.34661110156134,-71.10465898396323,Private room,125,...,4.88,1,271,5825829,30423473,2015-04-21,31108159,Heather,"Andrew was a kind, friendly, and informative h...",2015
1,Fenway,5825829,Brownstone Luxury Penthouse Condo,30239904,Andrew,,42.34661110156134,-71.10465898396323,Private room,125,...,4.88,1,271,5825829,30651204,2015-04-25,23058421,Charles,"Andrew is truly a stellar host, and his apt is...",2015
2,Fenway,5825829,Brownstone Luxury Penthouse Condo,30239904,Andrew,,42.34661110156134,-71.10465898396323,Private room,125,...,4.88,1,271,5825829,30750827,2015-04-26,3284437,Shannon,"Andrew has a very beautiful and clean place, t...",2015
3,Fenway,5825829,Brownstone Luxury Penthouse Condo,30239904,Andrew,,42.34661110156134,-71.10465898396323,Private room,125,...,4.88,1,271,5825829,31008160,2015-04-30,23281539,Rich,Andrew is a great host!! He had plenty of adv...,2015
4,Fenway,5825829,Brownstone Luxury Penthouse Condo,30239904,Andrew,,42.34661110156134,-71.10465898396323,Private room,125,...,4.88,1,271,5825829,31241938,2015-05-03,3391960,Casey,"Andrew was a great host, with lots of helpful ...",2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8737,Fenway,7650016,Beautiful Studio in Fenway,24032418,Ella,,42.341840820375516,-71.1013802676703,Entire home/apt,120,...,2.83,1,267,7650016,42332186,2015-08-12,32438064,Sindis,Ella explained to me very well the location an...,2020
8738,Fenway,7650016,Beautiful Studio in Fenway,24032418,Ella,,42.341840820375516,-71.1013802676703,Entire home/apt,120,...,2.83,1,267,7650016,43199752,2015-08-17,8292225,Jackson,We requested to stay the day before we arrived...,2020
8739,Fenway,7650016,Beautiful Studio in Fenway,24032418,Ella,,42.341840820375516,-71.1013802676703,Entire home/apt,120,...,2.83,1,267,7650016,44391093,2015-08-25,42141897,Xuan,"Ella's studio is incredibly awesone, decorated...",2020
8740,Fenway,7650016,Beautiful Studio in Fenway,24032418,Ella,,42.341840820375516,-71.1013802676703,Entire home/apt,120,...,2.83,1,267,7650016,45134308,2015-08-31,38606363,Lauren,My boyfriend and I stayed in Ella's studio apa...,2020


Get the setiment score for each review using textblob library, but before that, the reviews need to be translated to english, as not all the reviews are in english

In [15]:
def translate(sentence):
    try:
        str(TextBlob(x).translate(to= 'en'))
    except:
        return sentence

In [16]:
translate_udf = F.udf(lambda sentence: translate(sentence) if sentence is not None else None , T.StringType())

In [17]:
fenway_neighbourhood = fenway_neighbourhood.withColumn('comments', translate_udf(F.col("comments")))
fenway_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Fenway,8552387,Bright Back Bay Brownstone,28880663,Amanda,,42.35117407918826,-71.08998038575537,Entire home/apt,225,...,,1,9,,,,,,,2015
1,Fenway,8469841,Large Room Back Bay Close to MIT,16953664,Ife,,42.3510130004383,-71.09012784825632,Private room,90,...,1,3,346,8469841,48817108,2015-09-28,44610837,Alan,I had a great 1st experience using airbnb. If...,2015
2,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,47402256,2015-09-18,4228486,Natalie,"The apartment is clean, comfortable and very ...",2015
3,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,44984587,2015-08-30,36039829,Marco,I booked Lian's flat for my parents and they r...,2015
4,Fenway,5086521,Modern & spacious studio for two,1868179,Ryan,,42.34086101708686,-71.08828863604951,Entire home/apt,150,...,1.55,1,130,5086521,46117564,2015-09-08,31812081,Maximilian,"The place was exactly as described, location w...",2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9073,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,30315640,2015-04-20,28743839,Nanci,The description of this place is (email hidden...,2020
9074,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,32567096,2015-05-19,15747050,Ann,A very lovely place in a perfect location. Ne...,2020
9075,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,33313882,2015-05-26,26989637,Prashant,Jennifer was really helpful and provided me an...,2020
9076,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,37398353,2015-07-06,33661506,Kathleen,Jennifer and the apartment were great! At firs...,2020


Sentiment Score:

In [18]:
sentiment_udf = F.udf(lambda x:TextBlob(x).sentiment.polarity if x is not None else None , T.StringType())

In [19]:
fenway_neighbourhood = fenway_neighbourhood.withColumn('review_polarity', sentiment_udf(F.col("comments")))
fenway_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year,review_polarity
0,Fenway,8552387,Bright Back Bay Brownstone,28880663,Amanda,,42.35117407918826,-71.08998038575537,Entire home/apt,225,...,1,9,,,,,,,2015,
1,Fenway,8469841,Large Room Back Bay Close to MIT,16953664,Ife,,42.3510130004383,-71.09012784825632,Private room,90,...,3,346,8469841,48817108,2015-09-28,44610837,Alan,I had a great 1st experience using airbnb. If...,2015,0.31500000000000006
2,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1,58,7757132,47402256,2015-09-18,4228486,Natalie,"The apartment is clean, comfortable and very ...",2015,0.3766666666666667
3,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1,58,7757132,44984587,2015-08-30,36039829,Marco,I booked Lian's flat for my parents and they r...,2015,0.07714285714285714
4,Fenway,5086521,Modern & spacious studio for two,1868179,Ryan,,42.34086101708686,-71.08828863604951,Entire home/apt,150,...,1,130,5086521,46117564,2015-09-08,31812081,Maximilian,"The place was exactly as described, location w...",2015,0.4461111111111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9073,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,30315640,2015-04-20,28743839,Nanci,The description of this place is (email hidden...,2020,0.3726190476190476
9074,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,32567096,2015-05-19,15747050,Ann,A very lovely place in a perfect location. Ne...,2020,0.21796969696969692
9075,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,33313882,2015-05-26,26989637,Prashant,Jennifer was really helpful and provided me an...,2020,0.4979166666666667
9076,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,37398353,2015-07-06,33661506,Kathleen,Jennifer and the apartment were great! At firs...,2020,0.13065907522429263


In [21]:
fenway_neighbourhood_home = fenway_neighbourhood.where("room_type == 'Entire home/apt'").toPandas()
fig = px.histogram(fenway_neighbourhood_home, x="review_polarity")
fig.show()

In [22]:
fenway_neighbourhood_private = fenway_neighbourhood.where("room_type == 'Private room'").toPandas()
fig = px.histogram(fenway_neighbourhood_private, x="review_polarity")
fig.show()

In [23]:
fenway_neighbourhood_shared = fenway_neighbourhood.where("room_type == 'Shared room'").toPandas()
fig = px.histogram(fenway_neighbourhood_private, x="review_polarity")
fig.show()

Let's see the average price for each room_type in this neighbourhood

In [16]:
fenway_neighbourhood_summary = fenway_neighbourhood.select("id", "price", "room_type", "year").dropDuplicates()
fenway_neighbourhood_summary.toPandas()

Unnamed: 0,id,price,room_type,year
0,5834413,599,Entire home/apt,2016
1,4112115,215,Entire home/apt,2015
2,6793913,599,Entire home/apt,2016
3,8064306,82,Private room,2016
4,2915595,199,Entire home/apt,2017
...,...,...,...,...
769,7533100,40,Private room,2016
770,5296248,250,Entire home/apt,2019
771,2083261,78,Private room,2019
772,6728819,95,Private room,2020


In [22]:
fenway_neighbourhood_summary.where("year == 2015 and room_type == 'Private room'").toPandas()


Unnamed: 0,id,price,room_type,year
0,8469841,90,Private room,2015
1,2098613,90,Private room,2015
2,6494914,150,Private room,2015
3,6240901,88,Private room,2015
4,6728819,95,Private room,2015
5,6717173,101,Private room,2015
6,5825829,125,Private room,2015
7,67946,259,Private room,2015
8,39116,125,Private room,2015
9,7533100,40,Private room,2015


In [18]:
fenway_neighbourhood_summary.createOrReplaceTempView("room_type")
neighbourhood = spark.sql("""
    SELECT room_type, count(room_type) as number_of_listings, ROUND(avg(price), 2) as average_price, year
    FROM room_type
    GROUP BY room_type, year
    ORDER BY year
""").toPandas()


In [19]:
neighbourhood

Unnamed: 0,room_type,number_of_listings,average_price,year
0,Private room,30,116.73,2015
1,Entire home/apt,96,251.4,2015
2,Shared room,3,78.0,2015
3,Private room,30,116.73,2016
4,Shared room,3,78.0,2016
5,Entire home/apt,96,251.4,2016
6,Shared room,3,78.0,2017
7,Entire home/apt,96,251.4,2017
8,Private room,30,116.73,2017
9,Entire home/apt,96,251.4,2018


Allston:

Let's combine Allston data from all the years

In [37]:
allston_neighbourhood = boston_neighbourhood_2015.where("neighbourhood == 'Allston'").union(boston_neighbourhood_2016.where("neighbourhood == 'Allston'"))
allston_neighbourhood = allston_neighbourhood.union(boston_neighbourhood_2017.where("neighbourhood == 'Allston'"))
allston_neighbourhood = allston_neighbourhood.union(boston_neighbourhood_2018.where("neighbourhood == 'Allston'"))
allston_neighbourhood = allston_neighbourhood.union(boston_neighbourhood_2019.where("neighbourhood == 'Allston'"))
allston_neighbourhood = allston_neighbourhood.union(boston_neighbourhood_2020.where("neighbourhood == 'Allston'"))
allston_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,44668713,2015-08-28,12066100,Eddie,"The house owner, Raphael, is a very kind and e...",2015
1,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,35203143,2015-06-16,29246587,Michael,Raphael's home was well maintained and very cl...,2015
2,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,34523184,2015-06-09,26172960,Joe,"""This apartment was perfect for our trip to a ...",2015
3,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,32679028,2015-05-20,5961958,Erich,"Awesome place, great stay, host was very helpf...",2015
4,Allston,7052847,1 big room in a cute 2 bedroom apt,10336060,Rebeca,,42.352805645017064,-71.12398135726829,Private room,60,...,,1,70,,,,,,,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15865,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,42731671,2015-08-15,39121694,Espitau,Chambre spacieuse dans un appartement extrêmem...,2020
15866,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,44856550,2015-08-29,22853111,Kimberli,Great location and comfortable room. My exper...,2020
15867,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,45025544,2015-08-30,12423778,Benjamin,"nice location, easy organization.",2020
15868,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,46061524,2015-09-07,38801239,Cyndal,"John was a kind and accommodating host, even t...",2020


Let's translate all the reviews to english first

In [38]:
allston_neighbourhood = allston_neighbourhood.withColumn('comments', translate_udf(F.col("comments")))
allston_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,44668713,2015-08-28,12066100,Eddie,"The house owner, Raphael, is a very kind and e...",2015
1,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,35203143,2015-06-16,29246587,Michael,Raphael's home was well maintained and very cl...,2015
2,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,34523184,2015-06-09,26172960,Joe,"""This apartment was perfect for our trip to a ...",2015
3,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,32679028,2015-05-20,5961958,Erich,"Awesome place, great stay, host was very helpf...",2015
4,Allston,7052847,1 big room in a cute 2 bedroom apt,10336060,Rebeca,,42.352805645017064,-71.12398135726829,Private room,60,...,,1,70,,,,,,,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15865,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,42731671,2015-08-15,39121694,Espitau,Chambre spacieuse dans un appartement extrêmem...,2020
15866,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,44856550,2015-08-29,22853111,Kimberli,Great location and comfortable room. My exper...,2020
15867,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,45025544,2015-08-30,12423778,Benjamin,"nice location, easy organization.",2020
15868,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2.55,2,233,6703483,46061524,2015-09-07,38801239,Cyndal,"John was a kind and accommodating host, even t...",2020


Calculate the polarity of each review

In [39]:
allston_neighbourhood = allston_neighbourhood.withColumn('review_polarity', sentiment_udf(F.col("comments")))
allston_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year,review_polarity
0,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,3,68,5103430,44668713,2015-08-28,12066100,Eddie,"The house owner, Raphael, is a very kind and e...",2015,0.5616666666666666
1,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,3,68,5103430,35203143,2015-06-16,29246587,Michael,Raphael's home was well maintained and very cl...,2015,0.2985714285714286
2,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,3,68,5103430,34523184,2015-06-09,26172960,Joe,"""This apartment was perfect for our trip to a ...",2015,0.35722610722610726
3,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,3,68,5103430,32679028,2015-05-20,5961958,Erich,"Awesome place, great stay, host was very helpf...",2015,0.52
4,Allston,7052847,1 big room in a cute 2 bedroom apt,10336060,Rebeca,,42.352805645017064,-71.12398135726829,Private room,60,...,1,70,,,,,,,2015,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15865,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2,233,6703483,42731671,2015-08-15,39121694,Espitau,Chambre spacieuse dans un appartement extrêmem...,2020,0.0
15866,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2,233,6703483,44856550,2015-08-29,22853111,Kimberli,Great location and comfortable room. My exper...,2020,0.28950000000000004
15867,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2,233,6703483,45025544,2015-08-30,12423778,Benjamin,"nice location, easy organization.",2020,0.5166666666666666
15868,Allston,6703483,Private Bedroom near Harvard,16790574,John,,42.368279296036285,-71.11385909118216,Private room,69,...,2,233,6703483,46061524,2015-09-07,38801239,Cyndal,"John was a kind and accommodating host, even t...",2020,0.21500000000000002


In [40]:
allston_neighbourhood_home = allston_neighbourhood.where("room_type == 'Entire home/apt'").toPandas()
fig = px.histogram(allston_neighbourhood_home, x="review_polarity", color='room_type')
fig.show()

In [41]:
allston_neighbourhood_private = allston_neighbourhood.where("room_type == 'Private room'").toPandas()
fig = px.histogram(allston_neighbourhood_private, x="review_polarity", color='room_type')
fig.show()

In [42]:
allston_neighbourhood_shared = allston_neighbourhood.where("room_type == 'Shared room'").toPandas()
fig = px.histogram(allston_neighbourhood_shared, x="review_polarity", color='room_type')
fig.show()

Dorchester:

Let's combine Allston data from all the years

In [43]:
dorchester_neighbourhood = boston_neighbourhood_2015.where("neighbourhood == 'Dorchester'").union(boston_neighbourhood_2016.where("neighbourhood == 'Dorchester'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2017.where("neighbourhood == 'Dorchester'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2018.where("neighbourhood == 'Dorchester'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2019.where("neighbourhood == 'Dorchester'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2020.where("neighbourhood == 'Dorchester'"))
dorchester_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Dorchester,6966297,Comfortable Brownstone Home,31175333,Joyce,,42.29053001428256,-71.0591843206319,Private room,72,...,0.73,3,346,6966297,44190708,2015-08-24,37121473,Mazdul,"The room was perfect, everything was in proper...",2015
1,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,47626602,2015-09-21,33891733,Ivy,Victoria is a kindly host. Her house is very ...,2015
2,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,47173093,2015-09-16,5856233,René,Victoria and Tim's home and hosting were perfe...,2015
3,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,46874076,2015-09-14,36118168,Andy,Nice large room with shared bathroom in a good...,2015
4,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,46473154,2015-09-11,35108478,Yi-Fang,"Victoria的房子就在shawmut站附近, 在站內一上樓就要往左邊的出口(非進站入口)...",2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25927,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,33789090,2015-06-01,13584794,Diego,"I never met Orchid, but met with the property ...",2020
25928,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,34653464,2015-06-10,17163682,Jonny,It's very spartan. A bed in a room. A desk. A ...,2020
25929,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,39294896,2015-07-22,35444044,Deborah,The room: The description of the room was quit...,2020
25930,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,40673733,2015-08-01,14298383,Paulo,The owners are very helpful and you can reach ...,2020


Let's translate all the reviews to english first

In [44]:
dorchester_neighbourhood = dorchester_neighbourhood.withColumn('comments', translate_udf(F.col("comments")))
dorchester_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Dorchester,6966297,Comfortable Brownstone Home,31175333,Joyce,,42.29053001428256,-71.0591843206319,Private room,72,...,0.73,3,346,6966297,44190708,2015-08-24,37121473,Mazdul,"The room was perfect, everything was in proper...",2015
1,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,47626602,2015-09-21,33891733,Ivy,Victoria is a kindly host. Her house is very ...,2015
2,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,47173093,2015-09-16,5856233,René,Victoria and Tim's home and hosting were perfe...,2015
3,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,46874076,2015-09-14,36118168,Andy,Nice large room with shared bathroom in a good...,2015
4,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,5.31,2,140,1179399,46473154,2015-09-11,35108478,Yi-Fang,"Victoria的房子就在shawmut站附近, 在站內一上樓就要往左邊的出口(非進站入口)...",2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25927,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,33789090,2015-06-01,13584794,Diego,"I never met Orchid, but met with the property ...",2020
25928,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,34653464,2015-06-10,17163682,Jonny,It's very spartan. A bed in a room. A desk. A ...,2020
25929,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,39294896,2015-07-22,35444044,Deborah,The room: The description of the room was quit...,2020
25930,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,0.58,14,348,3765014,40673733,2015-08-01,14298383,Paulo,The owners are very helpful and you can reach ...,2020


Calculate the polarity of each review

In [45]:
dorchester_neighbourhood = dorchester_neighbourhood.withColumn('review_polarity', sentiment_udf(F.col("comments")))
dorchester_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year,review_polarity
0,Dorchester,6966297,Comfortable Brownstone Home,31175333,Joyce,,42.29053001428256,-71.0591843206319,Private room,72,...,3,346,6966297,44190708,2015-08-24,37121473,Mazdul,"The room was perfect, everything was in proper...",2015,0.525
1,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,2,140,1179399,47626602,2015-09-21,33891733,Ivy,Victoria is a kindly host. Her house is very ...,2015,0.43533333333333335
2,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,2,140,1179399,47173093,2015-09-16,5856233,René,Victoria and Tim's home and hosting were perfe...,2015,0.2602272727272727
3,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,2,140,1179399,46874076,2015-09-14,36118168,Andy,Nice large room with shared bathroom in a good...,2015,0.4357142857142857
4,Dorchester,1179399,Historical Gaslight Street - R1,6430732,Victoria,,42.29524425348655,-71.06544784292608,Private room,65,...,2,140,1179399,46473154,2015-09-11,35108478,Yi-Fang,"Victoria的房子就在shawmut站附近, 在站內一上樓就要往左邊的出口(非進站入口)...",2015,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25927,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,14,348,3765014,33789090,2015-06-01,13584794,Diego,"I never met Orchid, but met with the property ...",2020,0.17733333333333334
25928,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,14,348,3765014,34653464,2015-06-10,17163682,Jonny,It's very spartan. A bed in a room. A desk. A ...,2020,0.3396103896103896
25929,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,14,348,3765014,39294896,2015-07-22,35444044,Deborah,The room: The description of the room was quit...,2020,-0.27555555555555555
25930,Dorchester,3765014,"Umass,MGH,City,BCEC, Longwood(E)",16442921,Orchid,,42.32001844049547,-71.05495674469714,Private room,55,...,14,348,3765014,40673733,2015-08-01,14298383,Paulo,The owners are very helpful and you can reach ...,2020,0.3


In [46]:
dorchester_neighbourhood_home = dorchester_neighbourhood.where("room_type == 'Entire home/apt'").toPandas()
fig = px.histogram(dorchester_neighbourhood_home, x="review_polarity", color='room_type')
fig.show()

In [47]:
dorchester_neighbourhood_private = dorchester_neighbourhood.where("room_type == 'Private room'").toPandas()
fig = px.histogram(dorchester_neighbourhood_private, x="review_polarity", color='room_type')
fig.show()

In [48]:
dorchester_neighbourhood_shared = dorchester_neighbourhood.where("room_type == 'Shared room'").toPandas()
fig = px.histogram(dorchester_neighbourhood_shared, x="review_polarity", color='room_type')
fig.show()