In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql import types as T
from pyspark import SparkFiles

In [2]:
spark = SparkSession\
        .builder\
        .master("local")\
        .config("spark.files.overwrite", True)\
        .getOrCreate()

In [3]:
boston_reviews_df = spark.read.csv("data/Boston/2020/June/reviews.csv", header=True)
boston_reviews_df = boston_reviews_df.withColumnRenamed("id", "review_id")
boston_reviews_df.toPandas()

Unnamed: 0,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
179901,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
179902,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
179903,"At my time of stay (05/20) fees were exorbitant.""",,,,,
179904,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [5]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.count()

3446

In [6]:
boston_listings_df.createOrReplaceTempView("boston_listings")

In [7]:
spark.sql("SELECT * FROM boston_listings WHERE id='3781'").toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97


In [63]:
boston_reviews_df.createOrReplaceTempView("boston_reviews")

In [64]:
spark.sql("SELECT comments FROM boston_reviews WHERE listing_id='3781'").toPandas()

Unnamed: 0,comments
0,The apartment was as advertised and Frank was ...
1,It was a pleasure to stay at Frank's place. Th...
2,The apartment description is entirely faithful...
3,Thoroughly enjoyed my time at Frank's home. Ha...
4,Great value for the money! This location has e...
5,Frank was very accommodating throughout my sta...
6,Excellent! This was my second stay at the Jeff...
7,Frank was a wonderful and accommodating host. ...
8,"Frank was great, the apartment has everything ..."
9,Very nice. Comfortable apartment. Good locatio...


In [8]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [37]:
boston_neighbourhoods_df = spark.read.csv("data/Boston/2020/June/neighbourhoods.csv", header=True)
boston_neighbourhoods_df.toPandas()

Unnamed: 0,neighbourhood_group,neighbourhood
0,,Allston
1,,Back Bay
2,,Bay Village
3,,Beacon Hill
4,,Brighton
5,,Charlestown
6,,Chinatown
7,,Dorchester
8,,Downtown
9,,East Boston


In [9]:
neighbourhoods_list = ["Back Bay", 'South Boston', 'South End', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [10]:
boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,South End,22354,COPLEY SQ...19TH C.TWNHSE SUNNY RM,85770,Robert,,42.34496,-71.07486,Private room,148,2,316,2019-12-13,2.93,5,90
1,South End,225224,4th floor office room,85770,Robert,,42.34643,-71.07503,Private room,105,2,318,2020-03-21,3.02,5,90
2,South End,322593,"Near Hospitals, 28 Day Bking Only,OutdoorSpace",1651480,Edward,,42.34232,-71.07594,Entire home/apt,125,28,408,2020-03-31,4.65,4,91
3,South End,526970,BACK BAY/COPLEY FIRST FLOOR ROOM,85770,Robert,,42.34523,-71.07282,Private room,148,2,175,2019-10-31,1.80,5,90
4,South End,798957,"★Discounted★ NearHospitals★2floors★ 2 Baths,2beds",1651480,Edward,,42.34232,-71.07594,Entire home/apt,347,28,417,2020-04-30,4.82,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,South Boston,43102656,Large bedroom in shared apartment with two males,10778805,Paul,,42.32958,-71.05638,Private room,50,90,0,,,3,363
1663,South Boston,43137883,Spacious Queen Room in South Boston,297860058,June,,42.33393,-71.02503,Private room,48,30,0,,,31,364
1664,South Boston,43638802,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34313,-71.05606,Private room,90,28,0,,,92,141
1665,South Boston,43639005,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34301,-71.0573,Private room,90,28,0,,,92,144


In [11]:
boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == boston_reviews_df["listing_id"], "left")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64851,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64852,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64853,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64854,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [26]:
sentence = list(boston_reviews_df.select("comments").head(1)[0])
sentence

['The apartment was as advertised and Frank was incredibly helpful through the entire process. I would definitely recommend this place.']

In [29]:
from textblob import TextBlob

In [46]:
TextBlob(sentence[0]).sentiment

Sentiment(polarity=0.3, subjectivity=0.6749999999999999)

In [65]:
sentence = list(boston_reviews_df.where("listing_id == 15127120").select("comments").head(2)[1])
TextBlob(sentence[0]).translate(to= 'en')

TextBlob("Some of the best possible location in Boston. Small apartment but there is everything you need. New and tidy.")

In [49]:
dorchester_neighbourhood_df = boston_top_neighbourhood.where("neighbourhood == 'Dorchester'")
dorchester_neighbourhood_df.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,228392128,2018-01-19,162953953,Samuel,Really a nice stay. Warm accommodations. A lot...
1,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,232850966,2018-02-06,108441156,Loise Coleen,This place is a wonderful value in a very nice...
2,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,249045170,2018-04-01,50884175,Marcelo,"Eder, is the best host I had so far! He’s amaz..."
3,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,276689352,2018-06-14,126237644,Gloria,La casa è molto confortevole e il marito di E...
4,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,279053437,2018-06-19,140123458,F,"Located close to Boston, this place is a home ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19511,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,571718747,2019-12-01,195425991,Janet,This is a beautiful old house. But maybe some ...
19512,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,601081009,2020-02-02,88231857,Deniz,We stayed at Joe’s place during our Boston tri...
19513,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,607736226,2020-02-18,14250998,Raquel,"It's a beautiful space, roomy, clean, and comf..."
19514,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,613056113,2020-03-01,326248264,Liam,Very nice place to stay if you’re looking to s...


In [50]:
dorchester_neighbourhood_df.createOrReplaceTempView("dorchester_table")

In [54]:
spark.sql("""SELECT room_type, count(room_type), ROUND(avg(price), 2) as average_price 
            FROM dorchester_table
            GROUP BY room_type
            ORDER BY average_price""").show()

+---------------+----------------+-------------+
|      room_type|count(room_type)|average_price|
+---------------+----------------+-------------+
|   Private room|           15297|        82.21|
|     Hotel room|             153|       130.36|
|Entire home/apt|            4066|       149.55|
+---------------+----------------+-------------+



In [3]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"

In [4]:
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"

In [5]:
spark.sparkContext.addFile(reviews_url)
spark.sparkContext.addFile(listings_url)

In [6]:
reviews_df = spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz"))
reviews_df.toPandas()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
179901,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
179902,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
179903,"At my time of stay (05/20) fees were exorbitant.""",,,,,
179904,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [8]:
listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
listings_df.toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [14]:
reviews_df = reviews_df.where("comments != 'None'")
reviews_df = reviews_df.withColumnRenamed("id", "review_id")
reviews_df.toPandas()

Unnamed: 0,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
142256,43257845,625121495,2020-05-17,342995796,Christi,Great stay!
142257,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
142258,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
142259,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [11]:
neighbourhoods_list = ["Back Bay", 'South Boston', 'South End', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [12]:
boston_top_neighbourhood = listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,South End,22354,COPLEY SQ...19TH C.TWNHSE SUNNY RM,85770,Robert,,42.34496,-71.07486,Private room,148,2,316,2019-12-13,2.93,5,90
1,South End,225224,4th floor office room,85770,Robert,,42.34643,-71.07503,Private room,105,2,318,2020-03-21,3.02,5,90
2,South End,322593,"Near Hospitals, 28 Day Bking Only,OutdoorSpace",1651480,Edward,,42.34232,-71.07594,Entire home/apt,125,28,408,2020-03-31,4.65,4,91
3,South End,526970,BACK BAY/COPLEY FIRST FLOOR ROOM,85770,Robert,,42.34523,-71.07282,Private room,148,2,175,2019-10-31,1.80,5,90
4,South End,798957,"★Discounted★ NearHospitals★2floors★ 2 Baths,2beds",1651480,Edward,,42.34232,-71.07594,Entire home/apt,347,28,417,2020-04-30,4.82,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,South Boston,43102656,Large bedroom in shared apartment with two males,10778805,Paul,,42.32958,-71.05638,Private room,50,90,0,,,3,363
1663,South Boston,43137883,Spacious Queen Room in South Boston,297860058,June,,42.33393,-71.02503,Private room,48,30,0,,,31,364
1664,South Boston,43638802,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34313,-71.05606,Private room,90,28,0,,,92,141
1665,South Boston,43639005,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34301,-71.0573,Private room,90,28,0,,,92,144


In [16]:
boston_top_neighbourhood = boston_top_neighbourhood.join(reviews_df, boston_top_neighbourhood["id"] == reviews_df["listing_id"], "left")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64820,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64821,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64822,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64823,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [19]:
def get_complete_boston_data(listings_url, reviews_url):
    spark.sparkContext.addFile(reviews_url)
    spark.sparkContext.addFile(listings_url)
    boston_reviews_df = spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz"))
    boston_listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
    boston_reviews_df = reviews_df.where("comments != 'None'")
    boston_reviews_df = reviews_df.withColumnRenamed("id", "review_id")
    boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
    boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == reviews_df["listing_id"], "left")
    return boston_top_neighbourhood

In [20]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"

boston_neighbourhood_2020 = get_complete_boston_data(listings_url, reviews_url)
boston_neighbourhood_2020.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64820,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64821,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64822,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64823,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [21]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/visualisations/listings.csv"
boston_neighbourhood_2019 = get_complete_boston_data(listings_url, reviews_url)
boston_neighbourhood_2019.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,2018-10-22,0.70,9,362,,,,,,
1,Downtown,15127120,Sleek Studio in Downtown Crossing by Sonder,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,260,...,2019-04-02,1.62,296,336,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
2,Downtown,15127120,Sleek Studio in Downtown Crossing by Sonder,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,260,...,2019-04-02,1.62,296,336,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
3,Downtown,15127120,Sleek Studio in Downtown Crossing by Sonder,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,260,...,2019-04-02,1.62,296,336,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
4,Downtown,15127120,Sleek Studio in Downtown Crossing by Sonder,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,260,...,2019-04-02,1.62,296,336,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60923,Dorchester,35238309,Location location location and so beautiful!!!...,261646616,Joseph,,42.31584,-71.05796,Private room,500,...,,,3,85,35238309,613056113,2020-03-01,326248264,Liam,Very nice place to stay if you’re looking to s...
60924,Dorchester,35238309,Location location location and so beautiful!!!...,261646616,Joseph,,42.31584,-71.05796,Private room,500,...,,,3,85,35238309,615773999,2020-03-08,126410292,Douglas,Great spot to stay while in town
60925,Allston,35614764,Spacious 1 bedroom apartment on commonwealth ave,90175388,Alekya,,42.35299,-71.12243,Entire home/apt,200,...,,,1,25,,,,,,
60926,South Boston,7475557,Minion? *Family*Massage Chair*Convention 37A,26704689,Mac,,42.33354,-71.04848,Entire home/apt,149,...,2019-05-31,2.50,13,109,,,,,,


In [22]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/visualisations/listings.csv"
boston_neighbourhood_2018 = get_complete_boston_data(listings_url, reviews_url)
boston_neighbourhood_2018.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,South Boston,11757376,MINUTES TO SEAPORT/CONVENTION CTR W/ PRIVATE BATH,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,75,...,2018-07-08,3.03,5,283,,,,,,
1,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.350563870332486,-71.06565364219792,Entire home/apt,182,...,2018-06-25,0.88,9,349,,,,,,
2,Downtown,15127120,Sleek Studio in Downtown Crossing by Sonder,12243051,Sonder,,42.35461115834196,-71.05935438595657,Entire home/apt,129,...,2018-07-01,1.61,302,153,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
3,Downtown,15127120,Sleek Studio in Downtown Crossing by Sonder,12243051,Sonder,,42.35461115834196,-71.05935438595657,Entire home/apt,129,...,2018-07-01,1.61,302,153,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
4,Downtown,15127120,Sleek Studio in Downtown Crossing by Sonder,12243051,Sonder,,42.35461115834196,-71.05935438595657,Entire home/apt,129,...,2018-07-01,1.61,302,153,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54544,Dorchester,26138090,Comfortable room in Dorchester (#1),188413439,Hilson,,42.306501109039026,-71.06665534423358,Private room,65,...,2018-07-10,1,4,33,,,,,,
54545,Fenway,26935855,Amazing view of the apartme,194322480,Nhgio,,42.34572531468303,-71.08824820401912,Private room,204,...,,,1,362,,,,,,
54546,South Boston,7475557,*Family*Massage Chair*Tourist/Convention/Ocean...,26704689,Mac,,42.333535149704176,-71.04847848951837,Entire home/apt,149,...,2018-06-15,2.52,12,319,,,,,,
54547,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,2017-11-19,2.51,1,85,,,,,,


In [23]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/visualisations/listings.csv"
boston_neighbourhood_2017 = get_complete_boston_data(listings_url, reviews_url)
boston_neighbourhood_2017.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,2017-09-24,1.19,1,22,,,,,,
1,South Boston,11757376,SEAPORT AREA W/OWN BATH & QUEEN BED,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,104,...,2017-10-04,3.30,5,320,,,,,,
2,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.350563870332486,-71.06565364219792,Entire home/apt,182,...,2017-09-04,1.06,8,352,,,,,,
3,Downtown,15127120,Sleek Studio in Downtown Crossing,12243051,Sonder,,42.35461115834196,-71.05935438595657,Entire home/apt,129,...,2017-10-04,0.65,147,96,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
4,Downtown,15127120,Sleek Studio in Downtown Crossing,12243051,Sonder,,42.35461115834196,-71.05935438595657,Entire home/apt,129,...,2017-10-04,0.65,147,96,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46341,Dorchester,21148820,Private Apartment Close to Redline T Subway,18479599,Agnieszka,,42.325976820904145,-71.06016439244476,Entire home/apt,250,...,,,9,309,,,,,,
46342,Dorchester,21217710,Cozy Boston Room,153299935,NhuanKim,,42.31497905682507,-71.0564388777616,Private room,35,...,,,1,56,,,,,,
46343,South Boston,7475557,*Family*Massage Chair*Tourist/Convention/Ocean...,26704689,Mac,,42.333535149704176,-71.04847848951837,Entire home/apt,149,...,2017-10-02,3.07,11,345,,,,,,
46344,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,2017-10-04,3.15,1,341,,,,,,


In [24]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/visualisations/listings.csv"
boston_neighbourhood_2016 = get_complete_boston_data(listings_url, reviews_url)
boston_neighbourhood_2016.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,South Boston,11757376,SEAPORT AREA-2 BATHROOMS & PARKING,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,99,...,2016-07-31,1.68,6,299,11757376,70501196,2016-04-18,28675389,Johan,Det här var det bästa boendet jag någonsin haf...
1,South Boston,11757376,SEAPORT AREA-2 BATHROOMS & PARKING,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,99,...,2016-07-31,1.68,6,299,11757376,71236001,2016-04-24,26069560,Brock,Michael was a terrific host. Accommodating and...
2,South Boston,11757376,SEAPORT AREA-2 BATHROOMS & PARKING,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,99,...,2016-07-31,1.68,6,299,11757376,72207528,2016-05-01,67998914,Kinzie,Michael was a great a host! The space was beau...
3,South Boston,11757376,SEAPORT AREA-2 BATHROOMS & PARKING,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,99,...,2016-07-31,1.68,6,299,11757376,72916194,2016-05-05,22825492,Huong (Kelly),My friend and I had a great time staying at Mi...
4,South Boston,11757376,SEAPORT AREA-2 BATHROOMS & PARKING,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,99,...,2016-07-31,1.68,6,299,11757376,81240143,2016-06-21,70000821,Kelly,"Mike was a great host! When we arrived, he wel..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30045,Downtown,9299923,Beacon Hill 1BR near Downtown,12243051,Flatbook,,42.35844290828002,-71.06274136231362,Entire home/apt,139,...,2016-07-25,2.28,58,1,9299923,77901252,2016-06-04,37328025,Chris,Fifth floor condo (2 elevators) in an excellen...
30046,Downtown,9299923,Beacon Hill 1BR near Downtown,12243051,Flatbook,,42.35844290828002,-71.06274136231362,Entire home/apt,139,...,2016-07-25,2.28,58,1,9299923,79968676,2016-06-15,3817880,Jeff,Place was just as described. Great location j...
30047,Downtown,9299923,Beacon Hill 1BR near Downtown,12243051,Flatbook,,42.35844290828002,-71.06274136231362,Entire home/apt,139,...,2016-07-25,2.28,58,1,9299923,80441249,2016-06-18,3860053,Uluwehi,This location was convenient for first time vi...
30048,Downtown,9299923,Beacon Hill 1BR near Downtown,12243051,Flatbook,,42.35844290828002,-71.06274136231362,Entire home/apt,139,...,2016-07-25,2.28,58,1,9299923,88874032,2016-07-25,61523627,Mollie,Amazing location to explore historic Boston! S...


In [25]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/visualisations/listings.csv"
boston_neighbourhood_2015 = get_complete_boston_data(listings_url, reviews_url)
boston_neighbourhood_2015.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Fenway,1202945,"Heart of Boston, Fenway Park, 1 bed",6475647,Adam,,42.34141708440329,-71.08932417667182,Private room,129,...,2015-10-01,1.02,1,359,1202945,4929751,2013-06-02,2570335,Yi,"Fantastic location, huge bed, clean towels and..."
1,Fenway,1202945,"Heart of Boston, Fenway Park, 1 bed",6475647,Adam,,42.34141708440329,-71.08932417667182,Private room,129,...,2015-10-01,1.02,1,359,1202945,5066458,2013-06-10,6575647,Andrew,Adam was a great host. He even let us borrow a...
2,Fenway,1202945,"Heart of Boston, Fenway Park, 1 bed",6475647,Adam,,42.34141708440329,-71.08932417667182,Private room,129,...,2015-10-01,1.02,1,359,1202945,5268428,2013-06-21,6194019,Dominic,"The place was immaculate, all cleaned EVERYTHI..."
3,Fenway,1202945,"Heart of Boston, Fenway Park, 1 bed",6475647,Adam,,42.34141708440329,-71.08932417667182,Private room,129,...,2015-10-01,1.02,1,359,1202945,5491017,2013-07-02,6591400,Angelica,"We had a great time in Boston, thanks in no sm..."
4,Fenway,1202945,"Heart of Boston, Fenway Park, 1 bed",6475647,Adam,,42.34141708440329,-71.08932417667182,Private room,129,...,2015-10-01,1.02,1,359,1202945,5720779,2013-07-13,4653468,G. And S.,"Adam has a nice apartment, well located for th..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17784,South End,7114655,South End Artist Loft in SOWA,271917,Sheryl,,42.3433555536779,-71.06571608927693,Entire home/apt,300,...,2015-09-07,1.50,1,82,7114655,44028449,2015-08-23,12614949,Amanda,"""We had a great experience in this south end l..."
17785,South End,7114655,South End Artist Loft in SOWA,271917,Sheryl,,42.3433555536779,-71.06571608927693,Entire home/apt,300,...,2015-09-07,1.50,1,82,7114655,46049212,2015-09-07,13032983,Mattias,Sheryl with family greeted us upon our arrival...
17786,South End,7511737,"Columbus Avenue By Maverick, Seven",22348222,Alicia,,42.34422218579844,-71.07537505592731,Entire home/apt,143,...,,,43,285,,,,,,
17787,Allston,8256917,"Gorgeous, clean bedroom in Allston",34456312,Alan,,42.348363142876,-71.138695311476,Private room,115,...,2015-09-30,1,3,355,8256917,49040493,2015-09-30,5053497,Kim,My daughter and her girl friend loved the room...
