In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql import types as T
from pyspark import SparkFiles

In [2]:
spark = SparkSession\
        .builder\
        .master("local")\
        .config("spark.files.overwrite", True)\
        .getOrCreate()

In [3]:
boston_reviews_df = spark.read.csv("data/Boston/2020/June/reviews.csv", header=True)
boston_reviews_df = boston_reviews_df.withColumnRenamed("id", "review_id")
boston_reviews_df.toPandas()

Unnamed: 0,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
179901,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
179902,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
179903,"At my time of stay (05/20) fees were exorbitant.""",,,,,
179904,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [5]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.count()

3446

In [6]:
boston_listings_df.createOrReplaceTempView("boston_listings")

In [7]:
spark.sql("SELECT * FROM boston_listings WHERE id='3781'").toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97


In [63]:
boston_reviews_df.createOrReplaceTempView("boston_reviews")

In [64]:
spark.sql("SELECT comments FROM boston_reviews WHERE listing_id='3781'").toPandas()

Unnamed: 0,comments
0,The apartment was as advertised and Frank was ...
1,It was a pleasure to stay at Frank's place. Th...
2,The apartment description is entirely faithful...
3,Thoroughly enjoyed my time at Frank's home. Ha...
4,Great value for the money! This location has e...
5,Frank was very accommodating throughout my sta...
6,Excellent! This was my second stay at the Jeff...
7,Frank was a wonderful and accommodating host. ...
8,"Frank was great, the apartment has everything ..."
9,Very nice. Comfortable apartment. Good locatio...


In [8]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [37]:
boston_neighbourhoods_df = spark.read.csv("data/Boston/2020/June/neighbourhoods.csv", header=True)
boston_neighbourhoods_df.toPandas()

Unnamed: 0,neighbourhood_group,neighbourhood
0,,Allston
1,,Back Bay
2,,Bay Village
3,,Beacon Hill
4,,Brighton
5,,Charlestown
6,,Chinatown
7,,Dorchester
8,,Downtown
9,,East Boston


In [9]:
neighbourhoods_list = ["Back Bay", 'South Boston', 'South End', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [10]:
boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,South End,22354,COPLEY SQ...19TH C.TWNHSE SUNNY RM,85770,Robert,,42.34496,-71.07486,Private room,148,2,316,2019-12-13,2.93,5,90
1,South End,225224,4th floor office room,85770,Robert,,42.34643,-71.07503,Private room,105,2,318,2020-03-21,3.02,5,90
2,South End,322593,"Near Hospitals, 28 Day Bking Only,OutdoorSpace",1651480,Edward,,42.34232,-71.07594,Entire home/apt,125,28,408,2020-03-31,4.65,4,91
3,South End,526970,BACK BAY/COPLEY FIRST FLOOR ROOM,85770,Robert,,42.34523,-71.07282,Private room,148,2,175,2019-10-31,1.80,5,90
4,South End,798957,"★Discounted★ NearHospitals★2floors★ 2 Baths,2beds",1651480,Edward,,42.34232,-71.07594,Entire home/apt,347,28,417,2020-04-30,4.82,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,South Boston,43102656,Large bedroom in shared apartment with two males,10778805,Paul,,42.32958,-71.05638,Private room,50,90,0,,,3,363
1663,South Boston,43137883,Spacious Queen Room in South Boston,297860058,June,,42.33393,-71.02503,Private room,48,30,0,,,31,364
1664,South Boston,43638802,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34313,-71.05606,Private room,90,28,0,,,92,141
1665,South Boston,43639005,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34301,-71.0573,Private room,90,28,0,,,92,144


In [11]:
boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == boston_reviews_df["listing_id"], "left")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64851,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64852,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64853,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64854,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [26]:
sentence = list(boston_reviews_df.select("comments").head(1)[0])
sentence

['The apartment was as advertised and Frank was incredibly helpful through the entire process. I would definitely recommend this place.']

In [29]:
from textblob import TextBlob

In [46]:
TextBlob(sentence[0]).sentiment

Sentiment(polarity=0.3, subjectivity=0.6749999999999999)

In [65]:
sentence = list(boston_reviews_df.where("listing_id == 15127120").select("comments").head(2)[1])
TextBlob(sentence[0]).translate(to= 'en')

TextBlob("Some of the best possible location in Boston. Small apartment but there is everything you need. New and tidy.")

In [49]:
dorchester_neighbourhood_df = boston_top_neighbourhood.where("neighbourhood == 'Dorchester'")
dorchester_neighbourhood_df.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,228392128,2018-01-19,162953953,Samuel,Really a nice stay. Warm accommodations. A lot...
1,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,232850966,2018-02-06,108441156,Loise Coleen,This place is a wonderful value in a very nice...
2,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,249045170,2018-04-01,50884175,Marcelo,"Eder, is the best host I had so far! He’s amaz..."
3,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,276689352,2018-06-14,126237644,Gloria,La casa è molto confortevole e il marito di E...
4,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,279053437,2018-06-19,140123458,F,"Located close to Boston, this place is a home ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19511,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,571718747,2019-12-01,195425991,Janet,This is a beautiful old house. But maybe some ...
19512,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,601081009,2020-02-02,88231857,Deniz,We stayed at Joe’s place during our Boston tri...
19513,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,607736226,2020-02-18,14250998,Raquel,"It's a beautiful space, roomy, clean, and comf..."
19514,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,613056113,2020-03-01,326248264,Liam,Very nice place to stay if you’re looking to s...


In [50]:
dorchester_neighbourhood_df.createOrReplaceTempView("dorchester_table")

In [54]:
spark.sql("""SELECT room_type, count(room_type), ROUND(avg(price), 2) as average_price 
            FROM dorchester_table
            GROUP BY room_type
            ORDER BY average_price""").show()

+---------------+----------------+-------------+
|      room_type|count(room_type)|average_price|
+---------------+----------------+-------------+
|   Private room|           15297|        82.21|
|     Hotel room|             153|       130.36|
|Entire home/apt|            4066|       149.55|
+---------------+----------------+-------------+



In [3]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"

In [4]:
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"

In [5]:
spark.sparkContext.addFile(reviews_url)
spark.sparkContext.addFile(listings_url)

In [6]:
reviews_df = spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz"))
reviews_df.toPandas()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
179901,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
179902,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
179903,"At my time of stay (05/20) fees were exorbitant.""",,,,,
179904,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [8]:
listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
listings_df.toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [14]:
reviews_df = reviews_df.where("comments != 'None'")
reviews_df = reviews_df.withColumnRenamed("id", "review_id")
reviews_df.toPandas()

Unnamed: 0,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
142256,43257845,625121495,2020-05-17,342995796,Christi,Great stay!
142257,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
142258,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
142259,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [3]:
neighbourhoods_list = ["Back Bay", 'South Boston', 'South End', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [12]:
boston_top_neighbourhood = listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,South End,22354,COPLEY SQ...19TH C.TWNHSE SUNNY RM,85770,Robert,,42.34496,-71.07486,Private room,148,2,316,2019-12-13,2.93,5,90
1,South End,225224,4th floor office room,85770,Robert,,42.34643,-71.07503,Private room,105,2,318,2020-03-21,3.02,5,90
2,South End,322593,"Near Hospitals, 28 Day Bking Only,OutdoorSpace",1651480,Edward,,42.34232,-71.07594,Entire home/apt,125,28,408,2020-03-31,4.65,4,91
3,South End,526970,BACK BAY/COPLEY FIRST FLOOR ROOM,85770,Robert,,42.34523,-71.07282,Private room,148,2,175,2019-10-31,1.80,5,90
4,South End,798957,"★Discounted★ NearHospitals★2floors★ 2 Baths,2beds",1651480,Edward,,42.34232,-71.07594,Entire home/apt,347,28,417,2020-04-30,4.82,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,South Boston,43102656,Large bedroom in shared apartment with two males,10778805,Paul,,42.32958,-71.05638,Private room,50,90,0,,,3,363
1663,South Boston,43137883,Spacious Queen Room in South Boston,297860058,June,,42.33393,-71.02503,Private room,48,30,0,,,31,364
1664,South Boston,43638802,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34313,-71.05606,Private room,90,28,0,,,92,141
1665,South Boston,43639005,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34301,-71.0573,Private room,90,28,0,,,92,144


In [16]:
boston_top_neighbourhood = boston_top_neighbourhood.join(reviews_df, boston_top_neighbourhood["id"] == reviews_df["listing_id"], "left")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64820,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64821,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64822,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64823,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [6]:
def get_complete_boston_data(listings_url, reviews_url, year):
    spark.sparkContext.addFile(reviews_url)
    spark.sparkContext.addFile(listings_url)
    boston_reviews_df = spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz"))
    boston_listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
    boston_reviews_df = boston_reviews_df.where("comments != 'None'")
    boston_reviews_df = boston_reviews_df.withColumnRenamed("id", "review_id")
    boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
    boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == boston_reviews_df["listing_id"], "left")
    return boston_top_neighbourhood.withColumn("year", F.lit(year))

In [29]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"

boston_neighbourhood_2020 = get_complete_boston_data(listings_url, reviews_url, 2020)
boston_neighbourhood_2020.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,South End,8082512,"Columbus Ave By Maverick,ThirtyFour",22348222,Alicia,,42.34468465143812,-71.07663303521966,Entire home/apt,130,...,,43,333,,,,,,,2020
1,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,42843607,2015-08-16,31582449,David,This is a very nice house in one of Boston's n...,2020
2,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,37941666,2015-07-12,9598836,Adam,This is a beautiful home in the welcoming and ...,2020
3,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,36361195,2015-06-28,9598836,Adam,Wonderful place. Wonderful hosts.,2020
4,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,35486001,2015-06-19,15324211,Lisa,What a gem! The Koeppel's made us feel right ...,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17784,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,44590816,2015-08-27,41676224,Gavin,"Looks just like the photos, clean and well mai...",2020
17785,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,44217970,2015-08-24,2128023,Ainslie,Robert made us feel welcome from the first min...,2020
17786,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,43872862,2015-08-22,23258396,Woody,The house is cozy and nice. Very close to beac...,2020
17787,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,43565524,2015-08-20,41162578,Polly,This was a fantastic apartment right next the ...,2020


In [9]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/visualisations/listings.csv"
boston_neighbourhood_2019 = get_complete_boston_data(listings_url, reviews_url, 2019)
boston_neighbourhood_2019.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,70709969,2016-04-19,13807554,Juan,Last minute booking for me so I was not too pi...,2019
1,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,74906423,2016-05-17,31130305,Eva,Alan and Eddie were great hosts. We met Eddie ...,2019
2,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,75664635,2016-05-22,11668951,Rebecca,The location is absolutely perfect for Boston!...,2019
3,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,76616348,2016-05-28,29816733,Ivan,"Room was nice, but I wish I had known in advan...",2019
4,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,79381989,2016-06-12,67031876,Whitney,Alan and Edward were very great hosts! My chec...,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98824,South Boston,7475557,Minion? *Family*Massage Chair*Convention 37A,26704689,Mac,,42.33354,-71.04848,Entire home/apt,149,...,2.50,13,109,7475557,450934936,2019-05-10,256442391,David,"Shawn met us at the house at 10pm, late flight...",2019
98825,South Boston,7475557,Minion? *Family*Massage Chair*Convention 37A,26704689,Mac,,42.33354,-71.04848,Entire home/apt,149,...,2.50,13,109,7475557,453341445,2019-05-15,12953324,Jefferson,"Amazing place, everything literally perfect!",2019
98826,South Boston,7475557,Minion? *Family*Massage Chair*Convention 37A,26704689,Mac,,42.33354,-71.04848,Entire home/apt,149,...,2.50,13,109,7475557,457260738,2019-05-23,28724502,Kass,Good stay,2019
98827,South Boston,7475557,Minion? *Family*Massage Chair*Convention 37A,26704689,Mac,,42.33354,-71.04848,Entire home/apt,149,...,2.50,13,109,7475557,461431262,2019-05-31,262819173,Elicia,Mac's place was just as described. We enjoyed ...,2019


In [10]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/visualisations/listings.csv"
boston_neighbourhood_2018 = get_complete_boston_data(listings_url, reviews_url, 2018)
boston_neighbourhood_2018.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,South Boston,11757376,MINUTES TO SEAPORT/CONVENTION CTR W/ PRIVATE BATH,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,75,...,3.03,5,283,11757376,70501196,2016-04-18,28675389,Johan,Det här var det bästa boendet jag någonsin haf...,2018
1,South Boston,11757376,MINUTES TO SEAPORT/CONVENTION CTR W/ PRIVATE BATH,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,75,...,3.03,5,283,11757376,71236001,2016-04-24,26069560,Brock,Michael was a terrific host. Accommodating and...,2018
2,South Boston,11757376,MINUTES TO SEAPORT/CONVENTION CTR W/ PRIVATE BATH,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,75,...,3.03,5,283,11757376,72207528,2016-05-01,67998914,Kinzie,Michael was a great a host! The space was beau...,2018
3,South Boston,11757376,MINUTES TO SEAPORT/CONVENTION CTR W/ PRIVATE BATH,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,75,...,3.03,5,283,11757376,72916194,2016-05-05,22825492,Huong (Kelly),My friend and I had a great time staying at Mi...,2018
4,South Boston,11757376,MINUTES TO SEAPORT/CONVENTION CTR W/ PRIVATE BATH,54099497,Michael,,42.336445599409245,-71.04048656749968,Private room,75,...,3.03,5,283,11757376,81240143,2016-06-21,70000821,Kelly,"Mike was a great host! When we arrived, he wel...",2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77213,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,2.51,1,85,9237653,206255669,2017-10-24,25458324,Thomas,"Vanessa is a great host, her place is cosy and...",2018
77214,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,2.51,1,85,9237653,207631802,2017-10-29,22644331,Rita Claudia,Cozy apartment with a very lively hostess. Goo...,2018
77215,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,2.51,1,85,9237653,210665523,2017-11-10,59254242,Christian,"This was a very comfortable, clean and private...",2018
77216,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,2.51,1,85,9237653,212994935,2017-11-19,72834870,Urara,Vanessa sent me a photo of the entrance before...,2018


In [11]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/visualisations/listings.csv"
boston_neighbourhood_2017 = get_complete_boston_data(listings_url, reviews_url, 2017)
boston_neighbourhood_2017.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,113687242,2016-11-13,103088025,Gerri,We had a great stay at Allison's home. Her pl...,2017
1,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,121702696,2016-12-18,105866458,Jennifer,Staying in Allison's home was such a wonderful...,2017
2,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,132845450,2017-02-19,116909212,Brenna,Allisons place was perfect! Especially if you ...,2017
3,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,136863198,2017-03-12,118336543,Peter,"""This place was amazing, I'd even go as far as...",2017
4,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,138393020,2017-03-19,110442436,Nicole,The house was a little out of the way for the ...,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53673,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,3.15,1,341,9237653,197060923,2017-09-24,150055599,健,"很舒适的地方,接近城区,出去逛街购物很方便。",2017
53674,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,3.15,1,341,9237653,198198986,2017-09-28,148035227,Luca,"Vanessa is very nice, helpful and super easy. ...",2017
53675,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,3.15,1,341,9237653,199298897,2017-10-01,105075037,Torrie,We had a lovely stay. The place is clean and p...,2017
53676,South Boston,9237653,Minutes to Downtown. Modern Room W/ Private Bath!,20684898,Vanessa,,42.33193066863032,-71.05043230393528,Private room,120,...,3.15,1,341,9237653,200242606,2017-10-04,21364994,Edward,Very nice place in a very nice neighborhood. P...,2017


In [12]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/visualisations/listings.csv"
boston_neighbourhood_2016 = get_complete_boston_data(listings_url, reviews_url, 2016)
boston_neighbourhood_2016.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,South End,9896359,Sunny Top Floor Apartment,170480,Chloe,,42.34458195041722,-71.07551295903801,Entire home/apt,195,...,0.66,1,272,9896359,73661948,2016-05-09,54785955,Lori,The apt was exactly as described. The natural...,2016
1,South End,9896359,Sunny Top Floor Apartment,170480,Chloe,,42.34458195041722,-71.07551295903801,Entire home/apt,195,...,0.66,1,272,9896359,70426957,2016-04-17,30319006,Ryan,The apartment was beautiful and located in a g...,2016
2,South End,9896359,Sunny Top Floor Apartment,170480,Chloe,,42.34458195041722,-71.07551295903801,Entire home/apt,195,...,0.66,1,272,9896359,69501223,2016-04-11,58356376,Jessica Lynn,I really enjoyed my stay at Chloe's beautiful ...,2016
3,South End,9896359,Sunny Top Floor Apartment,170480,Chloe,,42.34458195041722,-71.07551295903801,Entire home/apt,195,...,0.66,1,272,9896359,68294907,2016-04-04,12170013,Line,A very beautiful appartment close to everythin...,2016
4,South End,9896359,Sunny Top Floor Apartment,170480,Chloe,,42.34458195041722,-71.07551295903801,Entire home/apt,195,...,0.66,1,272,9896359,60394995,2016-01-23,21806944,Sara,My husband and I stayed in Chloe's apartment f...,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30045,South Boston,5384923,Historic Charm with Modern Details,27840245,Jinny,,42.33775519216514,-71.047436675075,Entire home/apt,175,...,1.23,1,304,5384923,53214434,2015-11-06,47649170,Daniel,This was a wonderful stay. The location and ac...,2016
30046,South Boston,5384923,Historic Charm with Modern Details,27840245,Jinny,,42.33775519216514,-71.047436675075,Entire home/apt,175,...,1.23,1,304,5384923,52484733,2015-10-30,22533837,Alex,Awesome place. My best airb&b experience. Ver...,2016
30047,South Boston,5384923,Historic Charm with Modern Details,27840245,Jinny,,42.33775519216514,-71.047436675075,Entire home/apt,175,...,1.23,1,304,5384923,51538951,2015-10-21,42991108,Jong-Woo,Dear Jinny . I am JW Choi. We came back to Se...,2016
30048,South Boston,5384923,Historic Charm with Modern Details,27840245,Jinny,,42.33775519216514,-71.047436675075,Entire home/apt,175,...,1.23,1,304,5384923,50184860,2015-10-10,24466677,Evgeny,We had absolutely 2 wonderful weeks in this ap...,2016


In [13]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/visualisations/listings.csv"
boston_neighbourhood_2015 = get_complete_boston_data(listings_url, reviews_url, 2015)
boston_neighbourhood_2015.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,South End,8082512,"Columbus Ave By Maverick,ThirtyFour",22348222,Alicia,,42.34468465143812,-71.07663303521966,Entire home/apt,130,...,,43,333,,,,,,,2015
1,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,42843607,2015-08-16,31582449,David,This is a very nice house in one of Boston's n...,2015
2,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,37941666,2015-07-12,9598836,Adam,This is a beautiful home in the welcoming and ...,2015
3,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,36361195,2015-06-28,9598836,Adam,Wonderful place. Wonderful hosts.,2015
4,South End,1810397,3Bed/2.5Bath Luxury Triplex Boston,9485021,Stacy,,42.34601867841806,-71.06761801053976,Entire home/apt,800,...,1.17,1,358,1810397,35486001,2015-06-19,15324211,Lisa,What a gem! The Koeppel's made us feel right ...,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17784,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,44590816,2015-08-27,41676224,Gavin,"Looks just like the photos, clean and well mai...",2015
17785,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,44217970,2015-08-24,2128023,Ainslie,Robert made us feel welcome from the first min...,2015
17786,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,43872862,2015-08-22,23258396,Woody,The house is cozy and nice. Very close to beac...,2015
17787,South Boston,7713728,BEST OF BOSTON! 3BD NEAR BCEC!,34916087,Robert,,42.33235823445015,-71.03139654309788,Entire home/apt,275,...,11.88,1,317,7713728,43565524,2015-08-20,41162578,Polly,This was a fantastic apartment right next the ...,2015


In [26]:
dorchester_neighbourhood = boston_neighbourhood_2015.where("neighbourhood == 'Fenway'").union(boston_neighbourhood_2016.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2017.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2018.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2019.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2020.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Fenway,8552387,Bright Back Bay Brownstone,28880663,Amanda,,42.35117407918826,-71.08998038575537,Entire home/apt,225,...,,1,9,,,,,,,2015
1,Fenway,8469841,Large Room Back Bay Close to MIT,16953664,Ife,,42.3510130004383,-71.09012784825632,Private room,90,...,1,3,346,8469841,48817108,2015-09-28,44610837,Alan,I had a great 1st experience using airbnb. If...,2015
2,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,47402256,2015-09-18,4228486,Natalie,"The apartment is clean, comfortable and very ...",2015
3,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,44984587,2015-08-30,36039829,Marco,I booked Lian's flat for my parents and they r...,2015
4,Fenway,5086521,Modern & spacious studio for two,1868179,Ryan,,42.34086101708686,-71.08828863604951,Entire home/apt,150,...,1.55,1,130,5086521,46117564,2015-09-08,31812081,Maximilian,"The place was exactly as described, location w...",2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9073,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,30315640,2015-04-20,28743839,Nanci,The description of this place is (email hidden...,2020
9074,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,32567096,2015-05-19,15747050,Ann,A very lovely place in a perfect location. Ne...,2020
9075,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,33313882,2015-05-26,26989637,Prashant,Jennifer was really helpful and provided me an...,2020
9076,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,37398353,2015-07-06,33661506,Kathleen,Jennifer and the apartment were great! At firs...,2020


In [19]:
dorchester_neighbourhood.createOrReplaceTempView("neighbourhood")

In [24]:
spark.sql("""
    SELECT room_type, year, COUNT(room_type), ROUND(avg(price), 2) as average_price
    FROM neighbourhood
    GROUP BY room_type, year
    ORDER BY year
""").toPandas()

Unnamed: 0,room_type,year,count(room_type),average_price
0,Private room,2015,3735,76.12
1,Entire home/apt,2015,571,165.56
2,Shared room,2015,16,73.13
3,Private room,2016,3735,76.12
4,Shared room,2016,16,73.13
5,Entire home/apt,2016,571,165.56
6,Shared room,2017,16,73.13
7,Entire home/apt,2017,571,165.56
8,Private room,2017,3735,76.12
9,Entire home/apt,2018,571,165.56


In [28]:
boston_neighbourhood_2020.count()

17789