In [27]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import types as T
from pyspark import SparkFiles
from textblob import TextBlob

In [19]:
spark = SparkSession\
        .builder\
        .master("local")\
        .config("spark.files.overwrite", True)\
        .getOrCreate()

In [3]:
boston_reviews_df = spark.read.csv("data/Boston/2020/June/reviews.csv", header=True)
boston_reviews_df = boston_reviews_df.withColumnRenamed("id", "review_id")
boston_reviews_df.toPandas()

Unnamed: 0,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
179901,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
179902,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
179903,"At my time of stay (05/20) fees were exorbitant.""",,,,,
179904,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [5]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.count()

3446

In [6]:
boston_listings_df.createOrReplaceTempView("boston_listings")

In [7]:
spark.sql("SELECT * FROM boston_listings WHERE id='3781'").toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97


In [63]:
boston_reviews_df.createOrReplaceTempView("boston_reviews")

In [64]:
spark.sql("SELECT comments FROM boston_reviews WHERE listing_id='3781'").toPandas()

Unnamed: 0,comments
0,The apartment was as advertised and Frank was ...
1,It was a pleasure to stay at Frank's place. Th...
2,The apartment description is entirely faithful...
3,Thoroughly enjoyed my time at Frank's home. Ha...
4,Great value for the money! This location has e...
5,Frank was very accommodating throughout my sta...
6,Excellent! This was my second stay at the Jeff...
7,Frank was a wonderful and accommodating host. ...
8,"Frank was great, the apartment has everything ..."
9,Very nice. Comfortable apartment. Good locatio...


In [8]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [37]:
boston_neighbourhoods_df = spark.read.csv("data/Boston/2020/June/neighbourhoods.csv", header=True)
boston_neighbourhoods_df.toPandas()

Unnamed: 0,neighbourhood_group,neighbourhood
0,,Allston
1,,Back Bay
2,,Bay Village
3,,Beacon Hill
4,,Brighton
5,,Charlestown
6,,Chinatown
7,,Dorchester
8,,Downtown
9,,East Boston


In [9]:
neighbourhoods_list = ["Back Bay", 'South Boston', 'South End', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [10]:
boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,South End,22354,COPLEY SQ...19TH C.TWNHSE SUNNY RM,85770,Robert,,42.34496,-71.07486,Private room,148,2,316,2019-12-13,2.93,5,90
1,South End,225224,4th floor office room,85770,Robert,,42.34643,-71.07503,Private room,105,2,318,2020-03-21,3.02,5,90
2,South End,322593,"Near Hospitals, 28 Day Bking Only,OutdoorSpace",1651480,Edward,,42.34232,-71.07594,Entire home/apt,125,28,408,2020-03-31,4.65,4,91
3,South End,526970,BACK BAY/COPLEY FIRST FLOOR ROOM,85770,Robert,,42.34523,-71.07282,Private room,148,2,175,2019-10-31,1.80,5,90
4,South End,798957,"★Discounted★ NearHospitals★2floors★ 2 Baths,2beds",1651480,Edward,,42.34232,-71.07594,Entire home/apt,347,28,417,2020-04-30,4.82,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,South Boston,43102656,Large bedroom in shared apartment with two males,10778805,Paul,,42.32958,-71.05638,Private room,50,90,0,,,3,363
1663,South Boston,43137883,Spacious Queen Room in South Boston,297860058,June,,42.33393,-71.02503,Private room,48,30,0,,,31,364
1664,South Boston,43638802,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34313,-71.05606,Private room,90,28,0,,,92,141
1665,South Boston,43639005,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34301,-71.0573,Private room,90,28,0,,,92,144


In [11]:
boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == boston_reviews_df["listing_id"], "left")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64851,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64852,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64853,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64854,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [26]:
sentence = list(boston_reviews_df.select("comments").head(1)[0])
sentence

['The apartment was as advertised and Frank was incredibly helpful through the entire process. I would definitely recommend this place.']

In [29]:
from textblob import TextBlob

In [46]:
TextBlob(sentence[0]).sentiment

Sentiment(polarity=0.3, subjectivity=0.6749999999999999)

In [65]:
sentence = list(boston_reviews_df.where("listing_id == 15127120").select("comments").head(2)[1])
TextBlob(sentence[0]).translate(to= 'en')

TextBlob("Some of the best possible location in Boston. Small apartment but there is everything you need. New and tidy.")

In [49]:
dorchester_neighbourhood_df = boston_top_neighbourhood.where("neighbourhood == 'Dorchester'")
dorchester_neighbourhood_df.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,228392128,2018-01-19,162953953,Samuel,Really a nice stay. Warm accommodations. A lot...
1,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,232850966,2018-02-06,108441156,Loise Coleen,This place is a wonderful value in a very nice...
2,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,249045170,2018-04-01,50884175,Marcelo,"Eder, is the best host I had so far! He’s amaz..."
3,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,276689352,2018-06-14,126237644,Gloria,La casa è molto confortevole e il marito di E...
4,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,279053437,2018-06-19,140123458,F,"Located close to Boston, this place is a home ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19511,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,571718747,2019-12-01,195425991,Janet,This is a beautiful old house. But maybe some ...
19512,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,601081009,2020-02-02,88231857,Deniz,We stayed at Joe’s place during our Boston tri...
19513,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,607736226,2020-02-18,14250998,Raquel,"It's a beautiful space, roomy, clean, and comf..."
19514,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,613056113,2020-03-01,326248264,Liam,Very nice place to stay if you’re looking to s...


In [50]:
dorchester_neighbourhood_df.createOrReplaceTempView("dorchester_table")

In [54]:
spark.sql("""SELECT room_type, count(room_type), ROUND(avg(price), 2) as average_price 
            FROM dorchester_table
            GROUP BY room_type
            ORDER BY average_price""").show()

+---------------+----------------+-------------+
|      room_type|count(room_type)|average_price|
+---------------+----------------+-------------+
|   Private room|           15297|        82.21|
|     Hotel room|             153|       130.36|
|Entire home/apt|            4066|       149.55|
+---------------+----------------+-------------+



In [3]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"

In [4]:
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"

In [5]:
spark.sparkContext.addFile(reviews_url)
spark.sparkContext.addFile(listings_url)

In [6]:
reviews_df = spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz"))
reviews_df.toPandas()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
179901,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
179902,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
179903,"At my time of stay (05/20) fees were exorbitant.""",,,,,
179904,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [8]:
listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
listings_df.toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [14]:
reviews_df = reviews_df.where("comments != 'None'")
reviews_df = reviews_df.withColumnRenamed("id", "review_id")
reviews_df.toPandas()

Unnamed: 0,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
142256,43257845,625121495,2020-05-17,342995796,Christi,Great stay!
142257,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
142258,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
142259,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [5]:
neighbourhoods_list = ['Mission Hill', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [12]:
boston_top_neighbourhood = listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,South End,22354,COPLEY SQ...19TH C.TWNHSE SUNNY RM,85770,Robert,,42.34496,-71.07486,Private room,148,2,316,2019-12-13,2.93,5,90
1,South End,225224,4th floor office room,85770,Robert,,42.34643,-71.07503,Private room,105,2,318,2020-03-21,3.02,5,90
2,South End,322593,"Near Hospitals, 28 Day Bking Only,OutdoorSpace",1651480,Edward,,42.34232,-71.07594,Entire home/apt,125,28,408,2020-03-31,4.65,4,91
3,South End,526970,BACK BAY/COPLEY FIRST FLOOR ROOM,85770,Robert,,42.34523,-71.07282,Private room,148,2,175,2019-10-31,1.80,5,90
4,South End,798957,"★Discounted★ NearHospitals★2floors★ 2 Baths,2beds",1651480,Edward,,42.34232,-71.07594,Entire home/apt,347,28,417,2020-04-30,4.82,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,South Boston,43102656,Large bedroom in shared apartment with two males,10778805,Paul,,42.32958,-71.05638,Private room,50,90,0,,,3,363
1663,South Boston,43137883,Spacious Queen Room in South Boston,297860058,June,,42.33393,-71.02503,Private room,48,30,0,,,31,364
1664,South Boston,43638802,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34313,-71.05606,Private room,90,28,0,,,92,141
1665,South Boston,43639005,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34301,-71.0573,Private room,90,28,0,,,92,144


In [16]:
boston_top_neighbourhood = boston_top_neighbourhood.join(reviews_df, boston_top_neighbourhood["id"] == reviews_df["listing_id"], "left")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64820,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64821,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64822,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64823,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [6]:
def get_complete_boston_data(listings_url, reviews_url, year):
    spark.sparkContext.addFile(reviews_url)
    spark.sparkContext.addFile(listings_url)
    boston_reviews_df = spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz"))
    boston_listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
    boston_reviews_df = boston_reviews_df.where("comments != 'None'")
    boston_reviews_df = boston_reviews_df.withColumnRenamed("id", "review_id")
    boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
    boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == boston_reviews_df["listing_id"], "left")
    return boston_top_neighbourhood.withColumn("year", F.lit(year))

In [7]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"

boston_neighbourhood_2020 = get_complete_boston_data(listings_url, reviews_url, 2020)
boston_neighbourhood_2020.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...,2020
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...,2020
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever...",2020
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay.",2020
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38778,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento,2020
38779,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...,2020
38780,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...,2020
38781,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value,2020


In [8]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2019-06-14/visualisations/listings.csv"
boston_neighbourhood_2019 = get_complete_boston_data(listings_url, reviews_url, 2019)
boston_neighbourhood_2019.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,70709969,2016-04-19,13807554,Juan,Last minute booking for me so I was not too pi...,2019
1,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,74906423,2016-05-17,31130305,Eva,Alan and Eddie were great hosts. We met Eddie ...,2019
2,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,75664635,2016-05-22,11668951,Rebecca,The location is absolutely perfect for Boston!...,2019
3,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,76616348,2016-05-28,29816733,Ivan,"Room was nice, but I wish I had known in advan...",2019
4,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.35056,-71.06565,Entire home/apt,182,...,0.70,9,362,12211821,79381989,2016-06-12,67031876,Whitney,Alan and Edward were very great hosts! My chec...,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56489,Downtown,29635339,"Luxury 2 bedroom, 2.5 Bathroom, Downtown Boston",118223333,Gary,,42.35496,-71.06021,Entire home/apt,275,...,,1,15,,,,,,,2019
56490,Downtown,32864952,Boston Commons Condos 2BD 1F2,8534462,Corp Condos & Apts,,42.35191,-71.06452,Entire home/apt,400,...,0.39,137,182,32864952,430550865,2019-03-30,149425033,Shaleika,Worst place i have ever walked into. The place...,2019
56491,Dorchester,35238309,Location location location and so beautiful!!!...,261646616,Joseph,,42.31584,-71.05796,Private room,500,...,,3,85,,,,,,,2019
56492,Allston,35614764,Spacious 1 bedroom apartment on commonwealth ave,90175388,Alekya,,42.35299,-71.12243,Entire home/apt,200,...,,1,25,,,,,,,2019


In [9]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2018-07-18/visualisations/listings.csv"
boston_neighbourhood_2018 = get_complete_boston_data(listings_url, reviews_url, 2018)
boston_neighbourhood_2018.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.350563870332486,-71.06565364219792,Entire home/apt,182,...,0.88,9,349,12211821,70709969,2016-04-19,13807554,Juan,Last minute booking for me so I was not too pi...,2018
1,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.350563870332486,-71.06565364219792,Entire home/apt,182,...,0.88,9,349,12211821,74906423,2016-05-17,31130305,Eva,Alan and Eddie were great hosts. We met Eddie ...,2018
2,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.350563870332486,-71.06565364219792,Entire home/apt,182,...,0.88,9,349,12211821,75664635,2016-05-22,11668951,Rebecca,The location is absolutely perfect for Boston!...,2018
3,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.350563870332486,-71.06565364219792,Entire home/apt,182,...,0.88,9,349,12211821,76616348,2016-05-28,29816733,Ivan,"Room was nice, but I wish I had known in advan...",2018
4,Downtown,12211821,Fab Dwntwn Studio Theater & Common,5618949,Alan,,42.350563870332486,-71.06565364219792,Entire home/apt,182,...,0.88,9,349,12211821,79381989,2016-06-12,67031876,Whitney,Alan and Edward were very great hosts! My chec...,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44140,Allston,25914450,Lovely Day,187707739,Yehuda,,42.35375808904245,-71.13733178267444,Private room,65,...,2.81,6,54,25914450,279761091,2018-06-21,63818801,Daniel,Nice and clean space! The location is convenie...,2018
44141,Allston,25914450,Lovely Day,187707739,Yehuda,,42.35375808904245,-71.13733178267444,Private room,65,...,2.81,6,54,25914450,281828334,2018-06-25,28588628,Aditi,The place is very well located and is incredib...,2018
44142,Dorchester,26138090,Comfortable room in Dorchester (#1),188413439,Hilson,,42.306501109039026,-71.06665534423358,Private room,65,...,1,4,33,26138090,288729383,2018-07-10,179467430,LaDoris,"This is a great place to stay!!! Very clean, q...",2018
44143,Fenway,26935855,Amazing view of the apartme,194322480,Nhgio,,42.34572531468303,-71.08824820401912,Private room,204,...,,1,362,,,,,,,2018


In [10]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2017-10-06/visualisations/listings.csv"
boston_neighbourhood_2017 = get_complete_boston_data(listings_url, reviews_url, 2017)
boston_neighbourhood_2017.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,113687242,2016-11-13,103088025,Gerri,We had a great stay at Allison's home. Her pl...,2017
1,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,121702696,2016-12-18,105866458,Jennifer,Staying in Allison's home was such a wonderful...,2017
2,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,132845450,2017-02-19,116909212,Brenna,Allisons place was perfect! Especially if you ...,2017
3,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,136863198,2017-03-12,118336543,Peter,"""This place was amazing, I'd even go as far as...",2017
4,Dorchester,10743879,"Charming, sun filled condo in quiet neighborhood",41269443,Allison,,42.3172840909121,-71.05493708945174,Entire home/apt,150,...,1.19,1,22,10743879,138393020,2017-03-19,110442436,Nicole,The house was a little out of the way for the ...,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28609,Dorchester,19599031,Home away from home,118883513,Darryl,,42.28615614997633,-71.03953104938581,Private room,70,...,7.32,1,347,19599031,197092669,2017-09-24,150881862,Dione,"Amazing host, place was lovely and would recom...",2017
28610,Dorchester,19599031,Home away from home,118883513,Darryl,,42.28615614997633,-71.03953104938581,Private room,70,...,7.32,1,347,19599031,198802427,2017-09-30,152268461,David,Darryl was a great host (and so was Gigi!). Th...,2017
28611,Dorchester,21148820,Private Apartment Close to Redline T Subway,18479599,Agnieszka,,42.325976820904145,-71.06016439244476,Entire home/apt,250,...,,9,309,,,,,,,2017
28612,Dorchester,21217710,Cozy Boston Room,153299935,NhuanKim,,42.31497905682507,-71.0564388777616,Private room,35,...,,1,56,,,,,,,2017


In [11]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/visualisations/listings.csv"
boston_neighbourhood_2016 = get_complete_boston_data(listings_url, reviews_url, 2016)
boston_neighbourhood_2016.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Allston,4402209,l GREAT ROOM BOSOTN,8139434,Ari,,42.35208077328276,-71.13599057398088,Private room,100,...,3.16,10,119,4402209,98811254,2016-09-02,73127997,紹桀,Great( ´` )ﾉ,2016
1,Allston,4402209,l GREAT ROOM BOSOTN,8139434,Ari,,42.35208077328276,-71.13599057398088,Private room,100,...,3.16,10,119,4402209,98653881,2016-09-01,73127997,紹桀,"Ari is a really nice host, willing to help me ...",2016
2,Allston,4402209,l GREAT ROOM BOSOTN,8139434,Ari,,42.35208077328276,-71.13599057398088,Private room,100,...,3.16,10,119,4402209,89696317,2016-07-29,18169449,Cui,"It's a very convenient location, and Ari provi...",2016
3,Allston,4402209,l GREAT ROOM BOSOTN,8139434,Ari,,42.35208077328276,-71.13599057398088,Private room,100,...,3.16,10,119,4402209,82408950,2016-06-27,62950686,Atalya,Ari has this hosting thing down to a T! Pure p...,2016
4,Allston,4402209,l GREAT ROOM BOSOTN,8139434,Ari,,42.35208077328276,-71.13599057398088,Private room,100,...,3.16,10,119,4402209,82051490,2016-06-26,61663856,Yechiel,Just like every time my stay was perfect:-),2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17468,Downtown,1868124,Lux Downtown Boston 1BR Apt w/pool,9419684,Mike,,42.35130793025092,-71.06350574269595,Entire home/apt,293,...,0.30,61,83,1868124,21015150,2014-10-09,21945248,Yucheng,The apartment is excellent. It has a comfortab...,2016
17469,Downtown,1868124,Lux Downtown Boston 1BR Apt w/pool,9419684,Mike,,42.35130793025092,-71.06350574269595,Entire home/apt,293,...,0.30,61,83,1868124,19838704,2014-09-19,1798693,Courtney,This place was great! The host was very detail...,2016
17470,Downtown,1868124,Lux Downtown Boston 1BR Apt w/pool,9419684,Mike,,42.35130793025092,-71.06350574269595,Entire home/apt,293,...,0.30,61,83,1868124,18677945,2014-08-30,6015044,Yaa A,It was a wonderful experience. Checking in was...,2016
17471,Downtown,1868124,Lux Downtown Boston 1BR Apt w/pool,9419684,Mike,,42.35130793025092,-71.06350574269595,Entire home/apt,293,...,0.30,61,83,1868124,16032316,2014-07-20,17561906,Lynne,We really enjoyed our week in Boston. The apar...,2016


In [12]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/data/reviews.csv.gz"
listings_url = "http://data.insideairbnb.com/united-states/ma/boston/2015-10-03/visualisations/listings.csv"
boston_neighbourhood_2015 = get_complete_boston_data(listings_url, reviews_url, 2015)
boston_neighbourhood_2015.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,44668713,2015-08-28,12066100,Eddie,"The house owner, Raphael, is a very kind and e...",2015
1,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,35203143,2015-06-16,29246587,Michael,Raphael's home was well maintained and very cl...,2015
2,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,34523184,2015-06-09,26172960,Joe,"""This apartment was perfect for our trip to a ...",2015
3,Allston,5103430,Brand New duplex - 3 BDR / 3 BA,26367207,Raphael,,42.3521929684635,-71.12961763207782,Entire home/apt,310,...,0.88,3,68,5103430,32679028,2015-05-20,5961958,Erich,"Awesome place, great stay, host was very helpf...",2015
4,Allston,7052847,1 big room in a cute 2 bedroom apt,10336060,Rebeca,,42.352805645017064,-71.12398135726829,Private room,60,...,,1,70,,,,,,,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10968,Downtown,1059326,Houseboat - NYC Loft style on Water,1335344,Catherine,,42.36164758775595,-71.04759646999447,Entire home/apt,350,...,0.40,1,364,1059326,8487809,2013-11-02,5065556,Aaron,The reservation was canceled the day before ar...,2015
10969,Downtown,1059326,Houseboat - NYC Loft style on Water,1335344,Catherine,,42.36164758775595,-71.04759646999447,Entire home/apt,350,...,0.40,1,364,1059326,7954583,2013-10-09,1615658,Amy Jo,"Catherine was great, super thorough. Her hous...",2015
10970,Downtown,1059326,Houseboat - NYC Loft style on Water,1335344,Catherine,,42.36164758775595,-71.04759646999447,Entire home/apt,350,...,0.40,1,364,1059326,7858814,2013-10-06,8088539,Marie,The houseboat is in an amazing location—we wal...,2015
10971,Downtown,1059326,Houseboat - NYC Loft style on Water,1335344,Catherine,,42.36164758775595,-71.04759646999447,Entire home/apt,350,...,0.40,1,364,1059326,7473866,2013-09-21,6470371,Mitchell,What a great spot. Definitely for a couple or...,2015


In [13]:
dorchester_neighbourhood = boston_neighbourhood_2015.where("neighbourhood == 'Fenway'").union(boston_neighbourhood_2016.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2017.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2018.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2019.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood = dorchester_neighbourhood.union(boston_neighbourhood_2020.where("neighbourhood == 'Fenway'"))
dorchester_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Fenway,8552387,Bright Back Bay Brownstone,28880663,Amanda,,42.35117407918826,-71.08998038575537,Entire home/apt,225,...,,1,9,,,,,,,2015
1,Fenway,8469841,Large Room Back Bay Close to MIT,16953664,Ife,,42.3510130004383,-71.09012784825632,Private room,90,...,1,3,346,8469841,48817108,2015-09-28,44610837,Alan,I had a great 1st experience using airbnb. If...,2015
2,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,47402256,2015-09-18,4228486,Natalie,"The apartment is clean, comfortable and very ...",2015
3,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,44984587,2015-08-30,36039829,Marco,I booked Lian's flat for my parents and they r...,2015
4,Fenway,5086521,Modern & spacious studio for two,1868179,Ryan,,42.34086101708686,-71.08828863604951,Entire home/apt,150,...,1.55,1,130,5086521,46117564,2015-09-08,31812081,Maximilian,"The place was exactly as described, location w...",2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9073,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,30315640,2015-04-20,28743839,Nanci,The description of this place is (email hidden...,2020
9074,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,32567096,2015-05-19,15747050,Ann,A very lovely place in a perfect location. Ne...,2020
9075,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,33313882,2015-05-26,26989637,Prashant,Jennifer was really helpful and provided me an...,2020
9076,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,37398353,2015-07-06,33661506,Kathleen,Jennifer and the apartment were great! At firs...,2020


In [14]:
dorchester_neighbourhood.createOrReplaceTempView("neighbourhood")

Unnamed: 0,room_type,year,count(room_type),average_price
0,Private room,2015,243,122.86
1,Entire home/apt,2015,1215,187.86
2,Shared room,2015,55,81.8
3,Private room,2016,243,122.86
4,Shared room,2016,55,81.8
5,Entire home/apt,2016,1215,187.86
6,Shared room,2017,55,81.8
7,Entire home/apt,2017,1215,187.86
8,Private room,2017,243,122.86
9,Entire home/apt,2018,1215,187.86


In [20]:
boston_neighbourhood_2020.count()

10973

In [17]:
dorchester_neighbourhood.select("comments").dropDuplicates().toPandas()

Unnamed: 0,comments
0,La estancia con Angela ha sido increíble. No p...
1,I stayed with Angela for 3 weeks and cannot sp...
2,My short stay at Joe's was very satisfactory. ...
3,We didn't meet the host in person but Sean lef...
4,"Sehr studentisch, praktisch neben der Turnpike..."
...,...
1446,"The apartment is great. Good space, good locat..."
1447,"Great apartment, great location, great host! ..."
1448,"Kelly was very friendly, helpful and concerned..."
1449,Great location. Great apartment. Host went out...


In [37]:
sentiment_udf = F.udf(lambda x:TextBlob(x).sentiment.polarity if x is not None else None , T.StringType())

In [26]:
dorchester_neighbourhood['sentiment_polarity'] = dorchester_neighbourhood..withColumn(f.col("comments"))

AttributeError: 'DataFrame' object has no attribute 'apply'

In [38]:
dorchester_neighbourhood.withColumn('review_polarity', sentiment_udf(F.col("comments"))).toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year,review_polarity
0,Fenway,8552387,Bright Back Bay Brownstone,28880663,Amanda,,42.35117407918826,-71.08998038575537,Entire home/apt,225,...,1,9,,,,,,,2015,
1,Fenway,8469841,Large Room Back Bay Close to MIT,16953664,Ife,,42.3510130004383,-71.09012784825632,Private room,90,...,3,346,8469841,48817108,2015-09-28,44610837,Alan,I had a great 1st experience using airbnb. If...,2015,0.31500000000000006
2,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1,58,7757132,47402256,2015-09-18,4228486,Natalie,"The apartment is clean, comfortable and very ...",2015,0.3766666666666667
3,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1,58,7757132,44984587,2015-08-30,36039829,Marco,I booked Lian's flat for my parents and they r...,2015,0.07714285714285714
4,Fenway,5086521,Modern & spacious studio for two,1868179,Ryan,,42.34086101708686,-71.08828863604951,Entire home/apt,150,...,1,130,5086521,46117564,2015-09-08,31812081,Maximilian,"The place was exactly as described, location w...",2015,0.4461111111111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9073,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,30315640,2015-04-20,28743839,Nanci,The description of this place is (email hidden...,2020,0.3726190476190476
9074,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,32567096,2015-05-19,15747050,Ann,A very lovely place in a perfect location. Ne...,2020,0.21796969696969692
9075,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,33313882,2015-05-26,26989637,Prashant,Jennifer was really helpful and provided me an...,2020,0.4979166666666667
9076,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,37398353,2015-07-06,33661506,Kathleen,Jennifer and the apartment were great! At firs...,2020,0.13065907522429263


In [41]:
str(TextBlob("Fue un gran sitio").translate(to= 'en'))

'It was a great site'

In [42]:
translate_udf = F.udf(lambda x: str(TextBlob(x).translate(to= 'en')) if x is not None else None , T.StringType())

In [43]:
dorchester_neighbourhood.withColumn('comments', translate_udf(F.col("comments"))).toPandas()

Py4JJavaError: An error occurred while calling o497.collectToPython.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 116 in stage 87.0 failed 1 times, most recent failure: Lost task 116.0 in stage 87.0 (TID 10464, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 352, in dump_stream
    self.serializer.dump_stream(self._batched(iterator), stream)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 142, in dump_stream
    for obj in iterator:
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 341, in _batched
    for item in iterator:
  File "<string>", line 1, in <lambda>
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 85, in <lambda>
    return lambda *a: f(*a)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/util.py", line 99, in wrapper
    return f(*args, **kwargs)
  File "<ipython-input-42-d9e7e7d54a76>", line 1, in <lambda>
  File "/opt/conda/lib/python3.7/site-packages/textblob/blob.py", line 547, in translate
    from_lang=from_lang, to_lang=to))
  File "/opt/conda/lib/python3.7/site-packages/textblob/translate.py", line 61, in translate
    self._validate_translation(source, result)
  File "/opt/conda/lib/python3.7/site-packages/textblob/translate.py", line 85, in _validate_translation
    raise NotTranslated('Translation API returned the input string unchanged.')
textblob.exceptions.NotTranslated: Translation API returned the input string unchanged.

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
	at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:81)
	at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:64)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage7.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
	at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
	at org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)
	at org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)
	at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
	at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 352, in dump_stream
    self.serializer.dump_stream(self._batched(iterator), stream)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 142, in dump_stream
    for obj in iterator:
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 341, in _batched
    for item in iterator:
  File "<string>", line 1, in <lambda>
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 85, in <lambda>
    return lambda *a: f(*a)
  File "/usr/local/spark/python/lib/pyspark.zip/pyspark/util.py", line 99, in wrapper
    return f(*args, **kwargs)
  File "<ipython-input-42-d9e7e7d54a76>", line 1, in <lambda>
  File "/opt/conda/lib/python3.7/site-packages/textblob/blob.py", line 547, in translate
    from_lang=from_lang, to_lang=to))
  File "/opt/conda/lib/python3.7/site-packages/textblob/translate.py", line 61, in translate
    self._validate_translation(source, result)
  File "/opt/conda/lib/python3.7/site-packages/textblob/translate.py", line 85, in _validate_translation
    raise NotTranslated('Translation API returned the input string unchanged.')
textblob.exceptions.NotTranslated: Translation API returned the input string unchanged.

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
	at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:81)
	at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:64)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage7.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more


In [44]:
def translate(sentence):
    try:
        str(TextBlob(x).translate(to= 'en'))
    except:
        return sentence

In [45]:
translate_udf = F.udf(lambda sentence: translate(sentence) if sentence is not None else None , T.StringType())

In [47]:
dorchester_neighbourhood = dorchester_neighbourhood.withColumn('comments', translate_udf(F.col("comments")))
dorchester_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year
0,Fenway,8552387,Bright Back Bay Brownstone,28880663,Amanda,,42.35117407918826,-71.08998038575537,Entire home/apt,225,...,,1,9,,,,,,,2015
1,Fenway,8469841,Large Room Back Bay Close to MIT,16953664,Ife,,42.3510130004383,-71.09012784825632,Private room,90,...,1,3,346,8469841,48817108,2015-09-28,44610837,Alan,I had a great 1st experience using airbnb. If...,2015
2,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,47402256,2015-09-18,4228486,Natalie,"The apartment is clean, comfortable and very ...",2015
3,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1.71,1,58,7757132,44984587,2015-08-30,36039829,Marco,I booked Lian's flat for my parents and they r...,2015
4,Fenway,5086521,Modern & spacious studio for two,1868179,Ryan,,42.34086101708686,-71.08828863604951,Entire home/apt,150,...,1.55,1,130,5086521,46117564,2015-09-08,31812081,Maximilian,"The place was exactly as described, location w...",2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9073,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,30315640,2015-04-20,28743839,Nanci,The description of this place is (email hidden...,2020
9074,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,32567096,2015-05-19,15747050,Ann,A very lovely place in a perfect location. Ne...,2020
9075,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,33313882,2015-05-26,26989637,Prashant,Jennifer was really helpful and provided me an...,2020
9076,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,0.90,1,29,5671806,37398353,2015-07-06,33661506,Kathleen,Jennifer and the apartment were great! At firs...,2020


In [48]:
sentiment_udf = F.udf(lambda x:TextBlob(x).sentiment.polarity if x is not None else None , T.StringType())

In [49]:
dorchester_neighbourhood = dorchester_neighbourhood.withColumn('review_polarity', sentiment_udf(F.col("comments")))
dorchester_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments,year,review_polarity
0,Fenway,8552387,Bright Back Bay Brownstone,28880663,Amanda,,42.35117407918826,-71.08998038575537,Entire home/apt,225,...,1,9,,,,,,,2015,
1,Fenway,8469841,Large Room Back Bay Close to MIT,16953664,Ife,,42.3510130004383,-71.09012784825632,Private room,90,...,3,346,8469841,48817108,2015-09-28,44610837,Alan,I had a great 1st experience using airbnb. If...,2015,0.31500000000000006
2,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1,58,7757132,47402256,2015-09-18,4228486,Natalie,"The apartment is clean, comfortable and very ...",2015,0.3766666666666667
3,Fenway,7757132,Charming and spacious 1 bedroom,8958021,Lian,,42.34207635265539,-71.09799981268785,Entire home/apt,190,...,1,58,7757132,44984587,2015-08-30,36039829,Marco,I booked Lian's flat for my parents and they r...,2015,0.07714285714285714
4,Fenway,5086521,Modern & spacious studio for two,1868179,Ryan,,42.34086101708686,-71.08828863604951,Entire home/apt,150,...,1,130,5086521,46117564,2015-09-08,31812081,Maximilian,"The place was exactly as described, location w...",2015,0.4461111111111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9073,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,30315640,2015-04-20,28743839,Nanci,The description of this place is (email hidden...,2020,0.3726190476190476
9074,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,32567096,2015-05-19,15747050,Ann,A very lovely place in a perfect location. Ne...,2020,0.21796969696969692
9075,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,33313882,2015-05-26,26989637,Prashant,Jennifer was really helpful and provided me an...,2020,0.4979166666666667
9076,Fenway,5671806,Luxury Fenway Brownstone-Beacon St,29406566,Jennifer,,42.34535000231358,-71.10684236073557,Entire home/apt,400,...,1,29,5671806,37398353,2015-07-06,33661506,Kathleen,Jennifer and the apartment were great! At firs...,2020,0.13065907522429263


In [73]:
fenway_neighbourhood = dorchester_neighbourhood.select("room_type", "review_polarity", "year")
fenway_neighbourhood_home = fenway_neighbourhood.where("room_type == 'Entire home/apt'")
fenway_neighbourhood_shared = fenway_neighbourhood.where("room_type == 'Shared room'")
fenway_neighbourhood_private = fenway_neighbourhood.where("room_type == 'Private room'")

In [58]:
import plotly.express as px

In [69]:
fig = px.histogram(fenway_neighbourhood.toPandas(), x="review_polarity", color='room_type')
fig.show()

In [79]:
fenway_neighbourhood = dorchester_neighbourhood.select("room_type", "review_polarity", "year")
fenway_neighbourhood.toPandas()

Unnamed: 0,room_type,review_polarity,year
0,Entire home/apt,,2015
1,Private room,0.31500000000000006,2015
2,Entire home/apt,0.3766666666666667,2015
3,Entire home/apt,0.07714285714285714,2015
4,Entire home/apt,0.4461111111111111,2015
...,...,...,...
9073,Private room,0.25,2020
9074,Private room,0.0,2020
9075,Private room,0.0,2020
9076,Private room,0.0,2020


In [83]:
fenway_neighbourhood_home.select("review_polarity").toPandas()

Unnamed: 0,review_polarity
0,
1,0.3766666666666667
2,0.07714285714285714
3,0.4461111111111111
4,0.5591666666666667
...,...
7285,0.48875
7286,0.13065907522429263
7287,0.4979166666666667
7288,0.21796969696969692


In [84]:
fenway_neighbourhood_2015 = px.histogram(fenway_neighbourhood.toPandas(), x="review_polarity", color='room_type')
fenway_neighbourhood_2015.show()

In [85]:
fig = go.Figure()

In [None]:
fig.add_trace(go.Histogram(x=))
fig.add_trace(go.Histogram(x=x1))

In [86]:
fenway_neighbourhood.where("room_type == 'Entire home/apt'").toPandas()

Unnamed: 0,room_type,review_polarity,year
0,Entire home/apt,,2015
1,Entire home/apt,0.3766666666666667,2015
2,Entire home/apt,0.07714285714285714,2015
3,Entire home/apt,0.4461111111111111,2015
4,Entire home/apt,0.5591666666666667,2015
...,...,...,...
7285,Entire home/apt,0.48875,2020
7286,Entire home/apt,0.13065907522429263,2020
7287,Entire home/apt,0.4979166666666667,2020
7288,Entire home/apt,0.21796969696969692,2020
