In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql import types as T
from pyspark import SparkFiles

In [13]:
spark = SparkSession\
        .builder\
        .master("local")\
        .config("spark.files.overwrite", True)\
        .getOrCreate()

In [3]:
boston_reviews_df = spark.read.csv("data/Boston/2020/June/reviews.csv", header=True)
boston_reviews_df = boston_reviews_df.withColumnRenamed("id", "review_id")
boston_reviews_df.toPandas()

Unnamed: 0,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,3781,37776825,2015-07-10,36059247,Greg,The apartment was as advertised and Frank was ...
1,3781,41842494,2015-08-09,10459388,Tai,It was a pleasure to stay at Frank's place. Th...
2,3781,45282151,2015-09-01,12264652,Damien,The apartment description is entirely faithful...
3,3781,49022647,2015-09-30,41426327,Mike,Thoroughly enjoyed my time at Frank's home. Ha...
4,3781,52503327,2015-10-30,15151513,Ivan,Great value for the money! This location has e...
...,...,...,...,...,...,...
179901,43442619,625246771,2020-05-18,41655471,Adrian,Highly recommend this place if you're visiting...
179902,43442619,626642174,2020-05-29,72655982,Eliot,View is great. A nice two bedroom. Park next d...
179903,"At my time of stay (05/20) fees were exorbitant.""",,,,,
179904,43489770,627807090,2020-06-05,347912013,Keiji,Perfect stay


In [5]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.count()

3446

In [6]:
boston_listings_df.createOrReplaceTempView("boston_listings")

In [7]:
spark.sql("SELECT * FROM boston_listings WHERE id='3781'").toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97


In [63]:
boston_reviews_df.createOrReplaceTempView("boston_reviews")

In [64]:
spark.sql("SELECT comments FROM boston_reviews WHERE listing_id='3781'").toPandas()

Unnamed: 0,comments
0,The apartment was as advertised and Frank was ...
1,It was a pleasure to stay at Frank's place. Th...
2,The apartment description is entirely faithful...
3,Thoroughly enjoyed my time at Frank's home. Ha...
4,Great value for the money! This location has e...
5,Frank was very accommodating throughout my sta...
6,Excellent! This was my second stay at the Jeff...
7,Frank was a wonderful and accommodating host. ...
8,"Frank was great, the apartment has everything ..."
9,Very nice. Comfortable apartment. Good locatio...


In [8]:
boston_listings_df = spark.read.csv("data/Boston/2020/June/listings_original.csv", header=True)
boston_listings_df.toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [37]:
boston_neighbourhoods_df = spark.read.csv("data/Boston/2020/June/neighbourhoods.csv", header=True)
boston_neighbourhoods_df.toPandas()

Unnamed: 0,neighbourhood_group,neighbourhood
0,,Allston
1,,Back Bay
2,,Bay Village
3,,Beacon Hill
4,,Brighton
5,,Charlestown
6,,Chinatown
7,,Dorchester
8,,Downtown
9,,East Boston


In [9]:
neighbourhoods_list = ["Back Bay", 'South Boston', 'South End', 'Fenway', 'Allston', 'Dorchester', 'Downtown']
boston_neighbourhoods_df = spark.createDataFrame(neighbourhoods_list, StringType())
boston_neighbourhoods_df = boston_neighbourhoods_df.selectExpr("value as neighbourhood")

In [10]:
boston_top_neighbourhood = boston_listings_df.join(boston_neighbourhoods_df, ["neighbourhood"] , "left_semi")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,South End,22354,COPLEY SQ...19TH C.TWNHSE SUNNY RM,85770,Robert,,42.34496,-71.07486,Private room,148,2,316,2019-12-13,2.93,5,90
1,South End,225224,4th floor office room,85770,Robert,,42.34643,-71.07503,Private room,105,2,318,2020-03-21,3.02,5,90
2,South End,322593,"Near Hospitals, 28 Day Bking Only,OutdoorSpace",1651480,Edward,,42.34232,-71.07594,Entire home/apt,125,28,408,2020-03-31,4.65,4,91
3,South End,526970,BACK BAY/COPLEY FIRST FLOOR ROOM,85770,Robert,,42.34523,-71.07282,Private room,148,2,175,2019-10-31,1.80,5,90
4,South End,798957,"★Discounted★ NearHospitals★2floors★ 2 Baths,2beds",1651480,Edward,,42.34232,-71.07594,Entire home/apt,347,28,417,2020-04-30,4.82,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,South Boston,43102656,Large bedroom in shared apartment with two males,10778805,Paul,,42.32958,-71.05638,Private room,50,90,0,,,3,363
1663,South Boston,43137883,Spacious Queen Room in South Boston,297860058,June,,42.33393,-71.02503,Private room,48,30,0,,,31,364
1664,South Boston,43638802,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34313,-71.05606,Private room,90,28,0,,,92,141
1665,South Boston,43639005,WB Quarters™ by STRB|Hangout|Priv. Room|Shared BA,814298,Thatch,,42.34301,-71.0573,Private room,90,28,0,,,92,144


In [11]:
boston_top_neighbourhood = boston_top_neighbourhood.join(boston_reviews_df, boston_top_neighbourhood["id"] == boston_reviews_df["listing_id"], "left")
boston_top_neighbourhood.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,106168374,2016-10-04,17779001,JeanEric,Spotlessly clean apartment & was exactly as ad...
1,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,109610832,2016-10-22,93668045,Björn,Einhver besta mögulega staðsetning í Boston. L...
2,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,156889599,2017-06-01,77837346,Mogens,"It is placed in heart of Boston, close to ever..."
3,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,174024490,2017-07-24,14811727,Courtney,"Nice location, easy stay."
4,Downtown,15127120,Sonder | Winthrop Square | Sleek Studio,12243051,Sonder,,42.35461,-71.05935,Entire home/apt,93,...,2019-12-22,1.54,61,278,15127120,175756708,2017-07-29,122100846,Marie-Claude,Bien situé. Très près transport en commun et p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64851,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,614850845,2020-03-06,65276350,Patty,Es un apartamento
64852,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,616753381,2020-03-10,45242217,A&J,Great space. Near many restaurants but also qu...
64853,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,617688297,2020-03-13,153288282,Rowan,Lovely stay! Definitely a great place for food...
64854,Allston,38131632,"Gorgeous space in vibrant area, steps to the t614",18110188,Cindy,,42.35192,-71.12404,Entire home/apt,133,...,2020-04-23,3.71,48,78,38131632,618083877,2020-03-14,313838678,Alisha,Good place for value


In [26]:
sentence = list(boston_reviews_df.select("comments").head(1)[0])
sentence

['The apartment was as advertised and Frank was incredibly helpful through the entire process. I would definitely recommend this place.']

In [29]:
from textblob import TextBlob

In [46]:
TextBlob(sentence[0]).sentiment

Sentiment(polarity=0.3, subjectivity=0.6749999999999999)

In [65]:
sentence = list(boston_reviews_df.where("listing_id == 15127120").select("comments").head(2)[1])
TextBlob(sentence[0]).translate(to= 'en')

TextBlob("Some of the best possible location in Boston. Small apartment but there is everything you need. New and tidy.")

In [49]:
dorchester_neighbourhood_df = boston_top_neighbourhood.where("neighbourhood == 'Dorchester'")
dorchester_neighbourhood_df.toPandas()

Unnamed: 0,neighbourhood,id,name,host_id,host_name,neighbourhood_group,latitude,longitude,room_type,price,...,last_review,reviews_per_month,calculated_host_listings_count,availability_365,listing_id,review_id,date,reviewer_id,reviewer_name,comments
0,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,228392128,2018-01-19,162953953,Samuel,Really a nice stay. Warm accommodations. A lot...
1,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,232850966,2018-02-06,108441156,Loise Coleen,This place is a wonderful value in a very nice...
2,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,249045170,2018-04-01,50884175,Marcelo,"Eder, is the best host I had so far! He’s amaz..."
3,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,276689352,2018-06-14,126237644,Gloria,La casa è molto confortevole e il marito di E...
4,Dorchester,22571244,✪2 Double beds 4mi to Downtown near subway fre...,25203946,Eder,,42.30936,-71.06347,Private room,44,...,2019-06-13,0.34,17,160,22571244,279053437,2018-06-19,140123458,F,"Located close to Boston, this place is a home ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19511,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,571718747,2019-12-01,195425991,Janet,This is a beautiful old house. But maybe some ...
19512,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,601081009,2020-02-02,88231857,Deniz,We stayed at Joe’s place during our Boston tri...
19513,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,607736226,2020-02-18,14250998,Raquel,"It's a beautiful space, roomy, clean, and comf..."
19514,Dorchester,35238309,1 Private suite/bath-rm best location locatio...,261646616,Joseph,,42.31583,-71.05798,Entire home/apt,500,...,2020-03-08,1.98,4,95,35238309,613056113,2020-03-01,326248264,Liam,Very nice place to stay if you’re looking to s...


In [50]:
dorchester_neighbourhood_df.createOrReplaceTempView("dorchester_table")

In [54]:
spark.sql("""SELECT room_type, count(room_type), ROUND(avg(price), 2) as average_price 
            FROM dorchester_table
            GROUP BY room_type
            ORDER BY average_price""").show()

+---------------+----------------+-------------+
|      room_type|count(room_type)|average_price|
+---------------+----------------+-------------+
|   Private room|           15297|        82.21|
|     Hotel room|             153|       130.36|
|Entire home/apt|            4066|       149.55|
+---------------+----------------+-------------+



In [70]:
url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualizations/listings.csv"
spark.read.csv(url, header=True)

Py4JJavaError: An error occurred while calling o517.csv.
: java.io.IOException: No FileSystem for scheme: http
	at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2660)
	at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2667)
	at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
	at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
	at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
	at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
	at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:547)
	at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:545)
	at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
	at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
	at scala.collection.immutable.List.foreach(List.scala:392)
	at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
	at scala.collection.immutable.List.flatMap(List.scala:355)
	at org.apache.spark.sql.execution.datasources.DataSource.org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary(DataSource.scala:545)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:359)
	at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
	at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:619)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)


In [75]:
url = "https://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualizations/listings.csv"
from pyspark import SparkFiles
spark.sparkContext.addFile(url)

In [76]:
spark.read.csv("file://"+SparkFiles.get("listings.csv"), header=True)

AnalysisException: 'Path does not exist: file:/tmp/spark-e9bf86f1-2c41-44e6-8e8f-721103584c5c/userFiles-6363795a-f8b7-42ab-b48e-2f9570027006/listings.csv;'

In [4]:
from pyspark import SparkFiles
url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/listings.csv"
spark.sparkContext.addFile(url)

In [6]:
spark.read.csv(SparkFiles.get("listings.csv"), header=True).toPandas()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [3]:
url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/visualisations/"
listings_url = url + "listings.csv"
reviews_url = url + "reviews.csv"
spark.sparkContext.addFile(listings_url)
spark.sparkContext.addFile(reviews_url)

In [7]:
listings_df = spark.read.csv(SparkFiles.get("listings.csv"), header=True)
listings_df.toPandas()


Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,3781,HARBORSIDE-Walk to subway,4804,Frank,,East Boston,42.36413,-71.02991,Entire home/apt,125,28,16,2019-12-21,0.27,1,97
1,5506,**$49 Special ** Private! Minutes to center!,8229,Terry,,Roxbury,42.32981,-71.09559,Entire home/apt,145,3,107,2020-05-01,0.78,6,333
2,6695,$99 Special!! Home Away! Condo,8229,Terry,,Roxbury,42.32994,-71.09351,Entire home/apt,169,3,115,2019-11-02,0.87,6,317
3,8789,Curved Glass Studio/1bd facing Park,26988,Anne,,Downtown,42.35919,-71.06265,Entire home/apt,99,91,25,2020-04-15,0.35,8,365
4,10730,Bright 1bed facing Golden Dome,26988,Anne,,Downtown,42.3584,-71.06185,Entire home/apt,150,91,32,2020-04-16,0.25,8,282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3441,43690671,Beautiful 2 bedroom apartment/2nd floor,127718668,Muny,,Dorchester,42.31281,-71.05591,Entire home/apt,170,1,0,,,13,363
3442,43690782,Beautiful 2-bedroom apartment/3rd floor,127718668,Muny,,Dorchester,42.31235,-71.0553,Entire home/apt,170,1,0,,,13,361
3443,43715032,2.bostonparkplaza · SUPERIOR ROOM,243931054,Boston Park Plaza,,Downtown,42.35167,-71.07058,Hotel room,100,1,0,,,13,348
3444,43715125,Private Escape,349225095,Trevor,,Dorchester,42.29637,-71.07314,Private room,90,1,0,,,1,84


In [8]:
reviews_df = spark.read.csv(SparkFiles.get("reviews.csv"), header=True)
reviews_df.toPandas()

Unnamed: 0,listing_id,date
0,3781,2015-07-10
1,3781,2015-08-09
2,3781,2015-09-01
3,3781,2015-09-30
4,3781,2015-10-30
...,...,...
140887,43257845,2020-05-17
140888,43442619,2020-05-18
140889,43442619,2020-05-29
140890,43489770,2020-06-05


In [3]:
reviews_url = "http://data.insideairbnb.com/united-states/ma/boston/2020-06-10/data/reviews.csv.gz"
spark.sparkContext.addFile(reviews_url)


In [4]:
spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz")).show()

+--------------------+--------------------+--------------------+--------------------+-------------+--------------------+
|          listing_id|                  id|                date|         reviewer_id|reviewer_name|            comments|
+--------------------+--------------------+--------------------+--------------------+-------------+--------------------+
|                3781|            37776825|          2015-07-10|            36059247|         Greg|The apartment was...|
|                3781|            41842494|          2015-08-09|            10459388|          Tai|It was a pleasure...|
|                3781|            45282151|          2015-09-01|            12264652|       Damien|The apartment des...|
|                3781|            49022647|          2015-09-30|            41426327|         Mike|Thoroughly enjoye...|
|                3781|            52503327|          2015-10-30|            15151513|         Ivan|Great value for t...|
|1. Photos don't d...|          

In [5]:
url = "http://data.insideairbnb.com/united-states/ma/boston/2020-05-12/data/reviews.csv.gz"
spark.sparkContext.addFile(reviews_url)

In [6]:
spark.read.option("header", "true").csv(SparkFiles.get("reviews.csv.gz")).show()

+--------------------+--------------------+--------------------+--------------------+-------------+--------------------+
|          listing_id|                  id|                date|         reviewer_id|reviewer_name|            comments|
+--------------------+--------------------+--------------------+--------------------+-------------+--------------------+
|                3781|            37776825|          2015-07-10|            36059247|         Greg|The apartment was...|
|                3781|            41842494|          2015-08-09|            10459388|          Tai|It was a pleasure...|
|                3781|            45282151|          2015-09-01|            12264652|       Damien|The apartment des...|
|                3781|            49022647|          2015-09-30|            41426327|         Mike|Thoroughly enjoye...|
|                3781|            52503327|          2015-10-30|            15151513|         Ivan|Great value for t...|
|1. Photos don't d...|          

In [7]:
url = "http://data.insideairbnb.com/united-states/ma/boston/2020-05-12/visualisations/listings.csv"
spark.sparkContext.addFile(url)

In [9]:
spark.read.csv(SparkFiles.get("listings.csv"), header=True).show()

+-----+--------------------+-------+----------+-------------------+-------------+--------+---------+---------------+-----+--------------+-----------------+-----------+-----------------+------------------------------+----------------+
|   id|                name|host_id| host_name|neighbourhood_group|neighbourhood|latitude|longitude|      room_type|price|minimum_nights|number_of_reviews|last_review|reviews_per_month|calculated_host_listings_count|availability_365|
+-----+--------------------+-------+----------+-------------------+-------------+--------+---------+---------------+-----+--------------+-----------------+-----------+-----------------+------------------------------+----------------+
| 3781|HARBORSIDE-Walk t...|   4804|     Frank|               null|  East Boston|42.36413|-71.02991|Entire home/apt|  125|            28|               16| 2019-12-21|             0.27|                             1|              91|
| 5506|**$49 Special ** ...|   8229|     Terry|               nu

In [14]:
url = "http://data.insideairbnb.com/united-states/ma/boston/2016-09-07/visualisations/listings.csv"
spark.sparkContext.addFile(url)

In [15]:
spark.read.csv(SparkFiles.get("listings.csv"), header=True).show()

+--------+--------------------+--------+---------+-------------------+-------------+------------------+------------------+---------------+-----+--------------+-----------------+-----------+-----------------+------------------------------+----------------+
|      id|                name| host_id|host_name|neighbourhood_group|neighbourhood|          latitude|         longitude|      room_type|price|minimum_nights|number_of_reviews|last_review|reviews_per_month|calculated_host_listings_count|availability_365|
+--------+--------------------+--------+---------+-------------------+-------------+------------------+------------------+---------------+-----+--------------+-----------------+-----------+-----------------+------------------------------+----------------+
|12147973|Sunny Bungalow in...|31303940| Virginia|               null|   Roslindale|42.282618795779484|-71.13306792912681|Entire home/apt|  250|             2|                0|       null|             null|                         

In [12]:
spark.getConf().getAll()

AttributeError: 'SparkSession' object has no attribute 'getConf'