In [1]:
# install boto3 for AWS connection
dbutils.library.installPyPI("boto3", version="1.9.157")
dbutils.library.restartPython()

In [2]:
# install these Maven packages in the cluster
# JohnSnowLabs:spark-nlp:2.3.4

In [3]:
# import AWS credentials
# import config.py ##for local
%run "/dbfs/FileStore/tables/config" ##for databricks

In [4]:
import pandas as pd
import numpy as np

from pyspark.sql import SparkSession
from pyspark import SparkFiles
from pyspark.sql import DataFrame
from pyspark.sql.types import *
import pyspark.sql.functions as F

In [5]:
# get or create Spark session
app_name = "spark-airbnb-sentiment"
spark = SparkSession.builder.appName(app_name).getOrCreate()

In [6]:
import boto3

secret_name = my_secret_name
region_name = my_region_name
access_key  = my_access_key
secret_key  = my_secret_key

session      = boto3.session.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region_name)
client       = session.client('secretsmanager')
secret_value = client.get_secret_value(SecretId=secret_name)

In [7]:
import json
def get_connection(secret_value):
    return json.loads(secret_value['SecretString'])

In [8]:
connection = get_connection(secret_value)

# Postgres credentials
jdbcHostname = connection['host']
jdbcPort     = connection['port']
jdbcDatabase = "postgres"
dialect      = "postgresql"
jdbcUsername = connection['username']
jdbcPassword = connection['password']

jdbcUrl = f"jdbc:{dialect}://{jdbcHostname}:{jdbcPort}/{jdbcDatabase}"
connectionProperties = {
  "user"     : jdbcUsername,
  "password" : jdbcPassword,
  "driver"   : "org.postgresql.Driver" 
}

In [9]:
# Read from reviews_full table

table = "reviews_full"

reviews_df = spark.read.jdbc(url=jdbcUrl, table=table, properties=connectionProperties)
reviews_df.printSchema()

In [10]:
from pyspark.ml import Pipeline
import sparknlp
sparknlp.start()
from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("analyze_sentiment", lang="en")

In [11]:
thousand_row = reviews_df.withColumnRenamed("comments", "text").limit(1000)

In [12]:
annotations_df = pipeline.transform(thousand_row)
# display(annotations_df)

In [13]:
# return the sentiment with the highest confidence value
@F.udf()
def highestConfidence(sentClassifications):
  idx = np.argmax([x["metadata"]["confidence"] for x in sentClassifications])
  return sentClassifications[idx]["result"]


# return sentiment of the longest sentence
@F.udf()
def longestSentiment(sentClassifications):
  sentLengths = [
      [x["result"], x["end"] - x["begin"]]
    for x in sentClassifications
  ]
  posSent = sum([x[1] for x in sentLengths if x[0] == "positive"])
  negSent = sum([x[1] for x in sentLengths if x[0] == "negative"])
  return "positive" if posSent>negSent else "negative"


# return the overall average sentiment by votes
@F.udf()
def averageSentiment(sentClassifications):
  sentConf = [
      [x["result"], x["metadata"]["confidence"]]
    for x in sentClassifications
  ]
  posSent = np.mean([float(x[1]) for x in sentConf if x[0] == "positive"])
  negSent = np.mean([float(x[1]) for x in sentConf if x[0] == "negative"])
  return "positive" if posSent>=negSent else "negative"


# return the overall average sentiment after normalizing by sentence length
@F.udf()
def classifySentiment(sentClassifications):
  
  sentConf = [
      [x["result"], x["metadata"]["confidence"]]
    for x in sentClassifications
  ]
  posSentConf = np.mean([float(x[1]) for x in sentConf if x[0] == "positive"])
  negSentConf = np.mean([float(x[1]) for x in sentConf if x[0] == "negative"])
  
  sentLengths = [
      [x["result"], x["end"] - x["begin"]]
    for x in sentClassifications
  ]
  posSentLengths = sum([x[1] for x in sentLengths if x[0] == "positive"])
  negSentLengths = sum([x[1] for x in sentLengths if x[0] == "negative"])
  
  # set to NaN if we have null comments
  if posSentLengths == 0 and negSentLengths == 0:
    return np.nan
  
  if abs(posSentLengths - negSentLengths)/(posSentLengths + negSentLengths) > 0.2:
    return "positive" if posSentLengths>negSentLengths else "negative"
  
  else:
    return "positive" if posSentConf>negSentConf else "negative"

In [14]:
annotations_df.printSchema()

In [15]:
@F.udf()
def example(sentiment):
  print(sentiment)

In [16]:
display(annotations_df)

listing_id,id,date,reviewer_id,reviewer_name,text,document,sentence,token,checked,sentiment
5456,16489,2009-11-07,46119,Kevin,"Very accommodating, great space.","List(List(document, 0, 31, Very accommodating, great space., Map(sentence -> 0), List()))","List(List(document, 0, 31, Very accommodating, great space., Map(sentence -> 0), List()))","List(List(token, 0, 3, Very, Map(sentence -> 0), List()), List(token, 5, 17, accommodating, Map(sentence -> 0), List()), List(token, 18, 18, ,, Map(sentence -> 0), List()), List(token, 20, 24, great, Map(sentence -> 0), List()), List(token, 26, 30, space, Map(sentence -> 0), List()), List(token, 31, 31, ., Map(sentence -> 0), List()))","List(List(token, 0, 3, Very, Map(confidence -> 1.0), List()), List(token, 5, 17, accommodating, Map(confidence -> 1.0), List()), List(token, 18, 18, ,, Map(confidence -> 0.0), List()), List(token, 20, 24, great, Map(confidence -> 1.0), List()), List(token, 26, 30, space, Map(confidence -> 1.0), List()), List(token, 31, 31, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 31, negative, Map(confidence -> 0.5582), List()))"
5456,18215,2009-11-24,54243,Jane,"6th Street is a bit of a walk but location is central to lots of shopping, nightlife and eateries. Place is clean, private and offers an array of videos and dcs. Great coffee, coffee pot and Fiesta Ware in the kitchen.","List(List(document, 0, 217, 6th Street is a bit of a walk but location is central to lots of shopping, nightlife and eateries. Place is clean, private and offers an array of videos and dcs. Great coffee, coffee pot and Fiesta Ware in the kitchen., Map(sentence -> 0), List()))","List(List(document, 0, 97, 6th Street is a bit of a walk but location is central to lots of shopping, nightlife and eateries., Map(sentence -> 0), List()), List(document, 99, 160, Place is clean, private and offers an array of videos and dcs., Map(sentence -> 1), List()), List(document, 162, 217, Great coffee, coffee pot and Fiesta Ware in the kitchen., Map(sentence -> 2), List()))","List(List(token, 0, 2, 6th, Map(sentence -> 0), List()), List(token, 4, 9, Street, Map(sentence -> 0), List()), List(token, 11, 12, is, Map(sentence -> 0), List()), List(token, 14, 14, a, Map(sentence -> 0), List()), List(token, 16, 18, bit, Map(sentence -> 0), List()), List(token, 20, 21, of, Map(sentence -> 0), List()), List(token, 23, 23, a, Map(sentence -> 0), List()), List(token, 25, 28, walk, Map(sentence -> 0), List()), List(token, 30, 32, but, Map(sentence -> 0), List()), List(token, 34, 41, location, Map(sentence -> 0), List()), List(token, 43, 44, is, Map(sentence -> 0), List()), List(token, 46, 52, central, Map(sentence -> 0), List()), List(token, 54, 55, to, Map(sentence -> 0), List()), List(token, 57, 60, lots, Map(sentence -> 0), List()), List(token, 62, 63, of, Map(sentence -> 0), List()), List(token, 65, 72, shopping, Map(sentence -> 0), List()), List(token, 73, 73, ,, Map(sentence -> 0), List()), List(token, 75, 83, nightlife, Map(sentence -> 0), List()), List(token, 85, 87, and, Map(sentence -> 0), List()), List(token, 89, 96, eateries, Map(sentence -> 0), List()), List(token, 97, 97, ., Map(sentence -> 0), List()), List(token, 99, 103, Place, Map(sentence -> 1), List()), List(token, 105, 106, is, Map(sentence -> 1), List()), List(token, 108, 112, clean, Map(sentence -> 1), List()), List(token, 113, 113, ,, Map(sentence -> 1), List()), List(token, 115, 121, private, Map(sentence -> 1), List()), List(token, 123, 125, and, Map(sentence -> 1), List()), List(token, 127, 132, offers, Map(sentence -> 1), List()), List(token, 134, 135, an, Map(sentence -> 1), List()), List(token, 137, 141, array, Map(sentence -> 1), List()), List(token, 143, 144, of, Map(sentence -> 1), List()), List(token, 146, 151, videos, Map(sentence -> 1), List()), List(token, 153, 155, and, Map(sentence -> 1), List()), List(token, 157, 159, dcs, Map(sentence -> 1), List()), List(token, 160, 160, ., Map(sentence -> 1), List()), List(token, 162, 166, Great, Map(sentence -> 2), List()), List(token, 168, 173, coffee, Map(sentence -> 2), List()), List(token, 174, 174, ,, Map(sentence -> 2), List()), List(token, 176, 181, coffee, Map(sentence -> 2), List()), List(token, 183, 185, pot, Map(sentence -> 2), List()), List(token, 187, 189, and, Map(sentence -> 2), List()), List(token, 191, 196, Fiesta, Map(sentence -> 2), List()), List(token, 198, 201, Ware, Map(sentence -> 2), List()), List(token, 203, 204, in, Map(sentence -> 2), List()), List(token, 206, 208, the, Map(sentence -> 2), List()), List(token, 210, 216, kitchen, Map(sentence -> 2), List()), List(token, 217, 217, ., Map(sentence -> 2), List()))","List(List(token, 0, 2, 6th, Map(confidence -> 1.0), List()), List(token, 4, 9, Street, Map(confidence -> 1.0), List()), List(token, 11, 12, is, Map(confidence -> 1.0), List()), List(token, 14, 14, a, Map(confidence -> 1.0), List()), List(token, 16, 18, bit, Map(confidence -> 1.0), List()), List(token, 20, 21, of, Map(confidence -> 1.0), List()), List(token, 23, 23, a, Map(confidence -> 1.0), List()), List(token, 25, 28, walk, Map(confidence -> 1.0), List()), List(token, 30, 32, but, Map(confidence -> 1.0), List()), List(token, 34, 41, location, Map(confidence -> 1.0), List()), List(token, 43, 44, is, Map(confidence -> 1.0), List()), List(token, 46, 52, central, Map(confidence -> 1.0), List()), List(token, 54, 55, to, Map(confidence -> 1.0), List()), List(token, 57, 60, lots, Map(confidence -> 1.0), List()), List(token, 62, 63, of, Map(confidence -> 1.0), List()), List(token, 65, 72, shopping, Map(confidence -> 1.0), List()), List(token, 73, 73, ,, Map(confidence -> 0.0), List()), List(token, 75, 83, nightlife, Map(confidence -> 1.0), List()), List(token, 85, 87, and, Map(confidence -> 1.0), List()), List(token, 89, 96, eateries, Map(confidence -> 1.0), List()), List(token, 97, 97, ., Map(confidence -> 0.0), List()), List(token, 99, 103, Place, Map(confidence -> 1.0), List()), List(token, 105, 106, is, Map(confidence -> 1.0), List()), List(token, 108, 112, clean, Map(confidence -> 1.0), List()), List(token, 113, 113, ,, Map(confidence -> 0.0), List()), List(token, 115, 121, private, Map(confidence -> 1.0), List()), List(token, 123, 125, and, Map(confidence -> 1.0), List()), List(token, 127, 132, offers, Map(confidence -> 1.0), List()), List(token, 134, 135, an, Map(confidence -> 1.0), List()), List(token, 137, 141, array, Map(confidence -> 1.0), List()), List(token, 143, 144, of, Map(confidence -> 1.0), List()), List(token, 146, 151, videos, Map(confidence -> 1.0), List()), List(token, 153, 155, and, Map(confidence -> 1.0), List()), List(token, 157, 159, dcs, Map(confidence -> 0.0), List()), List(token, 160, 160, ., Map(confidence -> 0.0), List()), List(token, 162, 166, Great, Map(confidence -> 1.0), List()), List(token, 168, 173, coffee, Map(confidence -> 1.0), List()), List(token, 174, 174, ,, Map(confidence -> 0.0), List()), List(token, 176, 181, coffee, Map(confidence -> 1.0), List()), List(token, 183, 185, pot, Map(confidence -> 1.0), List()), List(token, 187, 189, and, Map(confidence -> 1.0), List()), List(token, 191, 196, Fiesta, Map(confidence -> 1.0), List()), List(token, 198, 201, Ware, Map(confidence -> 1.0), List()), List(token, 203, 204, in, Map(confidence -> 1.0), List()), List(token, 206, 208, the, Map(confidence -> 1.0), List()), List(token, 210, 216, kitchen, Map(confidence -> 1.0), List()), List(token, 217, 217, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 97, negative, Map(confidence -> 0.5202), List()), List(sentiment, 99, 160, positive, Map(confidence -> 0.4819), List()), List(sentiment, 162, 217, positive, Map(confidence -> 0.4907), List()))"
5456,20136,2009-12-13,50357,Gerald,"Great cozy/modern space, nice location, free wifi, and accommodating hostess made this place perfect for our stay.","List(List(document, 0, 113, Great cozy/modern space, nice location, free wifi, and accommodating hostess made this place perfect for our stay., Map(sentence -> 0), List()))","List(List(document, 0, 113, Great cozy/modern space, nice location, free wifi, and accommodating hostess made this place perfect for our stay., Map(sentence -> 0), List()))","List(List(token, 0, 4, Great, Map(sentence -> 0), List()), List(token, 6, 16, cozy/modern, Map(sentence -> 0), List()), List(token, 18, 22, space, Map(sentence -> 0), List()), List(token, 23, 23, ,, Map(sentence -> 0), List()), List(token, 25, 28, nice, Map(sentence -> 0), List()), List(token, 30, 37, location, Map(sentence -> 0), List()), List(token, 38, 38, ,, Map(sentence -> 0), List()), List(token, 40, 43, free, Map(sentence -> 0), List()), List(token, 45, 48, wifi, Map(sentence -> 0), List()), List(token, 49, 49, ,, Map(sentence -> 0), List()), List(token, 51, 53, and, Map(sentence -> 0), List()), List(token, 55, 67, accommodating, Map(sentence -> 0), List()), List(token, 69, 75, hostess, Map(sentence -> 0), List()), List(token, 77, 80, made, Map(sentence -> 0), List()), List(token, 82, 85, this, Map(sentence -> 0), List()), List(token, 87, 91, place, Map(sentence -> 0), List()), List(token, 93, 99, perfect, Map(sentence -> 0), List()), List(token, 101, 103, for, Map(sentence -> 0), List()), List(token, 105, 107, our, Map(sentence -> 0), List()), List(token, 109, 112, stay, Map(sentence -> 0), List()), List(token, 113, 113, ., Map(sentence -> 0), List()))","List(List(token, 0, 4, Great, Map(confidence -> 1.0), List()), List(token, 6, 16, cozy/modern, Map(confidence -> 0.0), List()), List(token, 18, 22, space, Map(confidence -> 1.0), List()), List(token, 23, 23, ,, Map(confidence -> 0.0), List()), List(token, 25, 28, nice, Map(confidence -> 1.0), List()), List(token, 30, 37, location, Map(confidence -> 1.0), List()), List(token, 38, 38, ,, Map(confidence -> 0.0), List()), List(token, 40, 43, free, Map(confidence -> 1.0), List()), List(token, 45, 48, wifi, Map(confidence -> 1.0), List()), List(token, 49, 49, ,, Map(confidence -> 0.0), List()), List(token, 51, 53, and, Map(confidence -> 1.0), List()), List(token, 55, 67, accommodating, Map(confidence -> 1.0), List()), List(token, 69, 75, hostess, Map(confidence -> 1.0), List()), List(token, 77, 80, made, Map(confidence -> 1.0), List()), List(token, 82, 85, this, Map(confidence -> 1.0), List()), List(token, 87, 91, place, Map(confidence -> 1.0), List()), List(token, 93, 99, perfect, Map(confidence -> 1.0), List()), List(token, 101, 103, for, Map(confidence -> 1.0), List()), List(token, 105, 107, our, Map(confidence -> 1.0), List()), List(token, 109, 112, stay, Map(confidence -> 1.0), List()), List(token, 113, 113, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 113, positive, Map(confidence -> 0.4509), List()))"
5456,20640,2009-12-17,45412,Hannah,Sylvia's place was lovely! Perfect for me and my friend who stayed just before christmas 2009. Sylvia is super lovely and extra helpful! Thanks Sylvia!,"List(List(document, 0, 150, Sylvia's place was lovely! Perfect for me and my friend who stayed just before christmas 2009. Sylvia is super lovely and extra helpful! Thanks Sylvia!, Map(sentence -> 0), List()))","List(List(document, 0, 25, Sylvia's place was lovely!, Map(sentence -> 0), List()), List(document, 27, 93, Perfect for me and my friend who stayed just before christmas 2009., Map(sentence -> 1), List()), List(document, 95, 135, Sylvia is super lovely and extra helpful!, Map(sentence -> 2), List()), List(document, 137, 150, Thanks Sylvia!, Map(sentence -> 3), List()))","List(List(token, 0, 7, Sylvia's, Map(sentence -> 0), List()), List(token, 9, 13, place, Map(sentence -> 0), List()), List(token, 15, 17, was, Map(sentence -> 0), List()), List(token, 19, 24, lovely, Map(sentence -> 0), List()), List(token, 25, 25, !, Map(sentence -> 0), List()), List(token, 27, 33, Perfect, Map(sentence -> 1), List()), List(token, 35, 37, for, Map(sentence -> 1), List()), List(token, 39, 40, me, Map(sentence -> 1), List()), List(token, 42, 44, and, Map(sentence -> 1), List()), List(token, 46, 47, my, Map(sentence -> 1), List()), List(token, 49, 54, friend, Map(sentence -> 1), List()), List(token, 56, 58, who, Map(sentence -> 1), List()), List(token, 60, 65, stayed, Map(sentence -> 1), List()), List(token, 67, 70, just, Map(sentence -> 1), List()), List(token, 72, 77, before, Map(sentence -> 1), List()), List(token, 79, 87, christmas, Map(sentence -> 1), List()), List(token, 89, 92, 2009, Map(sentence -> 1), List()), List(token, 93, 93, ., Map(sentence -> 1), List()), List(token, 95, 100, Sylvia, Map(sentence -> 2), List()), List(token, 102, 103, is, Map(sentence -> 2), List()), List(token, 105, 109, super, Map(sentence -> 2), List()), List(token, 111, 116, lovely, Map(sentence -> 2), List()), List(token, 118, 120, and, Map(sentence -> 2), List()), List(token, 122, 126, extra, Map(sentence -> 2), List()), List(token, 128, 134, helpful, Map(sentence -> 2), List()), List(token, 135, 135, !, Map(sentence -> 2), List()), List(token, 137, 142, Thanks, Map(sentence -> 3), List()), List(token, 144, 149, Sylvia, Map(sentence -> 3), List()), List(token, 150, 150, !, Map(sentence -> 3), List()))","List(List(token, 0, 7, Sylvia's, Map(confidence -> 0.0), List()), List(token, 9, 13, place, Map(confidence -> 1.0), List()), List(token, 15, 17, was, Map(confidence -> 1.0), List()), List(token, 19, 24, lovely, Map(confidence -> 1.0), List()), List(token, 25, 25, !, Map(confidence -> 0.0), List()), List(token, 27, 33, Perfect, Map(confidence -> 1.0), List()), List(token, 35, 37, for, Map(confidence -> 1.0), List()), List(token, 39, 40, me, Map(confidence -> 1.0), List()), List(token, 42, 44, and, Map(confidence -> 1.0), List()), List(token, 46, 47, my, Map(confidence -> 1.0), List()), List(token, 49, 54, friend, Map(confidence -> 1.0), List()), List(token, 56, 58, who, Map(confidence -> 1.0), List()), List(token, 60, 65, stayed, Map(confidence -> 1.0), List()), List(token, 67, 70, just, Map(confidence -> 1.0), List()), List(token, 72, 77, before, Map(confidence -> 1.0), List()), List(token, 79, 87, christmas, Map(confidence -> 0.0), List()), List(token, 89, 92, 2009, Map(confidence -> 0.0), List()), List(token, 93, 93, ., Map(confidence -> 0.0), List()), List(token, 95, 100, Sylvia, Map(confidence -> 1.0), List()), List(token, 102, 103, is, Map(confidence -> 1.0), List()), List(token, 105, 109, super, Map(confidence -> 1.0), List()), List(token, 111, 116, lovely, Map(confidence -> 1.0), List()), List(token, 118, 120, and, Map(confidence -> 1.0), List()), List(token, 122, 126, extra, Map(confidence -> 1.0), List()), List(token, 128, 134, helpful, Map(confidence -> 1.0), List()), List(token, 135, 135, !, Map(confidence -> 0.0), List()), List(token, 137, 142, Thanks, Map(confidence -> 1.0), List()), List(token, 144, 149, Sylvia, Map(confidence -> 1.0), List()), List(token, 150, 150, !, Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 25, positive, Map(confidence -> 0.7493), List()), List(sentiment, 27, 93, positive, Map(confidence -> 0.4745), List()), List(sentiment, 95, 135, negative, Map(confidence -> 0.4042), List()), List(sentiment, 137, 150, positive, Map(confidence -> 0.7150), List()))"
5456,21240,2009-12-23,46775,Rochelle,"Sylvia was very kind and accommodating. The space itself was a very quaint home away from home. I work online so having the internet is essential. Sylvia's place work out great! Also, the location was good - not far from downtown Austin and there are a couple of great coffee shops in the neighborhood itself.","List(List(document, 0, 309, Sylvia was very kind and accommodating. The space itself was a very quaint home away from home. I work online so having the internet is essential. Sylvia's place work out great! Also, the location was good - not far from downtown Austin and there are a couple of great coffee shops in the neighborhood itself. , Map(sentence -> 0), List()))","List(List(document, 0, 38, Sylvia was very kind and accommodating., Map(sentence -> 0), List()), List(document, 40, 94, The space itself was a very quaint home away from home., Map(sentence -> 1), List()), List(document, 96, 145, I work online so having the internet is essential., Map(sentence -> 2), List()), List(document, 147, 176, Sylvia's place work out great!, Map(sentence -> 3), List()), List(document, 178, 308, Also, the location was good - not far from downtown Austin and there are a couple of great coffee shops in the neighborhood itself., Map(sentence -> 4), List()))","List(List(token, 0, 5, Sylvia, Map(sentence -> 0), List()), List(token, 7, 9, was, Map(sentence -> 0), List()), List(token, 11, 14, very, Map(sentence -> 0), List()), List(token, 16, 19, kind, Map(sentence -> 0), List()), List(token, 21, 23, and, Map(sentence -> 0), List()), List(token, 25, 37, accommodating, Map(sentence -> 0), List()), List(token, 38, 38, ., Map(sentence -> 0), List()), List(token, 40, 42, The, Map(sentence -> 1), List()), List(token, 44, 48, space, Map(sentence -> 1), List()), List(token, 50, 55, itself, Map(sentence -> 1), List()), List(token, 57, 59, was, Map(sentence -> 1), List()), List(token, 61, 61, a, Map(sentence -> 1), List()), List(token, 63, 66, very, Map(sentence -> 1), List()), List(token, 68, 73, quaint, Map(sentence -> 1), List()), List(token, 75, 78, home, Map(sentence -> 1), List()), List(token, 80, 83, away, Map(sentence -> 1), List()), List(token, 85, 88, from, Map(sentence -> 1), List()), List(token, 90, 93, home, Map(sentence -> 1), List()), List(token, 94, 94, ., Map(sentence -> 1), List()), List(token, 96, 96, I, Map(sentence -> 2), List()), List(token, 98, 101, work, Map(sentence -> 2), List()), List(token, 103, 108, online, Map(sentence -> 2), List()), List(token, 110, 111, so, Map(sentence -> 2), List()), List(token, 113, 118, having, Map(sentence -> 2), List()), List(token, 120, 122, the, Map(sentence -> 2), List()), List(token, 124, 131, internet, Map(sentence -> 2), List()), List(token, 133, 134, is, Map(sentence -> 2), List()), List(token, 136, 144, essential, Map(sentence -> 2), List()), List(token, 145, 145, ., Map(sentence -> 2), List()), List(token, 147, 154, Sylvia's, Map(sentence -> 3), List()), List(token, 156, 160, place, Map(sentence -> 3), List()), List(token, 162, 165, work, Map(sentence -> 3), List()), List(token, 167, 169, out, Map(sentence -> 3), List()), List(token, 171, 175, great, Map(sentence -> 3), List()), List(token, 176, 176, !, Map(sentence -> 3), List()), List(token, 178, 181, Also, Map(sentence -> 4), List()), List(token, 182, 182, ,, Map(sentence -> 4), List()), List(token, 184, 186, the, Map(sentence -> 4), List()), List(token, 188, 195, location, Map(sentence -> 4), List()), List(token, 197, 199, was, Map(sentence -> 4), List()), List(token, 201, 204, good, Map(sentence -> 4), List()), List(token, 206, 206, -, Map(sentence -> 4), List()), List(token, 208, 210, not, Map(sentence -> 4), List()), List(token, 212, 214, far, Map(sentence -> 4), List()), List(token, 216, 219, from, Map(sentence -> 4), List()), List(token, 221, 228, downtown, Map(sentence -> 4), List()), List(token, 230, 235, Austin, Map(sentence -> 4), List()), List(token, 237, 239, and, Map(sentence -> 4), List()), List(token, 241, 245, there, Map(sentence -> 4), List()), List(token, 247, 249, are, Map(sentence -> 4), List()), List(token, 251, 251, a, Map(sentence -> 4), List()), List(token, 253, 258, couple, Map(sentence -> 4), List()), List(token, 260, 261, of, Map(sentence -> 4), List()), List(token, 263, 267, great, Map(sentence -> 4), List()), List(token, 269, 274, coffee, Map(sentence -> 4), List()), List(token, 276, 280, shops, Map(sentence -> 4), List()), List(token, 282, 283, in, Map(sentence -> 4), List()), List(token, 285, 287, the, Map(sentence -> 4), List()), List(token, 289, 300, neighborhood, Map(sentence -> 4), List()), List(token, 302, 307, itself, Map(sentence -> 4), List()), List(token, 308, 308, ., Map(sentence -> 4), List()))","List(List(token, 0, 5, Sylvia, Map(confidence -> 1.0), List()), List(token, 7, 9, was, Map(confidence -> 1.0), List()), List(token, 11, 14, very, Map(confidence -> 1.0), List()), List(token, 16, 19, kind, Map(confidence -> 1.0), List()), List(token, 21, 23, and, Map(confidence -> 1.0), List()), List(token, 25, 37, accommodating, Map(confidence -> 1.0), List()), List(token, 38, 38, ., Map(confidence -> 0.0), List()), List(token, 40, 42, The, Map(confidence -> 1.0), List()), List(token, 44, 48, space, Map(confidence -> 1.0), List()), List(token, 50, 55, itself, Map(confidence -> 1.0), List()), List(token, 57, 59, was, Map(confidence -> 1.0), List()), List(token, 61, 61, a, Map(confidence -> 1.0), List()), List(token, 63, 66, very, Map(confidence -> 1.0), List()), List(token, 68, 73, quaint, Map(confidence -> 1.0), List()), List(token, 75, 78, home, Map(confidence -> 1.0), List()), List(token, 80, 83, away, Map(confidence -> 1.0), List()), List(token, 85, 88, from, Map(confidence -> 1.0), List()), List(token, 90, 93, home, Map(confidence -> 1.0), List()), List(token, 94, 94, ., Map(confidence -> 0.0), List()), List(token, 96, 96, I, Map(confidence -> 1.0), List()), List(token, 98, 101, work, Map(confidence -> 1.0), List()), List(token, 103, 108, online, Map(confidence -> 1.0), List()), List(token, 110, 111, so, Map(confidence -> 1.0), List()), List(token, 113, 118, having, Map(confidence -> 1.0), List()), List(token, 120, 122, the, Map(confidence -> 1.0), List()), List(token, 124, 131, internet, Map(confidence -> 1.0), List()), List(token, 133, 134, is, Map(confidence -> 1.0), List()), List(token, 136, 144, essential, Map(confidence -> 1.0), List()), List(token, 145, 145, ., Map(confidence -> 0.0), List()), List(token, 147, 154, Sylvia's, Map(confidence -> 0.0), List()), List(token, 156, 160, place, Map(confidence -> 1.0), List()), List(token, 162, 165, work, Map(confidence -> 1.0), List()), List(token, 167, 169, out, Map(confidence -> 1.0), List()), List(token, 171, 175, great, Map(confidence -> 1.0), List()), List(token, 176, 176, !, Map(confidence -> 0.0), List()), List(token, 178, 181, Also, Map(confidence -> 1.0), List()), List(token, 182, 182, ,, Map(confidence -> 0.0), List()), List(token, 184, 186, the, Map(confidence -> 1.0), List()), List(token, 188, 195, location, Map(confidence -> 1.0), List()), List(token, 197, 199, was, Map(confidence -> 1.0), List()), List(token, 201, 204, good, Map(confidence -> 1.0), List()), List(token, 206, 206, -, Map(confidence -> 0.0), List()), List(token, 208, 210, not, Map(confidence -> 1.0), List()), List(token, 212, 214, far, Map(confidence -> 1.0), List()), List(token, 216, 219, from, Map(confidence -> 1.0), List()), List(token, 221, 228, downtown, Map(confidence -> 1.0), List()), List(token, 230, 235, Austin, Map(confidence -> 1.0), List()), List(token, 237, 239, and, Map(confidence -> 1.0), List()), List(token, 241, 245, there, Map(confidence -> 1.0), List()), List(token, 247, 249, are, Map(confidence -> 1.0), List()), List(token, 251, 251, a, Map(confidence -> 1.0), List()), List(token, 253, 258, couple, Map(confidence -> 1.0), List()), List(token, 260, 261, of, Map(confidence -> 1.0), List()), List(token, 263, 267, great, Map(confidence -> 1.0), List()), List(token, 269, 274, coffee, Map(confidence -> 1.0), List()), List(token, 276, 280, shops, Map(confidence -> 1.0), List()), List(token, 282, 283, in, Map(confidence -> 1.0), List()), List(token, 285, 287, the, Map(confidence -> 1.0), List()), List(token, 289, 300, neighborhood, Map(confidence -> 1.0), List()), List(token, 302, 307, itself, Map(confidence -> 1.0), List()), List(token, 308, 308, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 38, positive, Map(confidence -> 0.5150), List()), List(sentiment, 40, 94, negative, Map(confidence -> 0.4519), List()), List(sentiment, 96, 145, negative, Map(confidence -> 0.4250), List()), List(sentiment, 147, 176, positive, Map(confidence -> 0.6271), List()), List(sentiment, 178, 308, positive, Map(confidence -> 0.4969), List()))"
5456,24587,2010-01-22,64841,Vinisha,"The apt was clean, comfortable and cosy. Sylvia was very accommodating.","List(List(document, 0, 70, The apt was clean, comfortable and cosy. Sylvia was very accommodating., Map(sentence -> 0), List()))","List(List(document, 0, 39, The apt was clean, comfortable and cosy., Map(sentence -> 0), List()), List(document, 41, 70, Sylvia was very accommodating., Map(sentence -> 1), List()))","List(List(token, 0, 2, The, Map(sentence -> 0), List()), List(token, 4, 6, apt, Map(sentence -> 0), List()), List(token, 8, 10, was, Map(sentence -> 0), List()), List(token, 12, 16, clean, Map(sentence -> 0), List()), List(token, 17, 17, ,, Map(sentence -> 0), List()), List(token, 19, 29, comfortable, Map(sentence -> 0), List()), List(token, 31, 33, and, Map(sentence -> 0), List()), List(token, 35, 38, cosy, Map(sentence -> 0), List()), List(token, 39, 39, ., Map(sentence -> 0), List()), List(token, 41, 46, Sylvia, Map(sentence -> 1), List()), List(token, 48, 50, was, Map(sentence -> 1), List()), List(token, 52, 55, very, Map(sentence -> 1), List()), List(token, 57, 69, accommodating, Map(sentence -> 1), List()), List(token, 70, 70, ., Map(sentence -> 1), List()))","List(List(token, 0, 2, The, Map(confidence -> 1.0), List()), List(token, 4, 6, apt, Map(confidence -> 1.0), List()), List(token, 8, 10, was, Map(confidence -> 1.0), List()), List(token, 12, 16, clean, Map(confidence -> 1.0), List()), List(token, 17, 17, ,, Map(confidence -> 0.0), List()), List(token, 19, 29, comfortable, Map(confidence -> 1.0), List()), List(token, 31, 33, and, Map(confidence -> 1.0), List()), List(token, 35, 38, cosy, Map(confidence -> 1.0), List()), List(token, 39, 39, ., Map(confidence -> 0.0), List()), List(token, 41, 46, Sylvia, Map(confidence -> 1.0), List()), List(token, 48, 50, was, Map(confidence -> 1.0), List()), List(token, 52, 55, very, Map(confidence -> 1.0), List()), List(token, 57, 69, accommodating, Map(confidence -> 1.0), List()), List(token, 70, 70, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 39, positive, Map(confidence -> 0.5097), List()), List(sentiment, 41, 70, positive, Map(confidence -> 0.5332), List()))"
5456,24721,2010-01-24,62019,Christine,"I'm late in posting this but that is no indication of how my stay went. it was fantastic! Sylvia is more than accommodating, just like everyone else has mentioned. it's all true. not only did she pick me up downtown, she took me to the airport for $10, and then when i realized i left my iphone in the apartment, she went to GREAT lengths to send it back to me. i almost had a panic attack when i realized i left it there, but was only consoled in the fact that at least it was in very good hands. the apartment itself was perfect, with plenty of windows, free coffee, etc. i would definitely stay again. Thanks Sylvia! sorry for the late review.","List(List(document, 0, 652, I'm late in posting this but that is no indication of how my stay went. it was fantastic! Sylvia is more than accommodating, just like everyone else has mentioned. it's all true. not only did she pick me up downtown, she took me to the airport for $10, and then when i realized i left my iphone in the apartment, she went to GREAT lengths to send it back to me. i almost had a panic attack when i realized i left it there, but was only consoled in the fact that at least it was in very good hands. the apartment itself was perfect, with plenty of windows, free coffee, etc. i would definitely stay again. Thanks Sylvia! sorry for the late review., Map(sentence -> 0), List()))","List(List(document, 0, 70, I'm late in posting this but that is no indication of how my stay went., Map(sentence -> 0), List()), List(document, 73, 89, it was fantastic!, Map(sentence -> 1), List()), List(document, 92, 164, Sylvia is more than accommodating, just like everyone else has mentioned., Map(sentence -> 2), List()), List(document, 167, 180, it's all true., Map(sentence -> 3), List()), List(document, 182, 363, not only did she pick me up downtown, she took me to the airport for $10, and then when i realized i left my iphone in the apartment, she went to GREAT lengths to send it back to me., Map(sentence -> 4), List()), List(document, 366, 500, i almost had a panic attack when i realized i left it there, but was only consoled in the fact that at least it was in very good hands., Map(sentence -> 5), List()), List(document, 503, 577, the apartment itself was perfect, with plenty of windows, free coffee, etc., Map(sentence -> 6), List()), List(document, 580, 609, i would definitely stay again., Map(sentence -> 7), List()), List(document, 611, 624, Thanks Sylvia!, Map(sentence -> 8), List()), List(document, 627, 652, sorry for the late review., Map(sentence -> 9), List()))","List(List(token, 0, 2, I'm, Map(sentence -> 0), List()), List(token, 4, 7, late, Map(sentence -> 0), List()), List(token, 9, 10, in, Map(sentence -> 0), List()), List(token, 12, 18, posting, Map(sentence -> 0), List()), List(token, 20, 23, this, Map(sentence -> 0), List()), List(token, 25, 27, but, Map(sentence -> 0), List()), List(token, 29, 32, that, Map(sentence -> 0), List()), List(token, 34, 35, is, Map(sentence -> 0), List()), List(token, 37, 38, no, Map(sentence -> 0), List()), List(token, 40, 49, indication, Map(sentence -> 0), List()), List(token, 51, 52, of, Map(sentence -> 0), List()), List(token, 54, 56, how, Map(sentence -> 0), List()), List(token, 58, 59, my, Map(sentence -> 0), List()), List(token, 61, 64, stay, Map(sentence -> 0), List()), List(token, 66, 69, went, Map(sentence -> 0), List()), List(token, 70, 70, ., Map(sentence -> 0), List()), List(token, 73, 74, it, Map(sentence -> 1), List()), List(token, 76, 78, was, Map(sentence -> 1), List()), List(token, 80, 88, fantastic, Map(sentence -> 1), List()), List(token, 89, 89, !, Map(sentence -> 1), List()), List(token, 92, 97, Sylvia, Map(sentence -> 2), List()), List(token, 99, 100, is, Map(sentence -> 2), List()), List(token, 102, 105, more, Map(sentence -> 2), List()), List(token, 107, 110, than, Map(sentence -> 2), List()), List(token, 112, 124, accommodating, Map(sentence -> 2), List()), List(token, 125, 125, ,, Map(sentence -> 2), List()), List(token, 127, 130, just, Map(sentence -> 2), List()), List(token, 132, 135, like, Map(sentence -> 2), List()), List(token, 137, 144, everyone, Map(sentence -> 2), List()), List(token, 146, 149, else, Map(sentence -> 2), List()), List(token, 151, 153, has, Map(sentence -> 2), List()), List(token, 155, 163, mentioned, Map(sentence -> 2), List()), List(token, 164, 164, ., Map(sentence -> 2), List()), List(token, 167, 170, it's, Map(sentence -> 3), List()), List(token, 172, 174, all, Map(sentence -> 3), List()), List(token, 176, 179, true, Map(sentence -> 3), List()), List(token, 180, 180, ., Map(sentence -> 3), List()), List(token, 182, 184, not, Map(sentence -> 4), List()), List(token, 186, 189, only, Map(sentence -> 4), List()), List(token, 191, 193, did, Map(sentence -> 4), List()), List(token, 195, 197, she, Map(sentence -> 4), List()), List(token, 199, 202, pick, Map(sentence -> 4), List()), List(token, 204, 205, me, Map(sentence -> 4), List()), List(token, 207, 208, up, Map(sentence -> 4), List()), List(token, 210, 217, downtown, Map(sentence -> 4), List()), List(token, 218, 218, ,, Map(sentence -> 4), List()), List(token, 220, 222, she, Map(sentence -> 4), List()), List(token, 224, 227, took, Map(sentence -> 4), List()), List(token, 229, 230, me, Map(sentence -> 4), List()), List(token, 232, 233, to, Map(sentence -> 4), List()), List(token, 235, 237, the, Map(sentence -> 4), List()), List(token, 239, 245, airport, Map(sentence -> 4), List()), List(token, 247, 249, for, Map(sentence -> 4), List()), List(token, 251, 253, $10, Map(sentence -> 4), List()), List(token, 254, 254, ,, Map(sentence -> 4), List()), List(token, 256, 258, and, Map(sentence -> 4), List()), List(token, 260, 263, then, Map(sentence -> 4), List()), List(token, 265, 268, when, Map(sentence -> 4), List()), List(token, 270, 270, i, Map(sentence -> 4), List()), List(token, 272, 279, realized, Map(sentence -> 4), List()), List(token, 281, 281, i, Map(sentence -> 4), List()), List(token, 283, 286, left, Map(sentence -> 4), List()), List(token, 288, 289, my, Map(sentence -> 4), List()), List(token, 291, 296, iphone, Map(sentence -> 4), List()), List(token, 298, 299, in, Map(sentence -> 4), List()), List(token, 301, 303, the, Map(sentence -> 4), List()), List(token, 305, 313, apartment, Map(sentence -> 4), List()), List(token, 314, 314, ,, Map(sentence -> 4), List()), List(token, 316, 318, she, Map(sentence -> 4), List()), List(token, 320, 323, went, Map(sentence -> 4), List()), List(token, 325, 326, to, Map(sentence -> 4), List()), List(token, 328, 332, GREAT, Map(sentence -> 4), List()), List(token, 334, 340, lengths, Map(sentence -> 4), List()), List(token, 342, 343, to, Map(sentence -> 4), List()), List(token, 345, 348, send, Map(sentence -> 4), List()), List(token, 350, 351, it, Map(sentence -> 4), List()), List(token, 353, 356, back, Map(sentence -> 4), List()), List(token, 358, 359, to, Map(sentence -> 4), List()), List(token, 361, 362, me, Map(sentence -> 4), List()), List(token, 363, 363, ., Map(sentence -> 4), List()), List(token, 366, 366, i, Map(sentence -> 5), List()), List(token, 368, 373, almost, Map(sentence -> 5), List()), List(token, 375, 377, had, Map(sentence -> 5), List()), List(token, 379, 379, a, Map(sentence -> 5), List()), List(token, 381, 385, panic, Map(sentence -> 5), List()), List(token, 387, 392, attack, Map(sentence -> 5), List()), List(token, 394, 397, when, Map(sentence -> 5), List()), List(token, 399, 399, i, Map(sentence -> 5), List()), List(token, 401, 408, realized, Map(sentence -> 5), List()), List(token, 410, 410, i, Map(sentence -> 5), List()), List(token, 412, 415, left, Map(sentence -> 5), List()), List(token, 417, 418, it, Map(sentence -> 5), List()), List(token, 420, 424, there, Map(sentence -> 5), List()), List(token, 425, 425, ,, Map(sentence -> 5), List()), List(token, 427, 429, but, Map(sentence -> 5), List()), List(token, 431, 433, was, Map(sentence -> 5), List()), List(token, 435, 438, only, Map(sentence -> 5), List()), List(token, 440, 447, consoled, Map(sentence -> 5), List()), List(token, 449, 450, in, Map(sentence -> 5), List()), List(token, 452, 454, the, Map(sentence -> 5), List()), List(token, 456, 459, fact, Map(sentence -> 5), List()), List(token, 461, 464, that, Map(sentence -> 5), List()), List(token, 466, 467, at, Map(sentence -> 5), List()), List(token, 469, 473, least, Map(sentence -> 5), List()), List(token, 475, 476, it, Map(sentence -> 5), List()), List(token, 478, 480, was, Map(sentence -> 5), List()), List(token, 482, 483, in, Map(sentence -> 5), List()), List(token, 485, 488, very, Map(sentence -> 5), List()), List(token, 490, 493, good, Map(sentence -> 5), List()), List(token, 495, 499, hands, Map(sentence -> 5), List()), List(token, 500, 500, ., Map(sentence -> 5), List()), List(token, 503, 505, the, Map(sentence -> 6), List()), List(token, 507, 515, apartment, Map(sentence -> 6), List()), List(token, 517, 522, itself, Map(sentence -> 6), List()), List(token, 524, 526, was, Map(sentence -> 6), List()), List(token, 528, 534, perfect, Map(sentence -> 6), List()), List(token, 535, 535, ,, Map(sentence -> 6), List()), List(token, 537, 540, with, Map(sentence -> 6), List()), List(token, 542, 547, plenty, Map(sentence -> 6), List()), List(token, 549, 550, of, Map(sentence -> 6), List()), List(token, 552, 558, windows, Map(sentence -> 6), List()), List(token, 559, 559, ,, Map(sentence -> 6), List()), List(token, 561, 564, free, Map(sentence -> 6), List()), List(token, 566, 571, coffee, Map(sentence -> 6), List()), List(token, 572, 572, ,, Map(sentence -> 6), List()), List(token, 574, 576, etc, Map(sentence -> 6), List()), List(token, 577, 577, ., Map(sentence -> 6), List()), List(token, 580, 580, i, Map(sentence -> 7), List()), List(token, 582, 586, would, Map(sentence -> 7), List()), List(token, 588, 597, definitely, Map(sentence -> 7), List()), List(token, 599, 602, stay, Map(sentence -> 7), List()), List(token, 604, 608, again, Map(sentence -> 7), List()), List(token, 609, 609, ., Map(sentence -> 7), List()), List(token, 611, 616, Thanks, Map(sentence -> 8), List()), List(token, 618, 623, Sylvia, Map(sentence -> 8), List()), List(token, 624, 624, !, Map(sentence -> 8), List()), List(token, 627, 631, sorry, Map(sentence -> 9), List()), List(token, 633, 635, for, Map(sentence -> 9), List()), List(token, 637, 639, the, Map(sentence -> 9), List()), List(token, 641, 644, late, Map(sentence -> 9), List()), List(token, 646, 651, review, Map(sentence -> 9), List()), List(token, 652, 652, ., Map(sentence -> 9), List()))","List(List(token, 0, 2, I'm, Map(confidence -> 1.0), List()), List(token, 4, 7, late, Map(confidence -> 1.0), List()), List(token, 9, 10, in, Map(confidence -> 1.0), List()), List(token, 12, 18, posting, Map(confidence -> 1.0), List()), List(token, 20, 23, this, Map(confidence -> 1.0), List()), List(token, 25, 27, but, Map(confidence -> 1.0), List()), List(token, 29, 32, that, Map(confidence -> 1.0), List()), List(token, 34, 35, is, Map(confidence -> 1.0), List()), List(token, 37, 38, no, Map(confidence -> 1.0), List()), List(token, 40, 49, indication, Map(confidence -> 1.0), List()), List(token, 51, 52, of, Map(confidence -> 1.0), List()), List(token, 54, 56, how, Map(confidence -> 1.0), List()), List(token, 58, 59, my, Map(confidence -> 1.0), List()), List(token, 61, 64, stay, Map(confidence -> 1.0), List()), List(token, 66, 69, went, Map(confidence -> 1.0), List()), List(token, 70, 70, ., Map(confidence -> 0.0), List()), List(token, 73, 74, it, Map(confidence -> 1.0), List()), List(token, 76, 78, was, Map(confidence -> 1.0), List()), List(token, 80, 88, fantastic, Map(confidence -> 1.0), List()), List(token, 89, 89, !, Map(confidence -> 0.0), List()), List(token, 92, 97, Sylvia, Map(confidence -> 1.0), List()), List(token, 99, 100, is, Map(confidence -> 1.0), List()), List(token, 102, 105, more, Map(confidence -> 1.0), List()), List(token, 107, 110, than, Map(confidence -> 1.0), List()), List(token, 112, 124, accommodating, Map(confidence -> 1.0), List()), List(token, 125, 125, ,, Map(confidence -> 0.0), List()), List(token, 127, 130, just, Map(confidence -> 1.0), List()), List(token, 132, 135, like, Map(confidence -> 1.0), List()), List(token, 137, 144, everyone, Map(confidence -> 1.0), List()), List(token, 146, 149, else, Map(confidence -> 1.0), List()), List(token, 151, 153, has, Map(confidence -> 1.0), List()), List(token, 155, 163, mentioned, Map(confidence -> 1.0), List()), List(token, 164, 164, ., Map(confidence -> 0.0), List()), List(token, 167, 170, it's, Map(confidence -> 1.0), List()), List(token, 172, 174, all, Map(confidence -> 1.0), List()), List(token, 176, 179, true, Map(confidence -> 1.0), List()), List(token, 180, 180, ., Map(confidence -> 0.0), List()), List(token, 182, 184, not, Map(confidence -> 1.0), List()), List(token, 186, 189, only, Map(confidence -> 1.0), List()), List(token, 191, 193, did, Map(confidence -> 1.0), List()), List(token, 195, 197, she, Map(confidence -> 1.0), List()), List(token, 199, 202, pick, Map(confidence -> 1.0), List()), List(token, 204, 205, me, Map(confidence -> 1.0), List()), List(token, 207, 208, up, Map(confidence -> 1.0), List()), List(token, 210, 217, downtown, Map(confidence -> 1.0), List()), List(token, 218, 218, ,, Map(confidence -> 0.0), List()), List(token, 220, 222, she, Map(confidence -> 1.0), List()), List(token, 224, 227, took, Map(confidence -> 1.0), List()), List(token, 229, 230, me, Map(confidence -> 1.0), List()), List(token, 232, 233, to, Map(confidence -> 1.0), List()), List(token, 235, 237, the, Map(confidence -> 1.0), List()), List(token, 239, 245, airport, Map(confidence -> 1.0), List()), List(token, 247, 249, for, Map(confidence -> 1.0), List()), List(token, 251, 253, $10, Map(confidence -> 0.0), List()), List(token, 254, 254, ,, Map(confidence -> 0.0), List()), List(token, 256, 258, and, Map(confidence -> 1.0), List()), List(token, 260, 263, then, Map(confidence -> 1.0), List()), List(token, 265, 268, when, Map(confidence -> 1.0), List()), List(token, 270, 270, i, Map(confidence -> 1.0), List()), List(token, 272, 279, realized, Map(confidence -> 1.0), List()), List(token, 281, 281, i, Map(confidence -> 1.0), List()), List(token, 283, 286, left, Map(confidence -> 1.0), List()), List(token, 288, 289, my, Map(confidence -> 1.0), List()), List(token, 291, 296, aphony, Map(confidence -> 1.0), List()), List(token, 298, 299, in, Map(confidence -> 1.0), List()), List(token, 301, 303, the, Map(confidence -> 1.0), List()), List(token, 305, 313, apartment, Map(confidence -> 1.0), List()), List(token, 314, 314, ,, Map(confidence -> 0.0), List()), List(token, 316, 318, she, Map(confidence -> 1.0), List()), List(token, 320, 323, went, Map(confidence -> 1.0), List()), List(token, 325, 326, to, Map(confidence -> 1.0), List()), List(token, 328, 332, GREAT, Map(confidence -> 0.0), List()), List(token, 334, 340, lengths, Map(confidence -> 1.0), List()), List(token, 342, 343, to, Map(confidence -> 1.0), List()), List(token, 345, 348, send, Map(confidence -> 1.0), List()), List(token, 350, 351, it, Map(confidence -> 1.0), List()), List(token, 353, 356, back, Map(confidence -> 1.0), List()), List(token, 358, 359, to, Map(confidence -> 1.0), List()), List(token, 361, 362, me, Map(confidence -> 1.0), List()), List(token, 363, 363, ., Map(confidence -> 0.0), List()), List(token, 366, 366, i, Map(confidence -> 1.0), List()), List(token, 368, 373, almost, Map(confidence -> 1.0), List()), List(token, 375, 377, had, Map(confidence -> 1.0), List()), List(token, 379, 379, a, Map(confidence -> 1.0), List()), List(token, 381, 385, panic, Map(confidence -> 1.0), List()), List(token, 387, 392, attack, Map(confidence -> 1.0), List()), List(token, 394, 397, when, Map(confidence -> 1.0), List()), List(token, 399, 399, i, Map(confidence -> 1.0), List()), List(token, 401, 408, realized, Map(confidence -> 1.0), List()), List(token, 410, 410, i, Map(confidence -> 1.0), List()), List(token, 412, 415, left, Map(confidence -> 1.0), List()), List(token, 417, 418, it, Map(confidence -> 1.0), List()), List(token, 420, 424, there, Map(confidence -> 1.0), List()), List(token, 425, 425, ,, Map(confidence -> 0.0), List()), List(token, 427, 429, but, Map(confidence -> 1.0), List()), List(token, 431, 433, was, Map(confidence -> 1.0), List()), List(token, 435, 438, only, Map(confidence -> 1.0), List()), List(token, 440, 447, consoled, Map(confidence -> 1.0), List()), List(token, 449, 450, in, Map(confidence -> 1.0), List()), List(token, 452, 454, the, Map(confidence -> 1.0), List()), List(token, 456, 459, fact, Map(confidence -> 1.0), List()), List(token, 461, 464, that, Map(confidence -> 1.0), List()), List(token, 466, 467, at, Map(confidence -> 1.0), List()), List(token, 469, 473, least, Map(confidence -> 1.0), List()), List(token, 475, 476, it, Map(confidence -> 1.0), List()), List(token, 478, 480, was, Map(confidence -> 1.0), List()), List(token, 482, 483, in, Map(confidence -> 1.0), List()), List(token, 485, 488, very, Map(confidence -> 1.0), List()), List(token, 490, 493, good, Map(confidence -> 1.0), List()), List(token, 495, 499, hands, Map(confidence -> 1.0), List()), List(token, 500, 500, ., Map(confidence -> 0.0), List()), List(token, 503, 505, the, Map(confidence -> 1.0), List()), List(token, 507, 515, apartment, Map(confidence -> 1.0), List()), List(token, 517, 522, itself, Map(confidence -> 1.0), List()), List(token, 524, 526, was, Map(confidence -> 1.0), List()), List(token, 528, 534, perfect, Map(confidence -> 1.0), List()), List(token, 535, 535, ,, Map(confidence -> 0.0), List()), List(token, 537, 540, with, Map(confidence -> 1.0), List()), List(token, 542, 547, plenty, Map(confidence -> 1.0), List()), List(token, 549, 550, of, Map(confidence -> 1.0), List()), List(token, 552, 558, windows, Map(confidence -> 1.0), List()), List(token, 559, 559, ,, Map(confidence -> 0.0), List()), List(token, 561, 564, free, Map(confidence -> 1.0), List()), List(token, 566, 571, coffee, Map(confidence -> 1.0), List()), List(token, 572, 572, ,, Map(confidence -> 0.0), List()), List(token, 574, 576, etc, Map(confidence -> 1.0), List()), List(token, 577, 577, ., Map(confidence -> 0.0), List()), List(token, 580, 580, i, Map(confidence -> 1.0), List()), List(token, 582, 586, would, Map(confidence -> 1.0), List()), List(token, 588, 597, definitely, Map(confidence -> 1.0), List()), List(token, 599, 602, stay, Map(confidence -> 1.0), List()), List(token, 604, 608, again, Map(confidence -> 1.0), List()), List(token, 609, 609, ., Map(confidence -> 0.0), List()), List(token, 611, 616, Thanks, Map(confidence -> 1.0), List()), List(token, 618, 623, Sylvia, Map(confidence -> 1.0), List()), List(token, 624, 624, !, Map(confidence -> 0.0), List()), List(token, 627, 631, sorry, Map(confidence -> 1.0), List()), List(token, 633, 635, for, Map(confidence -> 1.0), List()), List(token, 637, 639, the, Map(confidence -> 1.0), List()), List(token, 641, 644, late, Map(confidence -> 1.0), List()), List(token, 646, 651, review, Map(confidence -> 1.0), List()), List(token, 652, 652, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 70, positive, Map(confidence -> 0.5307), List()), List(sentiment, 73, 89, positive, Map(confidence -> 0.6489), List()), List(sentiment, 92, 164, negative, Map(confidence -> 0.5141), List()), List(sentiment, 167, 180, negative, Map(confidence -> 0.5147), List()), List(sentiment, 182, 363, positive, Map(confidence -> 0.4912), List()), List(sentiment, 366, 500, negative, Map(confidence -> 0.4834), List()), List(sentiment, 503, 577, positive, Map(confidence -> 0.5020), List()), List(sentiment, 580, 609, positive, Map(confidence -> 0.5408), List()), List(sentiment, 611, 624, positive, Map(confidence -> 0.7150), List()), List(sentiment, 627, 652, negative, Map(confidence -> 0.4677), List()))"
5456,27248,2010-02-22,18328,Jeremy,Another fantastic stay! Sylvia is a great host.,"List(List(document, 0, 46, Another fantastic stay! Sylvia is a great host., Map(sentence -> 0), List()))","List(List(document, 0, 22, Another fantastic stay!, Map(sentence -> 0), List()), List(document, 24, 46, Sylvia is a great host., Map(sentence -> 1), List()))","List(List(token, 0, 6, Another, Map(sentence -> 0), List()), List(token, 8, 16, fantastic, Map(sentence -> 0), List()), List(token, 18, 21, stay, Map(sentence -> 0), List()), List(token, 22, 22, !, Map(sentence -> 0), List()), List(token, 24, 29, Sylvia, Map(sentence -> 1), List()), List(token, 31, 32, is, Map(sentence -> 1), List()), List(token, 34, 34, a, Map(sentence -> 1), List()), List(token, 36, 40, great, Map(sentence -> 1), List()), List(token, 42, 45, host, Map(sentence -> 1), List()), List(token, 46, 46, ., Map(sentence -> 1), List()))","List(List(token, 0, 6, Another, Map(confidence -> 1.0), List()), List(token, 8, 16, fantastic, Map(confidence -> 1.0), List()), List(token, 18, 21, stay, Map(confidence -> 1.0), List()), List(token, 22, 22, !, Map(confidence -> 0.0), List()), List(token, 24, 29, Sylvia, Map(confidence -> 1.0), List()), List(token, 31, 32, is, Map(confidence -> 1.0), List()), List(token, 34, 34, a, Map(confidence -> 1.0), List()), List(token, 36, 40, great, Map(confidence -> 1.0), List()), List(token, 42, 45, host, Map(confidence -> 1.0), List()), List(token, 46, 46, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 22, positive, Map(confidence -> 0.7145), List()), List(sentiment, 24, 46, positive, Map(confidence -> 0.5256), List()))"
5456,27738,2010-02-27,21519,Jessica,"The studio is charming and cheerful and Sylvia and her four dogs (who stayed mostly in the main house but were there to greet us when we arrived :))were very welcoming. The location is convenient and Sylvia provided every ammenity we could need including not just a space heater in the main room but, thoughtfully, one in the bathroom too. Thanks, Sylvia!","List(List(document, 0, 354, The studio is charming and cheerful and Sylvia and her four dogs (who stayed mostly in the main house but were there to greet us when we arrived :))were very welcoming. The location is convenient and Sylvia provided every ammenity we could need including not just a space heater in the main room but, thoughtfully, one in the bathroom too. Thanks, Sylvia!, Map(sentence -> 0), List()))","List(List(document, 0, 167, The studio is charming and cheerful and Sylvia and her four dogs (who stayed mostly in the main house but were there to greet us when we arrived :))were very welcoming., Map(sentence -> 0), List()), List(document, 169, 338, The location is convenient and Sylvia provided every ammenity we could need including not just a space heater in the main room but, thoughtfully, one in the bathroom too., Map(sentence -> 1), List()), List(document, 340, 354, Thanks, Sylvia!, Map(sentence -> 2), List()))","List(List(token, 0, 2, The, Map(sentence -> 0), List()), List(token, 4, 9, studio, Map(sentence -> 0), List()), List(token, 11, 12, is, Map(sentence -> 0), List()), List(token, 14, 21, charming, Map(sentence -> 0), List()), List(token, 23, 25, and, Map(sentence -> 0), List()), List(token, 27, 34, cheerful, Map(sentence -> 0), List()), List(token, 36, 38, and, Map(sentence -> 0), List()), List(token, 40, 45, Sylvia, Map(sentence -> 0), List()), List(token, 47, 49, and, Map(sentence -> 0), List()), List(token, 51, 53, her, Map(sentence -> 0), List()), List(token, 55, 58, four, Map(sentence -> 0), List()), List(token, 60, 63, dogs, Map(sentence -> 0), List()), List(token, 65, 65, (, Map(sentence -> 0), List()), List(token, 66, 68, who, Map(sentence -> 0), List()), List(token, 70, 75, stayed, Map(sentence -> 0), List()), List(token, 77, 82, mostly, Map(sentence -> 0), List()), List(token, 84, 85, in, Map(sentence -> 0), List()), List(token, 87, 89, the, Map(sentence -> 0), List()), List(token, 91, 94, main, Map(sentence -> 0), List()), List(token, 96, 100, house, Map(sentence -> 0), List()), List(token, 102, 104, but, Map(sentence -> 0), List()), List(token, 106, 109, were, Map(sentence -> 0), List()), List(token, 111, 115, there, Map(sentence -> 0), List()), List(token, 117, 118, to, Map(sentence -> 0), List()), List(token, 120, 124, greet, Map(sentence -> 0), List()), List(token, 126, 127, us, Map(sentence -> 0), List()), List(token, 129, 132, when, Map(sentence -> 0), List()), List(token, 134, 135, we, Map(sentence -> 0), List()), List(token, 137, 143, arrived, Map(sentence -> 0), List()), List(token, 145, 147, :)), Map(sentence -> 0), List()), List(token, 148, 151, were, Map(sentence -> 0), List()), List(token, 153, 156, very, Map(sentence -> 0), List()), List(token, 158, 166, welcoming, Map(sentence -> 0), List()), List(token, 167, 167, ., Map(sentence -> 0), List()), List(token, 169, 171, The, Map(sentence -> 1), List()), List(token, 173, 180, location, Map(sentence -> 1), List()), List(token, 182, 183, is, Map(sentence -> 1), List()), List(token, 185, 194, convenient, Map(sentence -> 1), List()), List(token, 196, 198, and, Map(sentence -> 1), List()), List(token, 200, 205, Sylvia, Map(sentence -> 1), List()), List(token, 207, 214, provided, Map(sentence -> 1), List()), List(token, 216, 220, every, Map(sentence -> 1), List()), List(token, 222, 229, ammenity, Map(sentence -> 1), List()), List(token, 231, 232, we, Map(sentence -> 1), List()), List(token, 234, 238, could, Map(sentence -> 1), List()), List(token, 240, 243, need, Map(sentence -> 1), List()), List(token, 245, 253, including, Map(sentence -> 1), List()), List(token, 255, 257, not, Map(sentence -> 1), List()), List(token, 259, 262, just, Map(sentence -> 1), List()), List(token, 264, 264, a, Map(sentence -> 1), List()), List(token, 266, 270, space, Map(sentence -> 1), List()), List(token, 272, 277, heater, Map(sentence -> 1), List()), List(token, 279, 280, in, Map(sentence -> 1), List()), List(token, 282, 284, the, Map(sentence -> 1), List()), List(token, 286, 289, main, Map(sentence -> 1), List()), List(token, 291, 294, room, Map(sentence -> 1), List()), List(token, 296, 298, but, Map(sentence -> 1), List()), List(token, 299, 299, ,, Map(sentence -> 1), List()), List(token, 301, 312, thoughtfully, Map(sentence -> 1), List()), List(token, 313, 313, ,, Map(sentence -> 1), List()), List(token, 315, 317, one, Map(sentence -> 1), List()), List(token, 319, 320, in, Map(sentence -> 1), List()), List(token, 322, 324, the, Map(sentence -> 1), List()), List(token, 326, 333, bathroom, Map(sentence -> 1), List()), List(token, 335, 337, too, Map(sentence -> 1), List()), List(token, 338, 338, ., Map(sentence -> 1), List()), List(token, 340, 345, Thanks, Map(sentence -> 2), List()), List(token, 346, 346, ,, Map(sentence -> 2), List()), List(token, 348, 353, Sylvia, Map(sentence -> 2), List()), List(token, 354, 354, !, Map(sentence -> 2), List()))","List(List(token, 0, 2, The, Map(confidence -> 1.0), List()), List(token, 4, 9, studio, Map(confidence -> 1.0), List()), List(token, 11, 12, is, Map(confidence -> 1.0), List()), List(token, 14, 21, charming, Map(confidence -> 1.0), List()), List(token, 23, 25, and, Map(confidence -> 1.0), List()), List(token, 27, 34, cheerful, Map(confidence -> 1.0), List()), List(token, 36, 38, and, Map(confidence -> 1.0), List()), List(token, 40, 45, Sylvia, Map(confidence -> 1.0), List()), List(token, 47, 49, and, Map(confidence -> 1.0), List()), List(token, 51, 53, her, Map(confidence -> 1.0), List()), List(token, 55, 58, four, Map(confidence -> 1.0), List()), List(token, 60, 63, dogs, Map(confidence -> 1.0), List()), List(token, 65, 65, (, Map(confidence -> 0.0), List()), List(token, 66, 68, who, Map(confidence -> 1.0), List()), List(token, 70, 75, stayed, Map(confidence -> 1.0), List()), List(token, 77, 82, mostly, Map(confidence -> 1.0), List()), List(token, 84, 85, in, Map(confidence -> 1.0), List()), List(token, 87, 89, the, Map(confidence -> 1.0), List()), List(token, 91, 94, main, Map(confidence -> 1.0), List()), List(token, 96, 100, house, Map(confidence -> 1.0), List()), List(token, 102, 104, but, Map(confidence -> 1.0), List()), List(token, 106, 109, were, Map(confidence -> 1.0), List()), List(token, 111, 115, there, Map(confidence -> 1.0), List()), List(token, 117, 118, to, Map(confidence -> 1.0), List()), List(token, 120, 124, greet, Map(confidence -> 1.0), List()), List(token, 126, 127, us, Map(confidence -> 1.0), List()), List(token, 129, 132, when, Map(confidence -> 1.0), List()), List(token, 134, 135, we, Map(confidence -> 1.0), List()), List(token, 137, 143, arrived, Map(confidence -> 1.0), List()), List(token, 145, 147, :)), Map(confidence -> 0.0), List()), List(token, 148, 151, were, Map(confidence -> 1.0), List()), List(token, 153, 156, very, Map(confidence -> 1.0), List()), List(token, 158, 166, welcoming, Map(confidence -> 1.0), List()), List(token, 167, 167, ., Map(confidence -> 0.0), List()), List(token, 169, 171, The, Map(confidence -> 1.0), List()), List(token, 173, 180, location, Map(confidence -> 1.0), List()), List(token, 182, 183, is, Map(confidence -> 1.0), List()), List(token, 185, 194, convenient, Map(confidence -> 1.0), List()), List(token, 196, 198, and, Map(confidence -> 1.0), List()), List(token, 200, 205, Sylvia, Map(confidence -> 1.0), List()), List(token, 207, 214, provided, Map(confidence -> 1.0), List()), List(token, 216, 220, every, Map(confidence -> 1.0), List()), List(token, 222, 229, amenity, Map(confidence -> 0.0), List()), List(token, 231, 232, we, Map(confidence -> 1.0), List()), List(token, 234, 238, could, Map(confidence -> 1.0), List()), List(token, 240, 243, need, Map(confidence -> 1.0), List()), List(token, 245, 253, including, Map(confidence -> 1.0), List()), List(token, 255, 257, not, Map(confidence -> 1.0), List()), List(token, 259, 262, just, Map(confidence -> 1.0), List()), List(token, 264, 264, a, Map(confidence -> 1.0), List()), List(token, 266, 270, space, Map(confidence -> 1.0), List()), List(token, 272, 277, heater, Map(confidence -> 1.0), List()), List(token, 279, 280, in, Map(confidence -> 1.0), List()), List(token, 282, 284, the, Map(confidence -> 1.0), List()), List(token, 286, 289, main, Map(confidence -> 1.0), List()), List(token, 291, 294, room, Map(confidence -> 1.0), List()), List(token, 296, 298, but, Map(confidence -> 1.0), List()), List(token, 299, 299, ,, Map(confidence -> 0.0), List()), List(token, 301, 312, thoughtfully, Map(confidence -> 1.0), List()), List(token, 313, 313, ,, Map(confidence -> 0.0), List()), List(token, 315, 317, one, Map(confidence -> 1.0), List()), List(token, 319, 320, in, Map(confidence -> 1.0), List()), List(token, 322, 324, the, Map(confidence -> 1.0), List()), List(token, 326, 333, bathroom, Map(confidence -> 1.0), List()), List(token, 335, 337, too, Map(confidence -> 1.0), List()), List(token, 338, 338, ., Map(confidence -> 0.0), List()), List(token, 340, 345, Thanks, Map(confidence -> 1.0), List()), List(token, 346, 346, ,, Map(confidence -> 0.0), List()), List(token, 348, 353, Sylvia, Map(confidence -> 1.0), List()), List(token, 354, 354, !, Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 167, positive, Map(confidence -> 0.5505), List()), List(sentiment, 169, 338, negative, Map(confidence -> 0.4845), List()), List(sentiment, 340, 354, positive, Map(confidence -> 0.4579), List()))"
5456,28538,2010-03-07,24075,Michael,Cozy and clean place. It was close to I-35 so that was convenient.,"List(List(document, 0, 66, Cozy and clean place. It was close to I-35 so that was convenient., Map(sentence -> 0), List()))","List(List(document, 0, 20, Cozy and clean place., Map(sentence -> 0), List()), List(document, 23, 66, It was close to I-35 so that was convenient., Map(sentence -> 1), List()))","List(List(token, 0, 3, Cozy, Map(sentence -> 0), List()), List(token, 5, 7, and, Map(sentence -> 0), List()), List(token, 9, 13, clean, Map(sentence -> 0), List()), List(token, 15, 19, place, Map(sentence -> 0), List()), List(token, 20, 20, ., Map(sentence -> 0), List()), List(token, 23, 24, It, Map(sentence -> 1), List()), List(token, 26, 28, was, Map(sentence -> 1), List()), List(token, 30, 34, close, Map(sentence -> 1), List()), List(token, 36, 37, to, Map(sentence -> 1), List()), List(token, 39, 42, I-35, Map(sentence -> 1), List()), List(token, 44, 45, so, Map(sentence -> 1), List()), List(token, 47, 50, that, Map(sentence -> 1), List()), List(token, 52, 54, was, Map(sentence -> 1), List()), List(token, 56, 65, convenient, Map(sentence -> 1), List()), List(token, 66, 66, ., Map(sentence -> 1), List()))","List(List(token, 0, 3, cozy, Map(confidence -> 0.5), List()), List(token, 5, 7, and, Map(confidence -> 1.0), List()), List(token, 9, 13, clean, Map(confidence -> 1.0), List()), List(token, 15, 19, place, Map(confidence -> 1.0), List()), List(token, 20, 20, ., Map(confidence -> 0.0), List()), List(token, 23, 24, It, Map(confidence -> 1.0), List()), List(token, 26, 28, was, Map(confidence -> 1.0), List()), List(token, 30, 34, close, Map(confidence -> 1.0), List()), List(token, 36, 37, to, Map(confidence -> 1.0), List()), List(token, 39, 42, I-35, Map(confidence -> 0.0), List()), List(token, 44, 45, so, Map(confidence -> 1.0), List()), List(token, 47, 50, that, Map(confidence -> 1.0), List()), List(token, 52, 54, was, Map(confidence -> 1.0), List()), List(token, 56, 65, convenient, Map(confidence -> 1.0), List()), List(token, 66, 66, ., Map(confidence -> 0.0), List()))","List(List(sentiment, 0, 20, positive, Map(confidence -> 0.4789), List()), List(sentiment, 23, 66, positive, Map(confidence -> 0.5287), List()))"


In [17]:
annotations_df.limit(1).show()

In [18]:
# sentiment_df = annotations_df.select("listing_id", "id", "date", "reviewer_id", "reviewer_name", "text", "sentiment",
#                                      averageSentiment("sentiment").alias("avg_sentiment"),
#                                      highestConfidence("sentiment").alias("high_conf_sentiment"),
#                                      longestSentiment("sentiment").alias("long_conf_sentiment"),
#                                      classifySentiment("sentiment").alias("classified_sentiment")
#                                     )
# display(sentiment_df)

annotations_df.limit(1).select("listing_id", "id", "date", "reviewer_id", "reviewer_name", "text", "sentiment",
                                     longestSentiment("sentiment").alias("longest_sentiment")
                                    ).show()

In [19]:
avg_sentiment_df = sentiment_df.groupBy("listing_id").pivot("avg_sentiment", ["positive", "negative"]).count().orderBy("listing_id")
avg_sentiment_df = avg_sentiment_df.withColumnRenamed("positive", "avg_sentiment_positive_count") \
                                   .withColumnRenamed("negative", "avg_sentiment_negative_count") \
                                   .withColumn("avg_sentiment_positive_percent", F.expr("CASE WHEN avg_sentiment_negative_count is null and avg_sentiment_positive_count > 0 THEN 1 WHEN avg_sentiment_positive_count is null and avg_sentiment_negative_count > 0 THEN 0 ELSE avg_sentiment_positive_count/(avg_sentiment_positive_count + avg_sentiment_negative_count) END")) \
                                   .withColumn("avg_sentiment_positivity", F.expr("CASE WHEN avg_sentiment_positive_percent > 0.7 THEN 1 ELSE 0 END"))

In [20]:
high_conf_sentiment_df = sentiment_df.groupBy("listing_id").pivot("high_conf_sentiment", ["positive", "negative"]).count().orderBy("listing_id")
high_conf_sentiment_df = high_conf_sentiment_df.withColumnRenamed("positive", "high_conf_sentiment_positive_count") \
                                               .withColumnRenamed("negative", "high_conf_sentiment_negative_count") \
                                               .withColumn("high_conf_sentiment_positive_percent", F.expr("CASE WHEN high_conf_sentiment_negative_count is null and high_conf_sentiment_positive_count > 0 THEN 1 WHEN high_conf_sentiment_positive_count is null and high_conf_sentiment_negative_count > 0 THEN 0 ELSE high_conf_sentiment_positive_count/(high_conf_sentiment_positive_count + high_conf_sentiment_negative_count) END")) \
                                               .withColumn("high_conf_sentiment_positivity", F.expr("CASE WHEN high_conf_sentiment_positive_percent > 0.7 THEN 1 ELSE 0 END"))

In [21]:
long_conf_sentiment_df = sentiment_df.groupBy("listing_id").pivot("long_conf_sentiment", ["positive", "negative"]).count().orderBy("listing_id")
long_conf_sentiment_df = long_conf_sentiment_df.withColumnRenamed("positive", "long_conf_sentiment_positive_count") \
                                               .withColumnRenamed("negative", "long_conf_sentiment_negative_count") \
                                               .withColumn("long_conf_sentiment_positive_percent", F.expr("CASE WHEN long_conf_sentiment_negative_count is null and long_conf_sentiment_positive_count > 0 THEN 1 WHEN long_conf_sentiment_positive_count is null and long_conf_sentiment_negative_count > 0 THEN 0 ELSE long_conf_sentiment_positive_count/(long_conf_sentiment_positive_count + long_conf_sentiment_negative_count) END")) \
                                               .withColumn("long_conf_sentiment_positivity", F.expr("CASE WHEN long_conf_sentiment_positive_percent > 0.7 THEN 1 ELSE 0 END"))

In [22]:
class_sentiment_df = sentiment_df.groupBy("listing_id").pivot("classified_sentiment", ["positive", "negative"]).count().orderBy("listing_id")
class_sentiment_df = class_sentiment_df.withColumnRenamed("positive", "classified_sentiment_positive_count") \
                                       .withColumnRenamed("negative", "classified_sentiment_negative_count") \
                                       .withColumn("classified_sentiment_positive_percent", F.expr("CASE WHEN classified_sentiment_negative_count is null and classified_sentiment_positive_count> 0 THEN 1 WHEN classified_sentiment_positive_count is null and classified_sentiment_negative_count > 0 THEN 0 ELSE classified_sentiment_positive_count/(classified_sentiment_positive_count + classified_sentiment_negative_count) END")) \
                                       .withColumn("classified_sentiment_positivity", F.expr("CASE WHEN classified_sentiment_positive_percent > 0.7 THEN 1 ELSE 0 END"))

In [23]:
all_sentiment_df = avg_sentiment_df.join(high_conf_sentiment_df, ["listing_id"], how="inner") \
                                   .join(long_conf_sentiment_df, ["listing_id"], how="inner") \
                                   .join(class_sentiment_df, ["listing_id"], how="inner")

In [24]:
save_mode = "overwrite"

all_sentiment_df.write \
    .format("com.databricks.spark.csv") \
    .options(header="true") \
    .mode(save_mode) \
    .save("/FileStore/tables/all_sentiment.csv")

In [25]:
display(dbutils.fs.ls("/FileStore/tables"))

path,name,size
dbfs:/FileStore/tables/airlines.csv,airlines.csv,505
dbfs:/FileStore/tables/all_sentiment.csv/,all_sentiment.csv/,0
dbfs:/FileStore/tables/amazon_reviews_us_Digital_Video_Games_v1_00_tsv-5da15.gz,amazon_reviews_us_Digital_Video_Games_v1_00_tsv-5da15.gz,27442648
dbfs:/FileStore/tables/amazon_reviews_us_Software_v1_00_tsv-16dc0.gz,amazon_reviews_us_Software_v1_00_tsv-16dc0.gz,94010685
dbfs:/FileStore/tables/amazon_reviews_us_Video_Games_v1_00_tsv-56dc1.gz,amazon_reviews_us_Video_Games_v1_00_tsv-56dc1.gz,475199894
dbfs:/FileStore/tables/austin_weather_2017.csv,austin_weather_2017.csv,609844
dbfs:/FileStore/tables/calendar.csv,calendar.csv,180039751
dbfs:/FileStore/tables/config.py,config.py,174
dbfs:/FileStore/tables/customer_data.csv/,customer_data.csv/,0
dbfs:/FileStore/tables/data.csv,data.csv,73


In [26]:
listings_df = spark.read.jdbc(url=jdbcUrl, table="listings_full", properties=connectionProperties)
listings_df = listings_df.select("id", "latitude", "longitude", "review_scores_rating")

In [27]:
final_df = all_sentiment_df.join(listings_df, all_sentiment_df.listing_id == listings_df.id, how="inner").drop(F.col("id"))
display(final_df)

listing_id,avg_sentiment_positive_count,avg_sentiment_negative_count,avg_sentiment_positive_percent,avg_sentiment_positivity,high_conf_sentiment_positive_count,high_conf_sentiment_negative_count,high_conf_sentiment_positive_percent,high_conf_sentiment_positivity,long_conf_sentiment_positive_count,long_conf_sentiment_negative_count,long_conf_sentiment_positive_percent,long_conf_sentiment_positivity,classified_sentiment_positive_count,classified_sentiment_negative_count,classified_sentiment_positive_percent,classified_sentiment_positivity,latitude,longitude,review_scores_rating
21368263,17.0,14.0,0.5483870967741935,0,28.0,3.0,0.9032258064516128,1,22.0,9.0,0.7096774193548387,1,23.0,8.0,0.7419354838709677,1,30.2936,-97.6995,97
27310107,1.0,2.0,0.3333333333333333,0,2.0,1.0,0.6666666666666666,0,2.0,1.0,0.6666666666666666,0,2.0,1.0,0.6666666666666666,0,30.3841,-97.699,98
17611669,,3.0,0.0,0,1.0,2.0,0.3333333333333333,0,2.0,1.0,0.6666666666666666,0,2.0,1.0,0.6666666666666666,0,30.2864,-97.7489,90
21378172,1.0,,1.0,1,1.0,,1.0,1,1.0,,1.0,1,1.0,,1.0,1,30.2547,-97.7766,100
21364411,3.0,1.0,0.75,1,4.0,,1.0,1,3.0,1.0,0.75,1,4.0,,1.0,1,30.2204,-97.9106,100
22937302,,1.0,0.0,0,1.0,,1.0,1,1.0,,1.0,1,1.0,,1.0,1,30.2581,-97.7283,100
27102313,1.0,2.0,0.3333333333333333,0,2.0,1.0,0.6666666666666666,0,1.0,2.0,0.3333333333333333,0,1.0,2.0,0.3333333333333333,0,30.4362,-97.7307,98
21366577,12.0,11.0,0.5217391304347826,0,22.0,1.0,0.9565217391304348,1,20.0,3.0,0.8695652173913043,1,20.0,3.0,0.8695652173913043,1,30.1469,-97.8568,98
977492,32.0,20.0,0.6153846153846154,0,43.0,9.0,0.8269230769230769,1,36.0,16.0,0.6923076923076923,0,36.0,16.0,0.6923076923076923,0,30.2521,-97.7238,98
21380835,1.0,1.0,0.5,0,2.0,,1.0,1,1.0,1.0,0.5,0,1.0,1.0,0.5,0,30.2661,-97.7426,98
