# Apply VADER Model

The purpose of this notebook is to apply the VADER model to the cleaned AirBNB and tripadvisor data, and combine all of the data into one master file.

In [1]:
import pandas as pd
import numpy as np
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

#### Change Setting Variables

Change the variable below in order change how the notebook functions.

In [2]:
zipcode_column_name = "zip_codes" #The name of the zipcode column within the TripAdvisor file
COVID_column_name = "COVID" #The name of the COVID column within the TripAdvisor file

testing = False #Set to True if testing the script. Set to False if running the script fully.
airbnb_directory = "data/AirBNB" #The directory where all of the cleaned AirBNB files are
tripadvisor_file = "data/trip_advisor_revised.csv.gz" #The path to the cleaned TripAdvisor data

#### Changing the TripAdvisor Layout and Running Applying VADER Model

In [3]:
#Reading in data
if testing == True:
    tripadvisor_df = pd.read_csv(tripadvisor_file, compression = "gzip", nrows = 100000)
    tripadvisor_df[zipcode_column_name] = np.nan
    tripadvisor_df[COVID_column_name] = np.nan
else:
    tripadvisor_df = pd.read_csv(tripadvisor_file, compression = "gzip")

In [4]:
tripadvisor_df.shape

(1824732, 6)

In [5]:
tripadvisor_df.head()

Unnamed: 0,City,zip_codes,Date,Price,Review,COVID
0,asheville,28803,2021-02-28,$$$,Clean with Spectacular Service This hotel was ...,Post-
1,asheville,28803,2021-01-31,$$$,Awesome Hotel! My boyfriend and I had a weeken...,Post-
2,asheville,28803,2021-02-28,$$$,"Great hotel Decided on a trip to Asheville, it...",Post-
3,asheville,28803,2021-02-28,$$$,Great hotel We loved this hotel! Beautifully d...,Post-
4,asheville,28803,2021-02-28,$$$,"Essence of ""Southern Hospitality""! This is a w...",Post-


In [6]:
#Renaming columns to match AirBNB column names
tripadvisor_df = tripadvisor_df.rename(columns = {"City":"city", "Date":"date", "Price":"price", "Review":"comments",zipcode_column_name:"zipcode", COVID_column_name:"COVID"})

In [7]:
#Creating columns that exist in AirBNB but do not exist in TripAdvisor
new_columns = ["listing_id", "neighborhood", "property", "room", "rating"]
for column in new_columns:
    tripadvisor_df[column] = np.nan

In [8]:
#Reordering columns to match AirBNB column order
column_names = ["listing_id", "neighborhood", "price", "property", "room", "rating", "zipcode", "date", "comments", "COVID", "city"]
tripadvisor_df = tripadvisor_df[column_names]

In [9]:
#Adding a source column so we can easily separate out TripAdvisor data
tripadvisor_df["source"] = "TripAdvisor"
tripadvisor_df.head()

Unnamed: 0,listing_id,neighborhood,price,property,room,rating,zipcode,date,comments,COVID,city,source
0,,,$$$,,,,28803,2021-02-28,Clean with Spectacular Service This hotel was ...,Post-,asheville,TripAdvisor
1,,,$$$,,,,28803,2021-01-31,Awesome Hotel! My boyfriend and I had a weeken...,Post-,asheville,TripAdvisor
2,,,$$$,,,,28803,2021-02-28,"Great hotel Decided on a trip to Asheville, it...",Post-,asheville,TripAdvisor
3,,,$$$,,,,28803,2021-02-28,Great hotel We loved this hotel! Beautifully d...,Post-,asheville,TripAdvisor
4,,,$$$,,,,28803,2021-02-28,"Essence of ""Southern Hospitality""! This is a w...",Post-,asheville,TripAdvisor


In [10]:
#Setting up VADER
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/tom/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [11]:
#Setting up function to apply VADER model
def create_polarity_scores(df):
    neg = []
    neu = []
    pos = []
    compound = []
    error = []
    for index, row in df.iterrows():
        try:
            pol_dict = sia.polarity_scores(row["comments"])
            neg.append(pol_dict["neg"])
            neu.append(pol_dict["neu"])
            pos.append(pol_dict["pos"])
            compound.append(pol_dict["compound"])
            error.append(0)
        except AttributeError:
            neg.append(np.nan)
            neu.append(np.nan)
            pos.append(np.nan)
            compound.append(np.nan)
            error.append(1)
    return neg, neu, pos, compound, error

In [12]:
#Applying VADER model
%time neg, neu, pos, compound, error = create_polarity_scores(tripadvisor_df)

CPU times: user 33min 50s, sys: 23.5 s, total: 34min 13s
Wall time: 34min 13s


In [13]:
tripadvisor_df["neg"] = neg
tripadvisor_df["neu"] = neu
tripadvisor_df["pos"] = pos
tripadvisor_df["compound"] = compound
tripadvisor_df["vader_error"] = error

In [14]:
#Setting up dictionary to report out VADER errors
vader_errors_dict = {"tripadvisor":tripadvisor_df["vader_error"].sum()}

In [15]:
tripadvisor_df.head()

Unnamed: 0,listing_id,neighborhood,price,property,room,rating,zipcode,date,comments,COVID,city,source,neg,neu,pos,compound,vader_error
0,,,$$$,,,,28803,2021-02-28,Clean with Spectacular Service This hotel was ...,Post-,asheville,TripAdvisor,0.009,0.732,0.259,0.9964,0
1,,,$$$,,,,28803,2021-01-31,Awesome Hotel! My boyfriend and I had a weeken...,Post-,asheville,TripAdvisor,0.0,0.559,0.441,0.9782,0
2,,,$$$,,,,28803,2021-02-28,"Great hotel Decided on a trip to Asheville, it...",Post-,asheville,TripAdvisor,0.0,0.606,0.394,0.9843,0
3,,,$$$,,,,28803,2021-02-28,Great hotel We loved this hotel! Beautifully d...,Post-,asheville,TripAdvisor,0.034,0.528,0.438,0.9769,0
4,,,$$$,,,,28803,2021-02-28,"Essence of ""Southern Hospitality""! This is a w...",Post-,asheville,TripAdvisor,0.0,0.728,0.272,0.9848,0


In [16]:
#Saving the initial csv
tripadvisor_df.to_csv("data/vaderoutput.csv.gz", compression = 'gzip', index = False)

In [17]:
#Releasing the tripadvisor df from memory
del tripadvisor_df

#### Applying the VADER model to the AirBNB files

In [18]:
#Setting up column name list to test that names are consistent
column_names = ["listing_id", "neighborhood", "price", "property", "room", "rating", "zipcode", "date", "comments", "COVID"]
error_dict = {}

In [19]:
for f in os.listdir(airbnb_directory):
    if f != ".DS_Store" and "gz" in f:
        print(f)
        #Reading in file
        if testing == True:
            df = pd.read_csv(os.path.join(airbnb_directory, f), compression = "gzip", nrows = 100000)
        else:
            df = pd.read_csv(os.path.join(airbnb_directory, f), compression = "gzip")
        
        try:
            #Correctly arranging column names if necessary
            df = df[column_names]
            #Processing files
            print("Number of Rows: %s" % len(df))
            city = f.split(".")[0]
            df["city"] = city
            df["source"] = "AirBNB"
            neg, neu, pos, compound, error = create_polarity_scores(df)
            df["neg"] = neg
            df["neu"] = neu
            df["pos"] = pos
            df["compound"] = compound
            df["vader_error"] = error
            vader_errors = df["vader_error"].sum()
            vader_errors_dict[f] = vader_errors
            print(df.head())
            print("\n")
            df.to_csv("data/vaderoutput.csv.gz", compression = 'gzip', index = False, mode = "a", header = False)
        except Exception as e:
            #If there is an error, adding it to the error dict for later processing
            error_dict[f] = e

Seattle.csv.gz


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 349087
   listing_id neighborhood    price      property             room  rating  \
0        2318      Madrona  $296.00  Entire house  Entire home/apt   100.0   
1        2318      Madrona  $296.00  Entire house  Entire home/apt   100.0   
2        2318      Madrona  $296.00  Entire house  Entire home/apt   100.0   
3        2318      Madrona  $296.00  Entire house  Entire home/apt   100.0   
4        2318      Madrona  $296.00  Entire house  Entire home/apt   100.0   

  zipcode        date                                           comments  \
0     NaN  2018-02-20  Lovely, sunny house.  Fully equipped kitchen. ...   
1     NaN  2018-02-25  This house made me want to move to Madrona :)....   
2     NaN  2018-03-28  Casa Madrona was perfect for our family of 5! ...   
3     NaN  2018-04-04  Megan's house is spacious, attractive, ultra-c...   
4     NaN  2018-04-22  While in Seattle for a couple weeks, I rented ...   

  COVID     city  source  neg    neu    pos  compou

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 90933
   listing_id          neighborhood   price               property  \
0       11879  Unincorporated Areas  $75.00  Private room in house   
1       11879  Unincorporated Areas  $75.00  Private room in house   
2       11879  Unincorporated Areas  $75.00  Private room in house   
3       11879  Unincorporated Areas  $75.00  Private room in house   
4       11879  Unincorporated Areas  $75.00  Private room in house   

           room  rating zipcode        date  \
0  Private room    97.0     NaN  2018-06-01   
1  Private room    97.0     NaN  2018-06-12   
2  Private room    97.0     NaN  2018-06-20   
3  Private room    97.0     NaN  2018-07-23   
4  Private room    97.0     NaN  2018-08-13   

                                            comments COVID       city  source  \
0  If you're staying in Aptos, Steve's house is i...  Pre-  SantaCruz  AirBNB   
1  Sunny room was perfect for my needs. Relaxed a...  Pre-  SantaCruz  AirBNB   
2  Steven's home is comfortable

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 329914
   listing_id neighborhood    price property             room  rating zipcode  \
0       10291      Navarre  $300.00  Cottage  Entire home/apt    97.0   70124   
1       10291      Navarre  $300.00  Cottage  Entire home/apt    97.0   70124   
2       10291      Navarre  $300.00  Cottage  Entire home/apt    97.0   70124   
3       10291      Navarre  $300.00  Cottage  Entire home/apt    97.0   70124   
4       10291      Navarre  $300.00  Cottage  Entire home/apt    97.0   70124   

         date                                           comments COVID  \
0  2018-02-12  Jill's place is in a great location and was ev...  Pre-   
1  2018-03-31                  Fantastic spot, would book again!  Pre-   
2  2018-04-09  This place is a MUST STAY! Host was very accom...  Pre-   
3  2018-05-08  Excellent place! We went for jazz fest and lov...  Pre-   
4  2018-05-27  Very clean, spacious, and comfortable place to...  Pre-   

         city  source  neg    neu    pos  com

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 505732
   listing_id neighborhood    price property             room  rating zipcode  \
0           6  North Hills  $295.00    House  Entire home/apt    96.0   92104   
1           6  North Hills  $295.00    House  Entire home/apt    96.0   92104   
2           6  North Hills  $295.00    House  Entire home/apt    96.0   92104   
3           6  North Hills  $295.00    House  Entire home/apt    96.0   92104   
4           6  North Hills  $295.00    House  Entire home/apt    96.0   92104   

         date                                           comments COVID  \
0  2018-01-02  Absolute delight. Cozy place close to all the ...  Pre-   
1  2018-01-05  Sarah was a great host, who had a beautiful ho...  Pre-   
2  2018-01-14  My wife and I stayed here for a weekend and we...  Pre-   
3  2018-01-25  Very good location ..... lots of cafes and eat...  Pre-   
4  2018-02-03  Awesome house within short driving distance to...  Pre-   

        city  source    neg    neu    pos  co

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 336709
   listing_id neighborhood   price                     property          room  \
0        2384    Hyde Park  $65.00  Private room in condominium  Private room   
1        2384    Hyde Park  $65.00  Private room in condominium  Private room   
2        2384    Hyde Park  $65.00  Private room in condominium  Private room   
3        2384    Hyde Park  $65.00  Private room in condominium  Private room   
4        2384    Hyde Park  $65.00  Private room in condominium  Private room   

   rating zipcode        date  \
0   100.0     NaN  2018-01-13   
1   100.0     NaN  2018-01-26   
2   100.0     NaN  2018-02-02   
3   100.0     NaN  2018-02-13   
4   100.0     NaN  2018-02-19   

                                            comments COVID     city  source  \
0  Rebecca is probably the best airbnb host I've ...  Pre-  Chicago  AirBNB   
1  Rebecca is a welcoming and gracious host. Her ...  Pre-  Chicago  AirBNB   
2  Staying with Rebecca was a pure pleasure. The ...  

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 269850
   listing_id neighborhood    price    property             room  rating  \
0         360     Highland  $140.00  Guesthouse  Entire home/apt   100.0   
1         360     Highland  $140.00  Guesthouse  Entire home/apt   100.0   
2         360     Highland  $140.00  Guesthouse  Entire home/apt   100.0   
3         360     Highland  $140.00  Guesthouse  Entire home/apt   100.0   
4         360     Highland  $140.00  Guesthouse  Entire home/apt   100.0   

  zipcode        date                                           comments  \
0   80211  2018-08-13  This space was perfect! Great location, hosts,...   
1   80211  2018-08-21  Hidden Gem in Denver. Magical Secret garden ce...   
2   80211  2018-08-23  Amazing stay at one of the best kept AirBnB se...   
3   80211  2018-08-24  The cottage has a great setting with a garden ...   
4   80211  2018-08-26  Super cute place, very peaceful (loved the gar...   

  COVID    city  source    neg    neu    pos  compound  vader_e

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 578637
   listing_id neighborhood   price           property             room  \
0        5065      Hamakua  $85.00  Bed and breakfast  Entire home/apt   
1        5065      Hamakua  $85.00  Bed and breakfast  Entire home/apt   
2        5065      Hamakua  $85.00  Bed and breakfast  Entire home/apt   
3        5065      Hamakua  $85.00  Bed and breakfast  Entire home/apt   
4        5065      Hamakua  $85.00  Bed and breakfast  Entire home/apt   

   rating zipcode        date  \
0    91.0   96727  2018-01-20   
1    91.0   96727  2018-01-25   
2    91.0   96727  2018-02-16   
3    91.0   96727  2018-02-19   
4    91.0   96727  2018-02-27   

                                            comments COVID    city  source  \
0  I highly recommend Wayne’s B&B. It is a good b...  Pre-  Hawaii  AirBNB   
1  Wayne and his wife were very friendly and thei...  Pre-  Hawaii  AirBNB   
2  The accommodation was very private, with adequ...  Pre-  Hawaii  AirBNB   
3  This was our secon

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 244755
   listing_id      neighborhood    price   property             room  rating  \
0         958  Western Addition  $170.00  Apartment  Entire home/apt    97.0   
1         958  Western Addition  $170.00  Apartment  Entire home/apt    97.0   
2         958  Western Addition  $170.00  Apartment  Entire home/apt    97.0   
3         958  Western Addition  $170.00  Apartment  Entire home/apt    97.0   
4         958  Western Addition  $170.00  Apartment  Entire home/apt    97.0   

  zipcode        date                                           comments  \
0   94117  2018-01-05  Holly's home is really beautiful.  Other revie...   
1   94117  2018-02-26  Perfect location and a lovely home! Holly and ...   
2   94117  2018-03-03  Holly was fantastic! Amazing location! Holly r...   
3   94117  2018-03-06  Awesome place. Great host. Excellent location....   
4   94117  2018-03-16  Great location with public transport and a par...   

  COVID          city  source    neg   

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 1280124
   listing_id neighborhood   price                   property          room  \
0        2708    Hollywood  $68.00  Private room in apartment  Private room   
1        2708    Hollywood  $68.00  Private room in apartment  Private room   
2        2708    Hollywood  $68.00  Private room in apartment  Private room   
3        2708    Hollywood  $68.00  Private room in apartment  Private room   
4        2708    Hollywood  $68.00  Private room in apartment  Private room   

   rating zipcode        date  \
0    97.0     NaN  2018-01-05   
1    97.0     NaN  2018-01-13   
2    97.0     NaN  2018-07-01   
3    97.0     NaN  2018-07-15   
4    97.0     NaN  2018-07-26   

                                            comments COVID city  source  neg  \
0  Chas house feels like home, it is clean, it ha...  Pre-   LA  AirBNB  0.0   
1  1. Clean 2. Host is very friendly 3. Reasonabl...  Pre-   LA  AirBNB  0.0   
2  Wow; Charles was a delight to meet a very warm...  Pre-   L

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 255526
   listing_id neighborhood    price      property             room  rating  \
0       57818    Hollywood  $125.00  Entire house  Entire home/apt    92.0   
1       57818    Hollywood  $125.00  Entire house  Entire home/apt    92.0   
2       57818    Hollywood  $125.00  Entire house  Entire home/apt    92.0   
3       57818    Hollywood  $125.00  Entire house  Entire home/apt    92.0   
4       57818    Hollywood  $125.00  Entire house  Entire home/apt    92.0   

  zipcode        date                                           comments  \
0     NaN  2018-02-27  Great spot in a nice area, we really enjoyed o...   
1     NaN  2018-03-09  Great hospitality and quick responses. The hos...   
2     NaN  2018-03-25  Excellent communication from owner. Great loca...   
3     NaN  2018-05-28  Great location for a family or if you are look...   
4     NaN  2018-07-29  Would probably stay here again if it was cheap...   

  COVID     city  source  neg    neu    pos  compou

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 200969
   listing_id neighborhood   price   property             room  rating  \
0       11464  Santa Clara  $85.00  Apartment  Entire home/apt    80.0   
1       11464  Santa Clara  $85.00  Apartment  Entire home/apt    80.0   
2       11464  Santa Clara  $85.00  Apartment  Entire home/apt    80.0   
3       11464  Santa Clara  $85.00  Apartment  Entire home/apt    80.0   
4       11464  Santa Clara  $85.00  Apartment  Entire home/apt    80.0   

  zipcode        date                                           comments  \
0   95051  2018-07-28  Nice place, great location, compact, but fille...   
1   95051  2018-08-26  Charming little apartment, friendly neighbors,...   
2   95051  2019-10-11  Emey is a gem! She was very helpful and unders...   
3   95051  2020-11-21  Emey's studio is larger than it looks in the p...   
4   95051  2021-01-17  Emey is a great communicator and she try’s to ...   

   COVID         city  source    neg    neu    pos  compound  vader_error  

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 344333
   listing_id  neighborhood    price property             room  rating  \
0        2265         78702  $200.00    House  Entire home/apt    92.0   
1        2265         78702  $200.00    House  Entire home/apt    92.0   
2        2265         78702  $200.00    House  Entire home/apt    92.0   
3        2265         78702  $200.00    House  Entire home/apt    92.0   
4        5245         78702  $125.00    House     Private room    91.0   

  zipcode        date                                           comments  \
0   78702  2018-04-22  I absolutely loved staying at Paddy’s place. I...   
1   78702  2018-05-28  Paddy is really nice. Willing to answer all yo...   
2   78702  2018-09-16  The house was perfect. Outdoor space and plent...   
3   78702  2019-03-16  We had a great stay at Paddy's place during SX...   
4   78702  2018-03-14  The house is very nice. The metro/train statio...   

  COVID    city  source  neg    neu    pos  compound  vader_error  
0  Pre-

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 159520
   listing_id neighborhood   price property             room  rating zipcode  \
0        2441     Hennepin  $85.00    House  Entire home/apt    95.0   55418   
1        2441     Hennepin  $85.00    House  Entire home/apt    95.0   55418   
2        2441     Hennepin  $85.00    House  Entire home/apt    95.0   55418   
3        2441     Hennepin  $85.00    House  Entire home/apt    95.0   55418   
4        2441     Hennepin  $85.00    House  Entire home/apt    95.0   55418   

         date                                           comments COVID  \
0  2018-02-05  Emily was incredible. By far the best AirBnB h...  Pre-   
1  2018-03-25  Emily was a gracious host and her cozy place w...  Pre-   
2  2018-04-22  Emily's place was a great fit for us while we ...  Pre-   
3  2018-05-11  Emily kindly let me stay at her apartment for ...  Pre-   
4  2018-06-07  This was my first stay in an Airbnb and what a...  Pre-   

         city  source    neg    neu    pos  compoun

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 396242
   listing_id neighborhood   price property          room  rating zipcode  \
0        6422   District 6  $40.00    House  Private room    99.0   37206   
1        6422   District 6  $40.00    House  Private room    99.0   37206   
2        6422   District 6  $40.00    House  Private room    99.0   37206   
3        6422   District 6  $40.00    House  Private room    99.0   37206   
4        6422   District 6  $40.00    House  Private room    99.0   37206   

         date                                           comments COVID  \
0  2018-01-03  Michele was a GREAT host! She responded fast; ...  Pre-   
1  2018-01-04  The room was very clean and incredibly well de...  Pre-   
2  2018-01-07  I had a fabulous stay at Michele's!  Her famil...  Pre-   
3  2018-01-09  We arrived & easily found the parking & Michel...  Pre-   
4  2018-01-10  Beautiful space with warm welcoming hosts! It ...  Pre-   

        city  source  neg    neu    pos  compound  vader_error  
0  N

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 94393
  listing_id neighborhood   price               property          room  \
0       3083     Prescott  $43.00  Private room in house  Private room   
1       3083     Prescott  $43.00  Private room in house  Private room   
2       3083     Prescott  $43.00  Private room in house  Private room   
3       3083     Prescott  $43.00  Private room in house  Private room   
4       3083     Prescott  $43.00  Private room in house  Private room   

   rating  zipcode        date  \
0    91.0      NaN  2018-03-25   
1    91.0      NaN  2018-05-26   
2    91.0      NaN  2018-06-02   
3    91.0      NaN  2018-06-18   
4    91.0      NaN  2018-07-01   

                                            comments COVID     city  source  \
0  Realy good opportunity for a cheap stay in San...  Pre-  Oakland  AirBNB   
1  Loved the vibe in the house. Great location. I...  Pre-  Oakland  AirBNB   
2                   great hospitality and very clean  Pre-  Oakland  AirBNB   
3  Great pla

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 68802
   listing_id                          neighborhood    price property  \
0        6860  Ward E (councilmember James Solomon)  $163.00     Loft   
1        6860  Ward E (councilmember James Solomon)  $163.00     Loft   
2        6860  Ward E (councilmember James Solomon)  $163.00     Loft   
3        6860  Ward E (councilmember James Solomon)  $163.00     Loft   
4        6860  Ward E (councilmember James Solomon)  $163.00     Loft   

              room  rating zipcode        date  \
0  Entire home/apt    92.0   07302  2018-01-02   
1  Entire home/apt    92.0   07302  2018-03-02   
2  Entire home/apt    92.0   07302  2018-03-27   
3  Entire home/apt    92.0   07302  2018-04-04   
4  Entire home/apt    92.0   07302  2018-05-04   

                                            comments COVID    city  source  \
0                                         Nice room!  Pre-  Jersey  AirBNB   
1          Second time staying here. Great as always  Pre-  Jersey  AirBNB   
2  C

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 306609
   listing_id        neighborhood   price               property  \
0        3686  Historic Anacostia  $55.00  Private room in house   
1        3686  Historic Anacostia  $55.00  Private room in house   
2        3686  Historic Anacostia  $55.00  Private room in house   
3        3686  Historic Anacostia  $55.00  Private room in house   
4        3686  Historic Anacostia  $55.00  Private room in house   

           room  rating zipcode        date  \
0  Private room    92.0     NaN  2018-10-27   
1  Private room    92.0     NaN  2019-03-03   
2  Private room    92.0     NaN  2019-05-18   
3  Private room    92.0     NaN  2019-08-05   
4  Private room    92.0     NaN  2020-04-01   

                                            comments  COVID          city  \
0  Vita was great and her place worked out perfec...   Pre-  WashingtonDC   
1  We love staying in Anacostia near the Fredrick...   Pre-  WashingtonDC   
2  I highly recommend to anyone planning a trip t...  

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 1223810
   listing_id neighborhood    price   property             room  rating  \
0        2595      Midtown  $225.00  Apartment  Entire home/apt    94.0   
1        2595      Midtown  $225.00  Apartment  Entire home/apt    94.0   
2        2595      Midtown  $225.00  Apartment  Entire home/apt    94.0   
3        2595      Midtown  $225.00  Apartment  Entire home/apt    94.0   
4        2595      Midtown  $225.00  Apartment  Entire home/apt    94.0   

  zipcode        date                                           comments  \
0   10018  2018-04-29  We really enjoyed our stay at Jennifer’s apart...   
1   10018  2018-05-18  This apartment is convenient and functional.  ...   
2   10018  2018-05-27  The Midtown Castle is a beautiful place to sta...   
3   10018  2018-06-06  Jennifer is very nice and responsive. The loca...   
4   10018  2018-09-27  Awesome location, spotless, wonderfully accomm...   

  COVID     city  source    neg    neu    pos  compound  vader_error

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 164251
   listing_id neighborhood    price   property             room  rating  \
0        3781  East Boston  $125.00  Apartment  Entire home/apt    99.0   
1        3781  East Boston  $125.00  Apartment  Entire home/apt    99.0   
2        3781  East Boston  $125.00  Apartment  Entire home/apt    99.0   
3        3781  East Boston  $125.00  Apartment  Entire home/apt    99.0   
4        3781  East Boston  $125.00  Apartment  Entire home/apt    99.0   

  zipcode        date                                           comments  \
0   02128  2018-07-02       Great place, amazing city, fantastic host!!!   
1   02128  2019-02-22  My partner and I stayed at Frank's apartment f...   
2   02128  2019-12-21  I liked very much this apartment rented by Fra...   
3   02128  2020-09-03  I stayed at Frank's apartment for three months...   
4   02128  2020-12-20  Wonderful 1 bedroom with a great location! The...   

   COVID    city  source  neg    neu    pos  compound  vader_error  


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of Rows: 116652
   listing_id  neighborhood    price property             room  rating  \
0       15373  Narragansett  $415.00    House  Entire home/apt     NaN   
1       15373  Narragansett  $415.00    House  Entire home/apt     NaN   
2       15373  Narragansett  $415.00    House  Entire home/apt     NaN   
3       21714     Pawtucket  $168.00     Loft  Entire home/apt    87.0   
4       21714     Pawtucket  $168.00     Loft  Entire home/apt    87.0   

  zipcode        date                                           comments  \
0    2882  2020-07-21         The place clean and the location is great.   
1    2882  2020-08-14  We had a great time at this cottage. It’s larg...   
2    2882  2020-08-28  Amazing spot. Quick walk to the beach. Lots of...   
3    2863  2018-11-23                 A great host and a beautiful place   
4    2863  2018-11-25  I usually don’t write reviews for anything, bu...   

   COVID         city  source    neg    neu    pos  compound  vader_error  

#### Error Checking

Below are the list of AirBNB files that had errors raised. These files will have to be processed independently, and appended to the output at a later time.

In [20]:
print("Errors in %s files" % len(error_dict))
print(error_dict)

Errors in 1 files
{'Dallas.csv.gz': KeyError("['zipcode'] not in index")}


Below are the list of files that had VADER errors greater than 0. If the number of VADER errors for a given file is large we may have to find the root cause.

In [21]:
file_count = 0
for f in sorted(vader_errors_dict.keys()):
    error_count = vader_errors_dict[f]
    if error_count > 0:
        print("%s - %s" % (f, error_count))
        file_count+= 1

if file_count == 0:
    print("No VADER errors")

ClarkCounty.csv.gz - 1
Oakland.csv.gz - 9


Showing output file format at top of file and near end of file

In [22]:
output_file = pd.read_csv("data/vaderoutput.csv.gz", compression = 'gzip', nrows = 5)

In [23]:
output_file.head()

Unnamed: 0,listing_id,neighborhood,price,property,room,rating,zipcode,date,comments,COVID,city,source,neg,neu,pos,compound,vader_error
0,,,$$$,,,,28803,2021-02-28,Clean with Spectacular Service This hotel was ...,Post-,asheville,TripAdvisor,0.009,0.732,0.259,0.9964,0
1,,,$$$,,,,28803,2021-01-31,Awesome Hotel! My boyfriend and I had a weeken...,Post-,asheville,TripAdvisor,0.0,0.559,0.441,0.9782,0
2,,,$$$,,,,28803,2021-02-28,"Great hotel Decided on a trip to Asheville, it...",Post-,asheville,TripAdvisor,0.0,0.606,0.394,0.9843,0
3,,,$$$,,,,28803,2021-02-28,Great hotel We loved this hotel! Beautifully d...,Post-,asheville,TripAdvisor,0.034,0.528,0.438,0.9769,0
4,,,$$$,,,,28803,2021-02-28,"Essence of ""Southern Hospitality""! This is a w...",Post-,asheville,TripAdvisor,0.0,0.728,0.272,0.9848,0


In [24]:
column_names = output_file.columns

In [25]:
if testing == True:
    skip = 200000
else:
    skip = 8460000

In [26]:
output_file = pd.read_csv("data/vaderoutput.csv.gz", compression = 'gzip', nrows = 5, skiprows = skip)

In [27]:
output_file.columns = column_names

In [28]:
output_file.head()

Unnamed: 0,listing_id,neighborhood,price,property,room,rating,zipcode,date,comments,COVID,city,source,neg,neu,pos,compound,vader_error
0,27923937,"Dupont Circle, Connecticut Avenue/K Street",$110.00,Apartment,Entire home/apt,95.0,20009,2019-08-25,This is a great location in DC. Dupont Circle ...,Pre-,WashingtonDC,AirBNB,0.0,0.732,0.268,0.9313,0
1,27923937,"Dupont Circle, Connecticut Avenue/K Street",$110.00,Apartment,Entire home/apt,95.0,20009,2019-08-26,The space is adorable and the location is amaz...,Pre-,WashingtonDC,AirBNB,0.0,0.49,0.51,0.807,0
2,27923937,"Dupont Circle, Connecticut Avenue/K Street",$110.00,Apartment,Entire home/apt,95.0,20009,2019-09-02,This unit is in a GREAT location. Can’t get an...,Pre-,WashingtonDC,AirBNB,0.0,0.869,0.131,0.7034,0
3,27923937,"Dupont Circle, Connecticut Avenue/K Street",$110.00,Apartment,Entire home/apt,95.0,20009,2019-09-05,"Stylish, comfortable, centrally located - grea...",Pre-,WashingtonDC,AirBNB,0.0,0.51,0.49,0.8268,0
4,27923937,"Dupont Circle, Connecticut Avenue/K Street",$110.00,Apartment,Entire home/apt,95.0,20009,2019-09-06,"Great location, very clean. \nFlexible check-i...",Pre-,WashingtonDC,AirBNB,0.0,0.193,0.807,0.9113,0
