In [None]:
# All input CSVs live under one public S3 prefix; keep the prefix in a single place
# so the individual dataset URLs only differ by file name.
# Only LISTINGS_URL and MEAN_POLARITY_URL are read in the cells visible in this section.
_DATA_BASE_URL = 'https://ids-storage-football-prediction.s3-eu-west-1.amazonaws.com/data_mmwd'

REVIEWS_URL = f'{_DATA_BASE_URL}/reviews.csv'
REVIEWS_AND_POLARITY_URL = f'{_DATA_BASE_URL}/reviews_en_polarity.csv'
CALENDAR_URL = f'{_DATA_BASE_URL}/calendar.csv'
LISTINGS_URL = f'{_DATA_BASE_URL}/listings.csv'
MEAN_POLARITY_URL = f'{_DATA_BASE_URL}/polarity_mean.csv'

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from scipy import stats
import seaborn as sns
from scipy.linalg import sqrtm
from sklearn.metrics import mean_squared_error
from math import sqrt

# Switch matplotlib to its 'fivethirtyeight' style sheet; this is a global setting,
# so it affects every figure drawn after this cell runs.
plt.style.use('fivethirtyeight')

In [None]:
# Fetch the two inputs of this notebook straight from their remote CSVs.
listings = pd.read_csv(filepath_or_buffer=LISTINGS_URL)

# The polarity file carries its saved row labels in an 'Unnamed: 0' column;
# use that column as the index instead of keeping it as data.
polarity = pd.read_csv(
    filepath_or_buffer=MEAN_POLARITY_URL,
    index_col=['Unnamed: 0'],
)

In [None]:
# Size of the raw listings table as (rows, columns), before any merging or filtering.
listings.shape

(2613, 74)

In [None]:
# Inspect the column labels the polarity CSV arrived with.
polarity.columns

Index(['listing_id', 'polarity'], dtype='object')

In [None]:
# Align the join key with `listings`, where the listing identifier is called `id`.
# Rename by label instead of assigning the whole column list positionally: a
# positional assignment would silently mislabel the data if the CSV's column
# order or column count ever changed, whereas a missing `listing_id` here makes
# the later merge on `id` fail loudly.
polarity = polarity.rename(columns={'listing_id': 'id'})

In [None]:
# Preview the first rows of the polarity table to check the relabelled columns.
polarity.head()

Unnamed: 0,id,polarity
0,36642,0.807
1,59258,0.985
2,158824,0.743
3,261065,0.8304
4,336692,0.9537


In [None]:
# Attach the polarity score to each listing through the shared `id` key.
# how='inner' is pandas' default and is spelled out on purpose: listings
# without a matching polarity row are dropped by this join.
df = listings.merge(polarity, how='inner', on='id')

In [None]:
# Preview the first rows of the merged listings + polarity frame.
df.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,polarity
0,36642,https://www.airbnb.com/rooms/36642,20201021220716,2020-10-22,City Conveniences A Country Setting,<b>The space</b><br />Are you planning a vacat...,,https://a0.muscache.com/pictures/7542487/52136...,157787,https://www.airbnb.com/users/show/157787,Donna,2010-07-04,"Ottawa, Ontario, Canada",I am originally from Nova Scotia but I have li...,,,100%,f,https://a0.muscache.com/im/users/157787/profil...,https://a0.muscache.com/im/users/157787/profil...,,1,1,"['email', 'phone', 'facebook', 'reviews']",t,f,,Kitchissippi,,45.40159,-75.74679,Private room in house,Private room,2,,2 baths,1.0,1.0,"[""Essentials"", ""Hot water"", ""Dryer"", ""Extra pi...",$65.00,3,14,3,3,14,14,3.0,14.0,,t,29,59,89,364,2020-10-22,87,1,0,2010-07-11,2020-02-17,94.0,10.0,10.0,10.0,10.0,10.0,9.0,,f,1,0,1,0,0.69,0.807
1,59258,https://www.airbnb.com/rooms/59258,20201021220716,2020-10-22,Lower level of my house to rent,"Lower 1/2 of bungalow, a separate entrance, 10...",,https://a0.muscache.com/pictures/359337/5d90d6...,284351,https://www.airbnb.com/users/show/284351,Elena,2010-11-11,"Ottawa, Ontario, Canada",Canada rocks!,,,,f,https://a0.muscache.com/im/users/284351/profil...,https://a0.muscache.com/im/users/284351/profil...,,1,1,"['email', 'phone', 'reviews', 'offline_governm...",t,t,,Bay,,45.359,-75.80314,Entire apartment,Entire home/apt,4,,1 bath,1.0,1.0,"[""Wifi"", ""Washer"", ""Dryer"", ""Heating"", ""Free p...",$200.00,3,730,3,3,730,730,3.0,730.0,,t,0,0,0,0,2020-10-22,1,0,0,2011-07-04,2011-07-04,100.0,10.0,8.0,10.0,10.0,10.0,10.0,,f,1,1,0,0,0.01,0.985
2,158824,https://www.airbnb.com/rooms/158824,20201021220716,2020-10-23,DOWNTOWN/GLEBE HOUSE WITH BACKYARD,Big 3 bedroom house with backyard in desirable...,Very popular neighbourhood surrounded by milli...,https://a0.muscache.com/pictures/82249081/0a7b...,87296,https://www.airbnb.com/users/show/87296,Luc,2010-03-02,"Ottawa, Ontario, Canada",World Traveler always interested to meet new p...,within a day,100%,100%,f,https://a0.muscache.com/im/pictures/user/576b9...,https://a0.muscache.com/im/pictures/user/576b9...,,2,2,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,t,"Ottawa, Ontario, Canada",Capital,,45.40248,-75.69941,Entire house,Entire home/apt,3,,1 bath,2.0,2.0,"[""Wifi"", ""Essentials"", ""Smoke alarm"", ""Washer""...",$140.00,3,365,3,3,365,365,3.0,365.0,,t,0,0,0,0,2020-10-23,7,0,0,2011-09-22,2015-08-30,95.0,10.0,9.0,10.0,10.0,10.0,10.0,,f,1,1,0,0,0.06,0.743
3,261065,https://www.airbnb.com/rooms/261065,20201021220716,2020-10-23,Westboro Village Executive Suite,A tastefully decorated and well equipped upper...,"Westboro Village is a century-old community, a...",https://a0.muscache.com/pictures/2561680/6c684...,1369632,https://www.airbnb.com/users/show/1369632,Glenn,2011-11-04,"Ottawa, Ontario, Canada",My adventurous spirit was awakened when I went...,within a few hours,100%,71%,t,https://a0.muscache.com/im/users/1369632/profi...,https://a0.muscache.com/im/users/1369632/profi...,,1,1,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Ottawa, Ontario, Canada",Kitchissippi,,45.38972,-75.75496,Entire apartment,Entire home/apt,4,,1 bath,1.0,1.0,"[""Essentials"", ""Hot water"", ""Dryer"", ""Stove"", ...",$150.00,3,180,3,3,180,180,3.0,180.0,,t,19,49,79,354,2020-10-23,64,6,1,2011-12-24,2020-09-15,99.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,1,1,0,0,0.59,0.8304
4,336692,https://www.airbnb.com/rooms/336692,20201021220716,2020-10-23,Quiet Room with a view in the Ottawa Downtown,Renting only to females. Modern condo with pan...,Walking distance to:<br />-Parliament<br />-Do...,https://a0.muscache.com/pictures/12173287/8d76...,1712975,https://www.airbnb.com/users/show/1712975,Elena,2012-02-07,"Ottawa, Ontario, Canada",I am Elena. I work as a social worker. I am a ...,within a few hours,100%,0%,f,https://a0.muscache.com/im/pictures/user/599a4...,https://a0.muscache.com/im/pictures/user/599a4...,,2,2,"['email', 'phone', 'reviews', 'offline_governm...",t,t,"Ottawa, Ontario, Canada",Somerset,,45.41557,-75.70551,Private room in apartment,Private room,5,,1 shared bath,1.0,1.0,"[""Gym"", ""Luggage dropoff allowed"", ""Essentials...",$69.00,1,300,1,1,300,300,1.0,300.0,,t,17,47,77,352,2020-10-23,9,0,0,2017-07-02,2019-08-31,100.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,1,0,1,0,0.22,0.9537


In [None]:
# Per-column dtype and non-null count of the merged frame (shows which columns have gaps).
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2183 entries, 0 to 2182
Data columns (total 75 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            2183 non-null   int64  
 1   listing_url                                   2183 non-null   object 
 2   scrape_id                                     2183 non-null   int64  
 3   last_scraped                                  2183 non-null   object 
 4   name                                          2183 non-null   object 
 5   description                                   2143 non-null   object 
 6   neighborhood_overview                         1614 non-null   object 
 7   picture_url                                   2183 non-null   object 
 8   host_id                                       2183 non-null   int64  
 9   host_url                                      2183 non-null   o

In [None]:
# Size of the merged frame as (rows, columns).
df.shape

(2183, 75)

In [None]:
# The merged frame already has a clean 0..n-1 index, so resetting with the default
# drop=False only copied those labels into a redundant `index` data column.
# Mutating in place also made the cell unsafe to re-run (a second run adds
# `level_0`, a third raises). Discard the old labels and rebind instead.
# NOTE(review): `df` (and the CSV exported later) no longer contains an `index`
# column — confirm nothing outside this section reads it.
df = df.reset_index(drop=True)

In [None]:
# Columns removed from the merged frame before the row-level cleaning below.
# Grouped by theme for readability; the grouping has no effect on the result.
dropped_columns = [
    # listing picture and host profile
    'picture_url',
    'host_id', 'host_url', 'host_name', 'host_since', 'host_location',
    'host_about', 'host_response_time', 'host_response_rate',
    'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url',
    'host_picture_url', 'host_neighbourhood', 'host_listings_count',
    'host_total_listings_count', 'host_verifications',
    'host_has_profile_pic', 'host_identity_verified',
    # neighbourhood descriptors
    'neighbourhood', 'neighbourhood_cleansed',
    'neighbourhood_group_cleansed', 'neighborhood_overview',
    # stay-length limits
    'minimum_nights', 'maximum_nights',
    'minimum_minimum_nights', 'maximum_minimum_nights',
    'minimum_maximum_nights', 'maximum_maximum_nights',
    'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm',
    # calendar and availability
    'calendar_updated', 'has_availability',
    'availability_30', 'availability_60', 'availability_90',
    'availability_365', 'calendar_last_scraped',
    # licensing and booking mode
    'license', 'instant_bookable',
    # per-host listing counts
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms',
    # empty in the previewed rows; `bathrooms_text` is kept
    'bathrooms',
    # review dates, rating sub-scores and review rate
    'first_review', 'last_review',
    'review_scores_rating', 'review_scores_accuracy',
    'review_scores_cleanliness', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_location',
    'review_scores_value', 'reviews_per_month',
]

df = df.drop(columns=dropped_columns)

In [None]:
# Size after the column drop: the row count stays the same, only the column count shrinks.
df.shape

(2183, 21)

In [None]:
# Keep only listings that report a bedroom count.
df = df.dropna(subset=['bedrooms'])

In [None]:
# Size after removing rows that lack a `bedrooms` value.
df.shape

(2018, 21)

In [None]:
# Keep only listings that have a description text.
df = df.dropna(subset=['description'])

In [None]:
# Keep only listings that report a bed count.
df = df.dropna(subset=['beds'])

In [None]:
# Re-check dtypes and non-null counts once rows with missing bedrooms, description or beds are gone.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1981 entries, 0 to 2182
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   index                   1981 non-null   int64  
 1   id                      1981 non-null   int64  
 2   listing_url             1981 non-null   object 
 3   scrape_id               1981 non-null   int64  
 4   last_scraped            1981 non-null   object 
 5   name                    1981 non-null   object 
 6   description             1981 non-null   object 
 7   latitude                1981 non-null   float64
 8   longitude               1981 non-null   float64
 9   property_type           1981 non-null   object 
 10  room_type               1981 non-null   object 
 11  accommodates            1981 non-null   int64  
 12  bathrooms_text          1981 non-null   object 
 13  bedrooms                1981 non-null   float64
 14  beds                    1981 non-null   

In [None]:
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,index,id,listing_url,scrape_id,last_scraped,name,description,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,amenities,price,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,polarity
0,0,36642,https://www.airbnb.com/rooms/36642,20201021220716,2020-10-22,City Conveniences A Country Setting,<b>The space</b><br />Are you planning a vacat...,45.40159,-75.74679,Private room in house,Private room,2,2 baths,1.0,1.0,"[""Essentials"", ""Hot water"", ""Dryer"", ""Extra pi...",$65.00,87,1,0,0.807
1,1,59258,https://www.airbnb.com/rooms/59258,20201021220716,2020-10-22,Lower level of my house to rent,"Lower 1/2 of bungalow, a separate entrance, 10...",45.359,-75.80314,Entire apartment,Entire home/apt,4,1 bath,1.0,1.0,"[""Wifi"", ""Washer"", ""Dryer"", ""Heating"", ""Free p...",$200.00,1,0,0,0.985
2,2,158824,https://www.airbnb.com/rooms/158824,20201021220716,2020-10-23,DOWNTOWN/GLEBE HOUSE WITH BACKYARD,Big 3 bedroom house with backyard in desirable...,45.40248,-75.69941,Entire house,Entire home/apt,3,1 bath,2.0,2.0,"[""Wifi"", ""Essentials"", ""Smoke alarm"", ""Washer""...",$140.00,7,0,0,0.743
3,3,261065,https://www.airbnb.com/rooms/261065,20201021220716,2020-10-23,Westboro Village Executive Suite,A tastefully decorated and well equipped upper...,45.38972,-75.75496,Entire apartment,Entire home/apt,4,1 bath,1.0,1.0,"[""Essentials"", ""Hot water"", ""Dryer"", ""Stove"", ...",$150.00,64,6,1,0.8304
4,4,336692,https://www.airbnb.com/rooms/336692,20201021220716,2020-10-23,Quiet Room with a view in the Ottawa Downtown,Renting only to females. Modern condo with pan...,45.41557,-75.70551,Private room in apartment,Private room,5,1 shared bath,1.0,1.0,"[""Gym"", ""Luggage dropoff allowed"", ""Essentials...",$69.00,9,0,0,0.9537


In [None]:
# Persist the cleaned frame next to the notebook (path is relative to the working directory).
# index=True is the pandas default, made explicit here: the row labels are written
# as a leading, unnamed CSV column.
df.to_csv(path_or_buf='merged_listings_and_polarity.csv', index=True)

In [None]:
# Show the five listings with the highest polarity; ties on polarity are
# ordered by review count, also highest first.
(
    df
    .sort_values(by=['polarity', 'number_of_reviews'], ascending=[False, False])
    .head(5)
)

Unnamed: 0,index,id,listing_url,scrape_id,last_scraped,name,description,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,amenities,price,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,polarity
121,121,6475461,https://www.airbnb.com/rooms/6475461,20201021220716,2020-10-23,Crystal Bay Loft,Large loft room with ensuite bath in picturesq...,45.36295,-75.86228,Private room in house,Private room,2,1 private bath,1.0,1.0,"[""Luggage dropoff allowed"", ""Essentials"", ""Hot...",$65.00,108,13,3,0.9991
1790,1790,39297349,https://www.airbnb.com/rooms/39297349,20201021220716,2020-10-24,Ottawa Sleep Inn Back Packing Dorm Number 4,"Excellent location, conveniently located- walk...",45.41069,-75.69887,Shared room in house,Shared room,6,3 shared baths,1.0,6.0,"[""Heating"", ""Air conditioning"", ""Breakfast"", ""...",$39.00,14,12,1,0.9987
1391,1391,33135723,https://www.airbnb.com/rooms/33135723,20201021220716,2020-10-22,Entire Apartment Downtown Ottawa,"Amazing location, in a QUIET neighborhood. 3 m...",45.4175,-75.68969,Entire apartment,Entire home/apt,2,1 bath,1.0,1.0,"[""Luggage dropoff allowed"", ""Essentials"", ""Hot...",$86.00,2,2,2,0.9985
700,700,19783655,https://www.airbnb.com/rooms/19783655,20201021220716,2020-10-24,Beautiful Heritage Home - Sandy Hill,Beautiful heritage home right in the heart of ...,45.42849,-75.6804,Entire apartment,Entire home/apt,6,1 bath,3.0,3.0,"[""Dryer"", ""Heating"", ""Keypad"", ""Free street pa...",$160.00,18,0,0,0.9983
758,758,21184041,https://www.airbnb.com/rooms/21184041,20201021220716,2020-10-22,"Lovely, quiet home in central location","Welcome to Ottawa! This beautiful, modern semi...",45.37606,-75.73033,Entire townhouse,Entire home/apt,7,2 baths,3.0,3.0,"[""Essentials"", ""Hot water"", ""Dryer"", ""First ai...",$85.00,22,0,0,0.9982
