In [8]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
# Import train_test_split function
from sklearn.model_selection import train_test_split
#Import Random Forest Model classifier
from sklearn.ensemble import RandomForestClassifier
#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import seaborn as sns
# Show every column when displaying wide DataFrames. The fully qualified key is
# required: the short form 'max_columns' matches more than one option on recent
# pandas versions and raises OptionError.
pd.set_option('display.max_columns', None)
%matplotlib inline

In [2]:
# Load the two raw Airbnb tables: the per-day availability calendar and the listings catalogue
boston_calendar, boston_listings = map(
    pd.read_csv,
    ("boston_airbnb_data/calendar.csv", "boston_airbnb_data/listings.csv"),
)

In [6]:
# Drop the price column because it is not needed for the occupancy analysis
boston_calendar = boston_calendar.drop('price', axis=1)
# Replace 'available' with one indicator column per flag: available_t (true) and available_f (false).
# reindex guarantees that both indicator columns exist even if one flag never occurs in the data.
boston_calendar = pd.concat(
    [
        boston_calendar.drop('available', axis=1),
        pd.get_dummies(boston_calendar['available'], prefix='available', prefix_sep='_')
        .reindex(columns=['available_f', 'available_t'], fill_value=0),
    ],
    axis=1,
)
# Count, for each listing id, how many calendar rows are unavailable / available.
# Only the two indicator columns are summed: since pandas 2.0, groupby().sum() no longer
# silently drops non-numeric columns, so any other column left in the calendar
# (presumably a date string - not visible here) would otherwise be aggregated too.
boston_occupation = boston_calendar.groupby("listing_id")[["available_f", "available_t"]].sum()
# Occupancy percentage = days not available divided by the total number of days registered
boston_occupation["occupation_percentage"] = boston_occupation["available_f"]*100/(boston_occupation["available_f"]+boston_occupation["available_t"])

In [20]:
# Function that transforms the continuous output variable occupation_percentage into a categorical variable with five levels
def percentage_to_categorical(value):
    """Bucket an occupancy percentage into one of five ordinal levels.

    The input is converted with ``float()``. Level 1 covers 0 up to and
    including 20, level 2 covers values above 20 up to 40, and so on up to
    level 5 for values above 80 up to 100.

    Parameters
    ----------
    value : anything accepted by ``float()``
        Occupancy percentage.

    Returns
    -------
    int or float
        The level (1-5) as an int, or ``np.nan`` when the value is NaN or
        lies outside the closed interval [0, 100].
    """
    pct = float(value)
    # ceil(0 / 20) would be 0, so an exact zero is mapped to the lowest level explicitly
    if pct == 0:
        return 1
    inside_range = 0 < pct <= 100
    return math.ceil(pct / 20) if inside_range else np.nan

In [21]:
# Convert the continuous occupancy percentage into its categorical level and drop the
# helper count columns that are no longer used.
# Both steps are done in a single expression on a copy: if this cell is accidentally
# re-run (the count columns are already gone), the drop raises KeyError *before*
# boston_occupation is reassigned, instead of first re-bucketing the already
# categorical values (which would collapse every level to 1).
boston_occupation = (
    boston_occupation
    .assign(
        occupation_percentage=lambda frame: frame["occupation_percentage"].apply(percentage_to_categorical)
    )
    .drop(["available_f", "available_t"], axis=1)
)

In [24]:
# Create a consolidated dataframe with all the listings data and the occupation percentage.
# Inner join: only listing ids present in both tables are kept.
df = boston_listings.merge(
    boston_occupation,
    how="inner",
    left_on="id",
    right_on="listing_id",
)

In [28]:
# Preview the first rows of the merged listings + occupation dataframe
df.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,interaction,house_rules,thumbnail_url,medium_url,picture_url,xl_picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,zipcode,market,smart_location,country_code,country,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month,occupation_percentage
0,12147973,https://www.airbnb.com/rooms/12147973,20160906204935,2016-09-07,Sunny Bungalow in the City,"Cozy, sunny, family home. Master bedroom high...",The house has an open and cozy feel at the sam...,"Cozy, sunny, family home. Master bedroom high...",none,"Roslindale is quiet, convenient and friendly. ...",,"The bus stop is 2 blocks away, and frequent. B...","You will have access to 2 bedrooms, a living r...",,Clean up and treat the home the way you'd like...,https://a2.muscache.com/im/pictures/c0842db1-e...,https://a2.muscache.com/im/pictures/c0842db1-e...,https://a2.muscache.com/im/pictures/c0842db1-e...,https://a2.muscache.com/im/pictures/c0842db1-e...,31303940,https://www.airbnb.com/users/show/31303940,Virginia,2015-04-15,"Boston, Massachusetts, United States",We are country and city connecting in our deck...,,,,f,https://a2.muscache.com/im/pictures/5936fef0-b...,https://a2.muscache.com/im/pictures/5936fef0-b...,Roslindale,1,1,"['email', 'phone', 'facebook', 'reviews']",t,f,"Birch Street, Boston, MA 02131, United States",Roslindale,Roslindale,,Boston,MA,2131.0,Boston,"Boston, MA",US,United States,42.282619,-71.133068,t,House,Entire home/apt,4,1.5,2.0,3.0,Real Bed,"{TV,""Wireless Internet"",Kitchen,""Free Parking ...",,$250.00,,,,$35.00,1,$0.00,2,1125,2 weeks ago,,0,0,0,0,2016-09-06,0,,,,,,,,,,f,,,f,moderate,f,f,1,,5
1,3075044,https://www.airbnb.com/rooms/3075044,20160906204935,2016-09-07,Charming room in pet friendly apt,Charming and quiet room in a second floor 1910...,Small but cozy and quite room with a full size...,Charming and quiet room in a second floor 1910...,none,"The room is in Roslindale, a diverse and prima...","If you don't have a US cell phone, you can tex...",Plenty of safe street parking. Bus stops a few...,Apt has one more bedroom (which I use) and lar...,"If I am at home, I am likely working in my hom...",Pet friendly but please confirm with me if the...,https://a1.muscache.com/im/pictures/39327812/d...,https://a1.muscache.com/im/pictures/39327812/d...,https://a1.muscache.com/im/pictures/39327812/d...,https://a1.muscache.com/im/pictures/39327812/d...,2572247,https://www.airbnb.com/users/show/2572247,Andrea,2012-06-07,"Boston, Massachusetts, United States",I live in Boston and I like to travel and have...,within an hour,100%,100%,f,https://a2.muscache.com/im/users/2572247/profi...,https://a2.muscache.com/im/users/2572247/profi...,Roslindale,1,1,"['email', 'phone', 'facebook', 'linkedin', 'am...",t,t,"Pinehurst Street, Boston, MA 02131, United States",Roslindale,Roslindale,,Boston,MA,2131.0,Boston,"Boston, MA",US,United States,42.286241,-71.134374,t,Apartment,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,""Wireless Internet"",""Air Conditio...",,$65.00,$400.00,,$95.00,$10.00,0,$0.00,2,15,a week ago,,26,54,84,359,2016-09-06,36,2014-06-01,2016-08-13,94.0,10.0,9.0,10.0,10.0,9.0,9.0,f,,,t,moderate,f,f,1,1.3,1
2,6976,https://www.airbnb.com/rooms/6976,20160906204935,2016-09-07,Mexican Folk Art Haven in Boston,"Come stay with a friendly, middle-aged guy in ...","Come stay with a friendly, middle-aged guy in ...","Come stay with a friendly, middle-aged guy in ...",none,The LOCATION: Roslindale is a safe and diverse...,I am in a scenic part of Boston with a couple ...,"PUBLIC TRANSPORTATION: From the house, quick p...","I am living in the apartment during your stay,...","ABOUT ME: I'm a laid-back, friendly, unmarried...","I encourage you to use my kitchen, cooking and...",https://a2.muscache.com/im/pictures/6ae8335d-9...,https://a2.muscache.com/im/pictures/6ae8335d-9...,https://a2.muscache.com/im/pictures/6ae8335d-9...,https://a2.muscache.com/im/pictures/6ae8335d-9...,16701,https://www.airbnb.com/users/show/16701,Phil,2009-05-11,"Boston, Massachusetts, United States","I am a middle-aged, single male with a wide ra...",within a few hours,100%,88%,t,https://a2.muscache.com/im/users/16701/profile...,https://a2.muscache.com/im/users/16701/profile...,Roslindale,1,1,"['email', 'phone', 'reviews', 'jumio']",t,t,"Ardale St., Boston, MA 02131, United States",Roslindale,Roslindale,,Boston,MA,2131.0,Boston,"Boston, MA",US,United States,42.292438,-71.135765,t,Apartment,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",""Wireless Internet"",""Air Condit...",,$65.00,$395.00,"$1,350.00",,,1,$20.00,3,45,5 days ago,,19,46,61,319,2016-09-06,41,2009-07-19,2016-08-05,98.0,10.0,9.0,10.0,10.0,9.0,10.0,f,,,f,moderate,t,f,1,0.47,1
3,1436513,https://www.airbnb.com/rooms/1436513,20160906204935,2016-09-07,Spacious Sunny Bedroom Suite in Historic Home,Come experience the comforts of home away from...,Most places you find in Boston are small howev...,Come experience the comforts of home away from...,none,Roslindale is a lovely little neighborhood loc...,Please be mindful of the property as it is old...,There are buses that stop right in front of th...,The basement has a washer dryer and gym area. ...,We do live in the house therefore might be som...,- The bathroom and house are shared so please ...,https://a2.muscache.com/im/pictures/39764190-1...,https://a2.muscache.com/im/pictures/39764190-1...,https://a2.muscache.com/im/pictures/39764190-1...,https://a2.muscache.com/im/pictures/39764190-1...,6031442,https://www.airbnb.com/users/show/6031442,Meghna,2013-04-21,"Boston, Massachusetts, United States",My husband and I live on the property. He’s a...,within a few hours,100%,50%,f,https://a2.muscache.com/im/pictures/5d430cde-7...,https://a2.muscache.com/im/pictures/5d430cde-7...,,1,1,"['email', 'phone', 'reviews']",t,f,"Boston, MA, United States",,Roslindale,,Boston,MA,,Boston,"Boston, MA",US,United States,42.281106,-71.121021,f,House,Private room,4,1.0,1.0,2.0,Real Bed,"{TV,Internet,""Wireless Internet"",""Air Conditio...",,$75.00,,,$100.00,$50.00,2,$25.00,1,1125,a week ago,,6,16,26,98,2016-09-06,1,2016-08-28,2016-08-28,100.0,10.0,10.0,10.0,10.0,10.0,10.0,f,,,f,moderate,f,f,1,1.0,4
4,7651065,https://www.airbnb.com/rooms/7651065,20160906204935,2016-09-07,Come Home to Boston,"My comfy, clean and relaxing home is one block...","Clean, attractive, private room, one block fro...","My comfy, clean and relaxing home is one block...",none,"I love the proximity to downtown, the neighbor...",I have one roommate who lives on the lower lev...,From Logan Airport and South Station you have...,You will have access to the front and side por...,I love my city and really enjoy sharing it wit...,"Please no smoking in the house, porch or on th...",https://a1.muscache.com/im/pictures/97154760/8...,https://a1.muscache.com/im/pictures/97154760/8...,https://a1.muscache.com/im/pictures/97154760/8...,https://a1.muscache.com/im/pictures/97154760/8...,15396970,https://www.airbnb.com/users/show/15396970,Linda,2014-05-11,"Boston, Massachusetts, United States",I work full time for a public school district....,within an hour,100%,100%,t,https://a0.muscache.com/im/users/15396970/prof...,https://a0.muscache.com/im/users/15396970/prof...,Roslindale,1,1,"['email', 'phone', 'reviews', 'kba']",t,t,"Durnell Avenue, Boston, MA 02131, United States",Roslindale,Roslindale,,Boston,MA,2131.0,Boston,"Boston, MA",US,United States,42.284512,-71.136258,t,House,Private room,2,1.5,1.0,2.0,Real Bed,"{Internet,""Wireless Internet"",""Air Conditionin...",,$79.00,,,,$15.00,1,$0.00,2,31,2 weeks ago,,13,34,59,334,2016-09-06,29,2015-08-18,2016-09-01,99.0,10.0,10.0,10.0,10.0,9.0,10.0,f,,,f,flexible,f,f,1,2.25,1


In [32]:
# Summary statistics for every column (include="all" covers non-numeric columns as well)
df.describe(include="all")

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,interaction,house_rules,thumbnail_url,medium_url,picture_url,xl_picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,zipcode,market,smart_location,country_code,country,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,reviews_per_month,occupation_percentage
count,3585.0,3585,3585.0,3585,3585,3442,2528,3585,3585,2170,1610,2295,2096,2031,2393,2986,2986,3585,2986,3585.0,3585,3585,3585,3574,2276,3114,3114,3114,3585,3585,3585,3246,3585.0,3585.0,3585,3585,3585,3585,3042,3585,0.0,3583,3585,3547.0,3571,3585,3585,3585,3585.0,3585.0,3585,3582,3585,3585.0,3571.0,3575.0,3576.0,3585,3585,56.0,3585,892,888,1342,2478,3585.0,3585,3585.0,3585.0,3585,0.0,3585.0,3585.0,3585.0,3585.0,3585,3585.0,2829,2829,2772.0,2762.0,2767.0,2765.0,2767.0,2763.0,2764.0,3585,0.0,0.0,3585,3585,3585,3585,3585.0,2829.0,3585.0
unique,,3585,,1,3504,3113,2268,3423,1,1728,1269,1859,1762,1617,1928,2986,2986,3585,2986,,2181,1334,1281,176,1240,4,52,72,2,2174,2174,53,,,83,2,2,1239,30,25,,38,1,43.0,4,39,1,1,,,2,13,3,,,,,5,3092,,324,243,288,54,79,,51,,,38,,,,,,1,,975,404,,,,,,,,1,,,2,4,2,2,,,
top,,https://www.airbnb.com/rooms/6426364,,2016-09-07,Lux 2BR by Fenway w/WiFi,"The unit is stylishly designed for comfort, va...",The studio's kitchenette comes equipped with a...,Studio offers the luxury of home furnishings w...,none,Conveniently bordering the neighborhoods of th...,Although we do not allow pets in our apartment...,Located less than a five minute walk to Back B...,"You have access to the entire apartment, and a...",Need more towels? A restaurant recommendation?...,House Rules 1. Check-in is 4 pm local time. If...,https://a2.muscache.com/im/pictures/91542103/9...,https://a2.muscache.com/im/pictures/3361c60a-6...,https://a2.muscache.com/im/pictures/ac80a478-5...,https://a2.muscache.com/im/pictures/1cfd35ce-b...,,https://www.airbnb.com/users/show/30283594,Kara,2015-03-30,"Boston, Massachusetts, United States",ABOUT US:\r\n\r\nSeamlessTransition is a full-...,within an hour,100%,100%,f,https://a2.muscache.com/im/users/30283594/prof...,https://a2.muscache.com/im/users/30283594/prof...,Allston-Brighton,,,"['email', 'phone', 'reviews', 'jumio']",t,t,"Boylston Street, Boston, MA 02215, United States",Allston-Brighton,Jamaica Plain,,Boston,MA,2116.0,Boston,"Boston, MA",US,United States,,,t,Apartment,Entire home/apt,,,,,Real Bed,{},,$150.00,"$1,000.00","$3,500.00",$100.00,$100.00,,$0.00,,,today,,,,,,2016-09-06,,2016-04-05,2016-09-05,,,,,,,,f,,,f,strict,f,f,,,
freq,,1,,3585,8,21,17,7,3585,21,44,26,48,58,44,1,1,1,1,,136,138,137,2421,79,1384,2072,1210,3178,136,136,375,,,930,3577,2603,64,364,343,,3381,3585,388.0,3568,3381,3585,3585,,,3080,2612,2127,,,,,3453,32,,144,48,30,299,353,,2211,,,833,,,,,,3585,,23,118,,,,,,,,3585,,,2991,1582,3518,3348,,,
mean,8440875.0,,20160910000000.0,,,,,,,,,,,,,,,,,24923110.0,,,,,,,,,,,,,58.902371,58.902371,,,,,,,,,,,,,,,42.340032,-71.084818,,,,3.041283,1.221647,1.255944,1.60906,,,858.464286,,,,,,1.429847,,3.171269,28725.84,,,8.64993,21.833194,38.558159,179.346444,,19.04463,,,91.916667,9.431571,9.258041,9.646293,9.646549,9.414043,9.168234,,,,,,,,12.733891,1.970908,2.973222
std,4500787.0,,0.0,,,,,,,,,,,,,,,,,22927810.0,,,,,,,,,,,,,171.119663,171.119663,,,,,,,,,,,,,,,0.024403,0.031565,,,,1.778929,0.501487,0.75306,1.011745,,,608.87431,,,,,,1.056787,,8.874133,1670136.0,,,10.43533,21.860966,33.158272,142.13618,,35.571658,,,9.531686,0.931863,1.168977,0.762753,0.735507,0.903436,1.011116,,,,,,,,29.415076,2.120561,1.749636
min,3353.0,,20160910000000.0,,,,,,,,,,,,,,,,,4240.0,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,,,,,,42.235942,-71.171789,,,,1.0,0.0,0.0,0.0,,,0.0,,,,,,0.0,,1.0,1.0,,,0.0,0.0,0.0,0.0,,0.0,,,20.0,2.0,2.0,2.0,4.0,2.0,2.0,,,,,,,,1.0,0.01,1.0
25%,4679319.0,,20160910000000.0,,,,,,,,,,,,,,,,,6103425.0,,,,,,,,,,,,,1.0,1.0,,,,,,,,,,,,,,,42.329995,-71.105083,,,,2.0,1.0,1.0,1.0,,,415.0,,,,,,1.0,,1.0,365.0,,,0.0,0.0,0.0,19.0,,1.0,,,89.0,9.0,9.0,9.0,9.0,9.0,9.0,,,,,,,,1.0,0.48,1.0
50%,8577620.0,,20160910000000.0,,,,,,,,,,,,,,,,,19281000.0,,,,,,,,,,,,,2.0,2.0,,,,,,,,,,,,,,,42.345201,-71.078429,,,,2.0,1.0,1.0,1.0,,,825.0,,,,,,1.0,,2.0,1125.0,,,4.0,16.0,37.0,179.0,,5.0,,,94.0,10.0,10.0,10.0,10.0,10.0,9.0,,,,,,,,2.0,1.17,3.0
75%,12789530.0,,20160910000000.0,,,,,,,,,,,,,,,,,36221470.0,,,,,,,,,,,,,7.0,7.0,,,,,,,,,,,,,,,42.354685,-71.062155,,,,4.0,1.0,2.0,2.0,,,1200.0,,,,,,1.0,,3.0,1125.0,,,15.0,40.0,68.0,325.0,,21.0,,,98.25,10.0,10.0,10.0,10.0,10.0,10.0,,,,,,,,6.0,2.72,5.0


In [30]:
# List the column names of the merged dataframe; they drive the column analysis below
df.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'summary',
       'space', 'description', 'experiences_offered', 'neighborhood_overview',
       'notes', 'transit', 'access', 'interaction', 'house_rules',
       'thumbnail_url', 'medium_url', 'picture_url', 'xl_picture_url',
       'host_id', 'host_url', 'host_name', 'host_since', 'host_location',
       'host_about', 'host_response_time', 'host_response_rate',
       'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url',
       'host_picture_url', 'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'street',
       'neighbourhood', 'neighbourhood_cleansed',
       'neighbourhood_group_cleansed', 'city', 'state', 'zipcode', 'market',
       'smart_location', 'country_code', 'country', 'latitude', 'longitude',
       'is_location_exact', 'property_type', 'room_type', 'accommodates',
       'bathrooms',

Column analysis

column: id, dimension: property, strategy: none <br>
column: listing_url, dimension: none, strategy: drop - out of scope for this analysis <br>
column: scrape_id, dimension: none, strategy: drop - out of scope for this analysis <br>
column: last_scraped, dimension: none, strategy: drop - out of scope for this analysis <br>
column: name, dimension: property, strategy: drop - out of scope for this analysis <br>
column: summary, dimension: property, strategy: drop - out of scope for this analysis <br>
column: space, dimension: property, strategy: drop - out of scope for this analysis <br>
column: description, dimension: property, strategy: drop - out of scope for this analysis <br>
column: experiences_offered, dimension: property, strategy: drop - all values are "none" <br>
column: neighborhood_overview, dimension: property, strategy: drop - out of scope for this analysis <br>
column: notes, dimension: property, strategy: drop - out of scope for this analysis <br>
column: transit, dimension: property, strategy: drop - out of scope for this analysis <br>
column: access, dimension: property, strategy: drop - out of scope for this analysis <br>
column: interaction, dimension: property, strategy: drop - out of scope for this analysis <br>
column: house_rules, dimension: property, strategy: drop - out of scope for this analysis <br>
column: thumbnail_url, dimension: property, strategy: drop - out of scope for this analysis <br>
column: medium_url, dimension: property, strategy: drop - out of scope for this analysis <br>
column: picture_url, dimension: property, strategy: drop - out of scope for this analysis <br>
column: xl_picture_url, dimension: property, strategy: drop - out of scope for this analysis <br>
column: host_id, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_url, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_name, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_since, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_location, dimension: host, strategy: transform to column host in city <br>
column: host_about, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_response_time, dimension: host, strategy: get dummies <br>
column: host_response_rate, dimension: host, strategy: clear characters to be continuous variable <br>
column: host_acceptance_rate, dimension: host, strategy: Drop because having a high acceptance rate is directly related to having a high occupation percentage <br>
column: host_is_superhost, dimension: host, strategy: get dummies <br>
column: host_thumbnail_url, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_picture_url, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_neighbourhood, dimension: host, strategy: drop - out of scope for this analysis <br>
column: host_listings_count, dimension: host, strategy: drop - redundant with the following column <br>
column: host_total_listings_count, dimension: host, strategy: none <br>
column: host_verifications, dimension: host, strategy: apply special function to get dummies <br>
column: host_has_profile_pic, dimension: host, strategy: get dummies <br>
column: host_identity_verified, dimension: host, strategy: get dummies <br>
column: street, dimension: property, strategy: drop - out of scope for this analysis <br>
column: neighbourhood, dimension: property, strategy: drop - out of scope for this analysis <br>
column: neighbourhood_cleansed, dimension: property, strategy: get dummies and experiment geographic variables <br>
column: neighbourhood_group_cleansed, dimension: property, strategy: drop - out of scope for this analysis <br>
column: city, dimension: property, strategy: use to filter only properties in Boston and then drop <br>
column: state, dimension: property, strategy: drop - out of scope for this analysis <br>
column: zipcode, dimension: property, strategy: get dummies and experiment geographic variables <br>
column: market, dimension: property, strategy: drop - out of scope for this analysis <br>
column: smart_location, dimension: property, strategy: drop - out of scope for this analysis <br>
column: country_code, dimension: property, strategy: drop - out of scope for this analysis <br>
column: country, dimension: property, strategy: drop - out of scope for this analysis <br>
column: latitude, dimension: property, strategy: drop - none and experiment geographic variables <br>
column: longitude, dimension: property, strategy: drop - none and experiment geographic variables<br>
column: is_location_exact, dimension: property, strategy: drop - out of scope for this analysis <br>
column: property_type, dimension: property, strategy: get dummies <br>
column: room_type, dimension: property, strategy: get dummies <br>
column: accommodates, dimension: property, strategy: none <br>
column: bathrooms, dimension: property, strategy: none <br>
column: bedrooms, dimension: property, strategy: none <br>
column: beds, dimension: property, strategy: none <br>
column: bed_type, dimension: property, strategy: get dummies <br>
column: amenities, dimension: property, strategy: apply special function to get dummies and use in final experiment <br>
column: square_feet, dimension: property, strategy: none <br>
column: price, dimension: property, strategy: clear characters to be continuous variable <br>
column: weekly_price, dimension: property, strategy: drop - is closely related to the price column <br>
column: monthly_price, dimension: property, strategy: drop - is closely related to the price column <br>
column: security_deposit, dimension: property, strategy: clear characters to be continuous variable <br>
column: cleaning_fee, dimension: property, strategy: clear characters to be continuous variable <br>
column: guests_included, dimension: property, strategy: none <br>
column: extra_people, dimension: property, strategy: clear characters to be continuous variable <br>
column: minimum_nights, dimension: property, strategy: none <br>
column: maximum_nights, dimension: property, strategy: none <br>
column: calendar_updated, dimension: property, strategy: drop - out of scope for this analysis <br>
column: has_availability, dimension: property, strategy: Drop because having availability is directly related to having a low occupation percentage <br>
column: availability_30, dimension: property, strategy: Drop because having availability is directly related to having a low occupation percentage <br>
column: availability_60, dimension: property, strategy: Drop because having availability is directly related to having a low occupation percentage <br>
column: availability_90, dimension: property, strategy: Drop because having availability is directly related to having a low occupation percentage <br>
column: availability_365, dimension: property, strategy: Drop because having availability is directly related to having a low occupation percentage <br>
column: calendar_last_scraped, dimension: property, strategy: drop - out of scope for this analysis <br>
column: number_of_reviews, dimension: reviews, strategy: none <br>
column: first_review, dimension: reviews, strategy: drop - out of scope for this analysis <br>
column: last_review, dimension: reviews, strategy: drop - out of scope for this analysis <br>
column: review_scores_rating, dimension: reviews, strategy: none <br>
column: review_scores_accuracy, dimension: reviews, strategy: none <br>
column: review_scores_cleanliness, dimension: reviews, strategy: none <br>
column: review_scores_checkin, dimension: reviews, strategy: none <br>
column: review_scores_communication, dimension: reviews, strategy: none <br>
column: review_scores_location, dimension: reviews, strategy: none <br>
column: review_scores_value, dimension: reviews, strategy: none <br>
column: requires_license, dimension: property, strategy: drop - all values f <br>
column: license, dimension: property, strategy: drop - all values nan <br>
column: jurisdiction_names, dimension: property, strategy: drop - all values nan <br>
column: instant_bookable, dimension: property, strategy: get dummies <br>
column: cancellation_policy, dimension: property, strategy: get dummies <br>
column: require_guest_profile_picture, dimension: reviews, strategy: get dummies <br>
column: require_guest_phone_verification, dimension: reviews, strategy: get dummies <br>
column: calculated_host_listings_count, dimension: host, strategy: drop - redundant with host_total_listings_count column <br>
column: reviews_per_month, dimension: reviews, strategy: Drop because having more reviews per period of time is directly related to having a high occupation percentage <br>

In [None]:
# NaN distribution: drop rows with NaN values, except in the review columns, because those are the only columns that matter for an entire dimension
# and they have a considerable percentage of missing values