In [0]:
# Generic imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
!pip install category_encoders
!pip install geocoder



In [3]:
!gunzip calendar.csv.gz
!gunzip listings.csv.gz

gzip: calendar.csv.gz: No such file or directory
gzip: listings.csv.gz: No such file or directory


In [0]:
# Load calendar data
df_cal = pd.read_csv('calendar.csv')

In [5]:
df_cal.head()

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,35922,2019-05-05,t,$900.00,$900.00,31.0,1125.0
1,85246,2019-05-05,f,$97.00,$97.00,5.0,190.0
2,85246,2019-05-06,f,$97.00,$97.00,5.0,190.0
3,85246,2019-05-07,f,$97.00,$97.00,5.0,190.0
4,85246,2019-05-08,f,$97.00,$97.00,5.0,190.0


In [6]:
df_cal.dtypes

listing_id          int64
date               object
available          object
price              object
adjusted_price     object
minimum_nights    float64
maximum_nights    float64
dtype: object

In [7]:
# Check for NaN values
df_cal.isnull().sum()

listing_id          0
date                0
available           0
price             365
adjusted_price    365
minimum_nights      6
maximum_nights      6
dtype: int64

In [8]:
df_cal.shape

(16043437, 7)

In [9]:
df_cal.listing_id.nunique()

43954

In [10]:
df_cal_price_diff = df_cal[df_cal["adjusted_price"] != df_cal["price"]]
df_cal_price_diff.listing_id.nunique()

998

In [11]:
df_cal.listing_id.nunique()

43954

In [0]:
# Adjusted price is highly colinear to price. So we can choose either one.
df_cal.drop(columns=['adjusted_price'], inplace=True)

# Minimum and Maximum nights shall be considered from listings.
df_cal.drop(columns=['minimum_nights', 'maximum_nights'], inplace=True)

In [0]:
# Convert price to float
# Calculated the median value initially by setting NaN to 0.
# median is initialized to 115.0 based on that.
df_cal.price.fillna(value='$'+str(115.0), inplace=True)
df_cal.price = df_cal.price.apply(lambda x: float(str(x)[1:].replace(',','')))

# Update to numerical form
df_cal['available'] = \
   df_cal['available'].apply(lambda x: 1 if x == 't' else 0)

# Add data and season related information
df_cal['date'] = pd.to_datetime(df_cal['date'])
df_cal['year'] = df_cal['date'].dt.year
df_cal['month'] = df_cal['date'].dt.month
df_cal['day_of_month'] = df_cal['date'].dt.day
df_cal['day_of_week'] = df_cal['date'].dt.weekday

In [14]:
df_cal.describe(exclude=[np.number])

Unnamed: 0,date
count,16043437
unique,367
top,2019-07-12 00:00:00
freq,43954
first,2019-05-05 00:00:00
last,2020-05-05 00:00:00


In [15]:
df_cal.describe()

Unnamed: 0,listing_id,available,price,year,month,day_of_month,day_of_week
count,16043440.0,16043440.0,16043440.0,16043440.0,16043440.0,16043440.0,16043440.0
mean,20063560.0,0.4435412,210.9703,2019.341,6.517787,15.78745,2.999604
std,9871730.0,0.4968022,511.7769,0.4741004,3.455028,8.80393,2.003439
min,109.0,0.0,0.0,2019.0,1.0,1.0,0.0
25%,12953810.0,0.0,73.0,2019.0,4.0,8.0,1.0
50%,21371410.0,0.0,115.0,2019.0,7.0,16.0,3.0
75%,28678310.0,1.0,195.0,2020.0,10.0,23.0,5.0
max,34426410.0,1.0,25000.0,2020.0,12.0,31.0,6.0


In [16]:
# Check for NaN values
df_cal.isnull().sum()

listing_id      0
date            0
available       0
price           0
year            0
month           0
day_of_month    0
day_of_week     0
dtype: int64

In [17]:
df_listing = pd.read_csv('listings.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [18]:
df_listing.shape

(43954, 106)

In [0]:
pd.set_option('display.max_columns', None)  # Unlimited columns
pd.set_option('display.max_rows', None)  # Unlimited rows

In [20]:
df_listing.isnull().sum()

id                                                  0
listing_url                                         0
scrape_id                                           0
last_scraped                                        0
name                                                4
summary                                          1698
space                                           11319
description                                       954
experiences_offered                                 0
neighborhood_overview                           14977
notes                                           21588
transit                                         16853
access                                          16670
interaction                                     16005
house_rules                                     13286
thumbnail_url                                   43954
medium_url                                      43954
picture_url                                         0
xl_picture_url              

In [21]:
df_listing.sample(2)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,interaction,house_rules,thumbnail_url,medium_url,picture_url,xl_picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,street,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,city,state,zipcode,market,smart_location,country_code,country,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,price,weekly_price,monthly_price,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,license,jurisdiction_names,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
3945,9682083,https://www.airbnb.com/rooms/9682083,20190505154550,2019-05-06,Quiet Studio Under Hollywood Sign,Private studio in Beachwood Canyon steps from ...,Tucked away on a rustic canyon road in a peace...,Private studio in Beachwood Canyon steps from ...,none,"The neighborhood is hillside, Los Angeles cany...",The space is small but gracious. It is best s...,There is a major transportation hub minutes fr...,The studio sits over our garage (used only occ...,I make myself available to greet guests as the...,,,,https://a0.muscache.com/im/pictures/325b774a-f...,,6151105,https://www.airbnb.com/users/show/6151105,Juliette,2013-04-29,"Los Angeles, California, United States","originally from NY, now living in Los Angeles....",within an hour,100%,,t,https://a0.muscache.com/im/users/6151105/profi...,https://a0.muscache.com/im/users/6151105/profi...,Hollywood Hills,1.0,1.0,"['email', 'phone', 'reviews']",t,f,"Los Angeles, CA, United States",Hollywood Hills,Hollywood Hills,,Los Angeles,CA,90068,Los Angeles,"Los Angeles, CA",US,United States,34.12608,-118.31802,t,Guest suite,Entire home/apt,2,1.0,1.0,1.0,Real Bed,"{TV,""Cable TV"",Internet,Wifi,""Air conditioning...",,$120.00,,,,$50.00,2,$15.00,1,1125,1,1,1125,1125,1.0,1125.0,today,t,7,12,25,248,2019-05-06,63,33,2016-02-15,2019-05-03,100.0,10.0,10.0,10.0,10.0,10.0,10.0,f,,"{""City of Los Angeles"","" CA""}",t,f,strict_14_with_grace_period,f,f,1,1,0,0,1.61
10885,20095535,https://www.airbnb.com/rooms/20095535,20190505154550,2019-05-06,"Center of LA, Cozy in Echo Park",1 bedroom with private bathroom available in h...,Echo park in a neighborhood in Central LA. Its...,1 bedroom with private bathroom available in h...,none,Echo Park is known to be a “hipster” neighborh...,This is not a listing for the entire space to ...,Uber is pretty cheap depending on time (rates ...,Full access to kitchen and living room area. N...,For any questions please send me a text.,ABSOLUTELY NO SHOES ALLOWED. This is not a pre...,,,https://a0.muscache.com/im/pictures/e92e6ce5-4...,,27540775,https://www.airbnb.com/users/show/27540775,Ruby,2015-02-10,"California, United States",Hello! My name is Ruby:) I usually travel alon...,within an hour,100%,,f,https://a0.muscache.com/im/pictures/fe112bcf-4...,https://a0.muscache.com/im/pictures/fe112bcf-4...,,1.0,1.0,"['email', 'phone', 'reviews', 'jumio', 'govern...",t,t,"Los Angeles, CA, United States",Echo Park,Echo Park,,Los Angeles,CA,90026,Los Angeles,"Los Angeles, CA",US,United States,34.08302,-118.25941,f,Apartment,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Wifi,""Air conditioning"",Kitchen,""Free stre...",,$57.00,,,,$20.00,2,$30.00,1,3,1,1,3,3,1.0,3.0,a week ago,t,7,15,20,24,2019-05-06,12,12,2019-02-15,2019-05-01,96.0,10.0,10.0,10.0,9.0,9.0,9.0,f,,"{""City of Los Angeles"","" CA""}",t,f,flexible,f,f,1,0,1,0,4.44


In [0]:
# Dropping columns which are either.
# 1. Difficult to parse. (NLP)
# 2. Not much variance.
# 3. To many NaN values
# 4. Carries no valuable information.

columns = ['listing_url',
           'scrape_id',
           'last_scraped',
           'name',
           'summary',
           'space',
           'description',
           'experiences_offered',
           'neighborhood_overview',
           'notes',
           'transit',
           'access',
           'interaction',
           'house_rules',
           'thumbnail_url',
           'medium_url',
           'picture_url',
           'xl_picture_url',
           'host_id',
           'host_url',
           'host_name',
           'host_location',
           'host_about',
           'host_acceptance_rate',
           'host_thumbnail_url',
           'host_picture_url',
           'host_neighbourhood',
           'host_listings_count',
           'host_verifications',
           'street',
           'neighbourhood',
           'neighbourhood_cleansed',
           'neighbourhood_group_cleansed',
           'city',
           'state',
           'market',
           'smart_location',
           'country_code',
           'country',
           'calendar_last_scraped',
           'license',
           'jurisdiction_names',
           'square_feet',
           'weekly_price',
           'monthly_price']

df_listing.drop(columns=columns, inplace=True)

In [23]:
df_listing.security_deposit.head()

0    $1,000.00
1      $400.00
2      $100.00
3          NaN
4        $0.00
Name: security_deposit, dtype: object

In [0]:
import geocoder
from time import sleep

def get_zipcode_from_latlng(lat, lng):
  
  # Update MapBox API key
  MAPBOX_ACCESS_TOKEN=""
  
  latlng = [lat, lng]
  g = geocoder.mapbox(latlng, key=MAPBOX_ACCESS_TOKEN, method='reverse')
  sleep(1)
  
  if not g.ok:
    return np.nan
  
  print(f"{lat},{lng},{g.json['postal']}")
  
  return g.json['postal']

def fetch_zipcode(row):
  if isinstance(row['zipcode'], float) and np.isnan(row['zipcode']):
    return get_zipcode_from_latlng(row['latitude'], row['longitude'])

  return row['zipcode']

def cleanup_zipcode(col):
  clean_up = {
      'Near 91304': '91304',
      '90014-3002': '90014',
      '90028\n\n90017': '90028',
      '91744-3228': '91744',
      '90039-2715': '90039',
      'CA91780': '91780',
      'CA91748': '91748',
      '91001-2243': '91001',
      '139 S Valencia Ave, Glendora. 91741': '91741',
      'CA 91765': '91765',
      '90005-3747': '90005',
      '90036-2514': '90036',
      '0': np.nan,
      '900': np.nan,
      '91606-1412': '91606',
      90.0: np.nan
  }
  
  if col in clean_up:
    return clean_up[col]
  
  if isinstance(col, str):
    return col
  
  if np.isnan(col):
    return col
  
  return str(col).split(".")[0]

In [25]:
"""
# Clean up zipcodes
df_listing['zipcode'] = \
    df_listing['zipcode'].apply(cleanup_zipcode)

# Update missing zipcodes
df_listing['zipcode'] = \
  df_listing.apply(fetch_zipcode, axis=1)

# Export the csv file to avoid invoking of MapBox API repeatedly
df_listing.to_csv ('listings_with_zipcode.csv', index = False, header=True)

df_listing['zipcode'].nunique()
"""

"\n# Clean up zipcodes\ndf_listing['zipcode'] =     df_listing['zipcode'].apply(cleanup_zipcode)\n\n# Update missing zipcodes\ndf_listing['zipcode'] =   df_listing.apply(fetch_zipcode, axis=1)\n\n# Export the csv file to avoid invoking of MapBox API repeatedly\ndf_listing.to_csv ('listings_with_zipcode.csv', index = False, header=True)\n\ndf_listing['zipcode'].nunique()\n"

In [0]:
# Re-init df_listing
df_listing = pd.read_csv('listings_with_zipcode.csv')

In [0]:
def cleanup_hostresponserate(col):
  if isinstance(col, float):
    return 0.0
  
  return int(col.replace('%',''))

def cleanup_securitydeposit(col):
  if isinstance(col, float):
    return 0.0
  
  return float(col.replace('$','').replace(',',''))

def cleanup_cleaningfee(col):
  if isinstance(col, float):
    return 0.0
  
  return float(col.replace('$','').replace(',',''))

In [0]:
# Clean up procedure to removew NaNs

# Fill NaNs as unknown which shall be Label Encoded later
df_listing.host_response_time.fillna(value='unknown', inplace=True)

# Drop rows with NaN for following columns
df_listing.host_since.dropna(inplace=True)
df_listing.dropna(subset=['host_since', 
                  'host_is_superhost', 
                  'host_total_listings_count', 
                  'host_has_profile_pic', 
                  'host_identity_verified',
                  'bathrooms',
                  'bedrooms',
                  'beds'], inplace=True)

# Drop columns which shall not be used for modeling
columns = ['first_review',
           'last_review']

df_listing.drop(columns=columns, inplace=True)

# Convert response rate to float. Here replacing NaN with 0.
df_listing['host_response_rate'] = \
    df_listing['host_response_rate'].apply(cleanup_hostresponserate)

# Convert security deposit to float. Here replacing NaN with 0.
df_listing['security_deposit'] = \
    df_listing['security_deposit'].apply(cleanup_securitydeposit)

# Convert cleaning fee to float. Here replacing NaN with 0.
df_listing['cleaning_fee'] = \
    df_listing['cleaning_fee'].apply(cleanup_cleaningfee)

# Update the missing rewiew score values with median value.
review_columns = ['review_scores_rating',
                  'review_scores_accuracy',
                  'review_scores_cleanliness',
                  'review_scores_checkin',
                  'review_scores_communication',
                  'review_scores_location',
                  'review_scores_value']

for column in review_columns:
  median = df_listing[column].median()
  df_listing[column] = \
    df_listing[column].apply(lambda x: median if np.isnan(x) else x)

# Update the missing rewiew per month values with mean value .
mean = df_listing['reviews_per_month'].mean()
df_listing['reviews_per_month'] = \
    df_listing['reviews_per_month'].apply(lambda x: mean if np.isnan(x) else x)

In [29]:
df_listing.isnull().sum().sum()

0

In [0]:
# Feature Engineering
import category_encoders as ce
from datetime import datetime

In [0]:
# Derive the number of days since hosting from host since.
# Then to avoid colinearity drop host since.
today = datetime.today()

df_listing['host_since'] = pd.to_datetime(df_listing['host_since'])
df_listing['host_since_in_days'] = \
   df_listing['host_since'].apply(lambda x: (today - x).days)

df_listing.drop(columns=['host_since'], inplace=True)

# Update to numerical form
df_listing['host_is_superhost'] = \
   df_listing['host_is_superhost'].apply(lambda x: 1 if x == 't' else 0)
df_listing['host_has_profile_pic'] = \
   df_listing['host_has_profile_pic'].apply(lambda x: 1 if x == 't' else 0)
df_listing['host_identity_verified'] = \
   df_listing['host_identity_verified'].apply(lambda x: 1 if x == 't' else 0)
df_listing['is_location_exact'] = \
   df_listing['is_location_exact'].apply(lambda x: 1 if x == 't' else 0)


# Updating the property type to lis only top 10
v = df_listing['property_type'].value_counts() <= 366

df_listing.loc[df_listing['property_type'].isin(v.index[v]), \
               'property_type'] = 'Other'

# Get the total number of listed amenities. Here all amenities are 
# considered to be of same weightable but this is not true mostly.
df_listing['amenities_count'] = \
   df_listing['amenities'].apply(lambda x: len(x.split(",")))

df_listing.drop(columns=['amenities'], inplace=True)

# Convert price to float
# df_listing['price'] = \
#     df_listing['price'].apply(lambda x: 
#                               float(x.replace('$','').replace(',','')))

# Planning to use adjusted price form calendar. So drop this price.
df_listing.drop(columns=['price'], inplace=True)

# Convert extra people cost to float
df_listing['extra_people'] = \
    df_listing['extra_people'].apply(lambda x: \
                                     float(x.replace('$','').replace(',','')))

# Group the updates into days, weeks, months, years and never.
calendar_updated = {'today': 'days',
'2 weeks ago': 'weeks',
'a week ago': 'weeks',
'yesterday': 'days',
'3 weeks ago': 'weeks',
'2 months ago': 'months',
'3 days ago': 'days',
'4 days ago': 'days',
'4 weeks ago': 'weeks',
'3 months ago': 'months',
'5 days ago': 'days',
'5 weeks ago': 'months',
'2 days ago': 'days',
'4 months ago': 'months',
'6 weeks ago': 'months',
'5 months ago': 'months',
'6 months ago': 'months',
'6 days ago': 'days',
'9 months ago': 'months',
'7 weeks ago': 'months',
'7 months ago': 'months',
'10 months ago': 'months',
'8 months ago': 'months',
'11 months ago': 'months',
'1 week ago': 'weeks',
'15 months ago': 'years',
'14 months ago': 'years',
'12 months ago': 'years',
'16 months ago': 'years',
'17 months ago': 'years',
'13 months ago': 'years',
'23 months ago': 'years',
'22 months ago': 'years',
'21 months ago': 'years',
'18 months ago': 'years',
'20 months ago': 'years',
'19 months ago': 'years',
'24 months ago': 'years',
'35 months ago': 'years',
'34 months ago': 'years',
'26 months ago': 'years',
'25 months ago': 'years',
'33 months ago': 'years',
'30 months ago': 'years',
'never': 'never',
'28 months ago': 'years',
'27 months ago': 'years',
'29 months ago': 'years',
'36 months ago': 'years',
'31 months ago': 'years',
'41 months ago': 'years',
'37 months ago': 'years',
'39 months ago': 'years',
'42 months ago': 'years',
'32 months ago': 'years',
'38 months ago': 'years',
'46 months ago': 'years',
'40 months ago': 'years',
'44 months ago': 'years',
'45 months ago': 'years',
'43 months ago': 'years',
'47 months ago': 'years',
'48 months ago': 'years',
'49 months ago': 'years',
'53 months ago': 'years',
'51 months ago': 'years',
'57 months ago': 'years',
'54 months ago': 'years',
'52 months ago': 'years',
'50 months ago': 'years',
'56 months ago': 'years',
'59 months ago': 'years',
'61 months ago': 'years',
'55 months ago': 'years',
'60 months ago': 'years',
'58 months ago': 'years',
'87 months ago': 'years',
'62 months ago': 'years',
'65 months ago': 'years',
'72 months ago': 'years',
'70 months ago': 'years'}

df_listing['calendar_updated'] = \
    df_listing['calendar_updated'].apply(lambda x: calendar_updated[x])

#  0 variance to dropping it.
df_listing.drop(columns=['has_availability'], inplace=True)

# Update to numerical form
df_listing['requires_license'] = \
   df_listing['requires_license'].apply(lambda x: 1 if x == 't' else 0)
df_listing['instant_bookable'] = \
   df_listing['instant_bookable'].apply(lambda x: 1 if x == 't' else 0)

#  0 variance to dropping it.
df_listing.drop(columns=['is_business_travel_ready'], inplace=True)

# Update to numerical form
df_listing['require_guest_profile_picture'] = \
   df_listing['require_guest_profile_picture'].apply(lambda x: 1 if x == 't' else 0)
df_listing['require_guest_phone_verification'] = \
   df_listing['require_guest_phone_verification'].apply(lambda x: 1 if x == 't' else 0)


In [32]:
df_listing.require_guest_phone_verification.value_counts()

0    42808
1     1077
Name: require_guest_phone_verification, dtype: int64

In [33]:
df_listing.dtypes

id                                                int64
host_response_time                               object
host_response_rate                              float64
host_is_superhost                                 int64
host_total_listings_count                       float64
host_has_profile_pic                              int64
host_identity_verified                            int64
zipcode                                           int64
latitude                                        float64
longitude                                       float64
is_location_exact                                 int64
property_type                                    object
room_type                                        object
accommodates                                      int64
bathrooms                                       float64
bedrooms                                        float64
beds                                            float64
bed_type                                        

In [34]:
df_listing.sample(10)

Unnamed: 0,id,host_response_time,host_response_rate,host_is_superhost,host_total_listings_count,host_has_profile_pic,host_identity_verified,zipcode,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,instant_bookable,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,host_since_in_days,amenities_count
24038,26041343,within an hour,100.0,1,1.0,1,0,90032,34.06416,-118.18358,0,House,Entire home/apt,7,1.5,2.0,3.0,Real Bed,100.0,100.0,7,20.0,1,1125,1,1,1125,1125,1.0,1125.0,months,9,17,29,29,41,41,97.0,10.0,10.0,10.0,10.0,10.0,10.0,0,0,flexible,0,0,1,1,0,0,3.95,1238,56
1928,30694695,within an hour,100.0,1,6.0,1,0,90032,34.08515,-118.19124,0,House,Entire home/apt,16,4.5,5.0,8.0,Real Bed,500.0,260.0,8,20.0,2,28,2,2,28,28,2.0,28.0,weeks,6,20,40,206,17,17,99.0,10.0,10.0,10.0,10.0,9.0,10.0,0,1,strict_14_with_grace_period,0,0,6,6,0,0,3.89,1139,22
13202,9304950,within an hour,100.0,0,20.0,1,0,90028,34.10316,-118.34075,1,Other,Private room,2,1.0,1.0,2.0,Real Bed,0.0,0.0,1,0.0,1,1125,1,1,1125,1125,1.0,1125.0,days,28,58,88,363,0,0,97.0,10.0,10.0,10.0,10.0,10.0,10.0,0,0,flexible,0,0,20,0,16,4,1.889684,2089,14
27989,4525134,within an hour,100.0,0,1.0,1,1,90057,34.06217,-118.28102,1,Guest suite,Entire home/apt,2,1.0,1.0,2.0,Real Bed,150.0,60.0,2,25.0,1,28,1,2,28,28,1.3,28.0,months,3,18,45,45,183,46,97.0,10.0,9.0,10.0,10.0,10.0,10.0,0,1,strict_14_with_grace_period,0,0,1,1,0,0,3.36,1691,49
331,26925471,within a few hours,100.0,0,3.0,1,0,90802,33.77086,-118.20009,1,Condominium,Entire home/apt,3,1.5,2.0,2.0,Real Bed,500.0,75.0,3,25.0,2,30,2,2,30,30,2.0,30.0,months,9,34,63,63,2,2,80.0,9.0,7.0,7.0,10.0,9.0,8.0,0,0,strict_14_with_grace_period,0,0,3,1,2,0,0.71,771,13
7334,20666077,within an hour,100.0,1,2.0,1,0,91042,34.25121,-118.27028,1,Guest suite,Entire home/apt,2,1.0,1.0,2.0,Real Bed,100.0,35.0,1,10.0,2,30,2,3,30,30,2.2,30.0,days,1,6,28,265,48,27,99.0,10.0,10.0,10.0,10.0,10.0,10.0,0,1,moderate,0,0,2,2,0,0,2.47,1000,40
15951,33435050,within an hour,100.0,0,1.0,1,0,91405,34.20387,-118.46721,0,Apartment,Shared room,2,1.0,1.0,2.0,Real Bed,100.0,150.0,1,0.0,1,1125,1,1,1125,1125,1.0,1125.0,months,2,2,25,205,0,0,97.0,10.0,10.0,10.0,10.0,10.0,10.0,0,1,flexible,0,0,1,0,0,1,1.889684,1016,20
16143,4831365,within an hour,94.0,1,13.0,1,0,91505,34.14622,-118.34266,1,Apartment,Entire home/apt,3,1.0,0.0,0.0,Real Bed,300.0,85.0,1,5.0,14,1200,14,14,1200,1200,14.0,1200.0,weeks,8,8,8,276,10,3,98.0,10.0,10.0,10.0,9.0,10.0,10.0,0,0,strict_14_with_grace_period,0,0,13,13,0,0,0.19,2109,28
20311,1885962,within an hour,97.0,0,4.0,1,0,90027,34.10105,-118.30634,1,Other,Private room,6,1.0,2.0,3.0,Real Bed,100.0,20.0,4,15.0,1,21,1,1,21,21,1.0,21.0,weeks,5,8,22,281,313,39,93.0,10.0,10.0,10.0,10.0,10.0,10.0,0,1,flexible,0,0,4,0,4,0,4.94,2059,24
35894,28696425,within an hour,100.0,0,1.0,1,0,90277,33.80737,-118.39074,1,House,Entire home/apt,11,3.0,4.0,3.0,Real Bed,1000.0,200.0,11,95.0,3,1125,3,3,1125,1125,3.0,1125.0,days,6,16,28,180,9,9,98.0,10.0,10.0,10.0,10.0,10.0,9.0,0,1,strict_14_with_grace_period,0,0,1,1,0,0,1.53,278,45


In [0]:
# Merge the 2 data sets together
# df_cal.merge(df_listing, left_on='listing_id', right_on='id')

In [36]:
"""
# Label encode host response time
df_listing['host_response_time']
df_listing['property_type']
df_listing['room_type']
df_listing['bed_type']
df_listing['calendar_updated']
df_listing['cancellation_policy']
"""

"\n# Label encode host response time\ndf_listing['host_response_time']\ndf_listing['property_type']\ndf_listing['room_type']\ndf_listing['bed_type']\ndf_listing['calendar_updated']\ndf_listing['cancellation_policy']\n"