## Feature Imputation using Machine Learning

In [1]:
# to prevent sklearn module not found error when import missforest:
# register the private module `sklearn.neighbors._base` under the name
# `sklearn.neighbors.base` so that an import of that name resolves.
# This must run before `missingpy` is imported.
import sys
import sklearn.neighbors._base
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base

In [2]:
# set scikit learn to older version for missingpy import

In [3]:
#pip install scikit-learn==1.1.2

In [4]:
print(sklearn.__version__)

1.1.2


In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn
from missingpy import MissForest

In [6]:
# setting display options: let pandas render up to 500 rows and 500 columns
# before it truncates a displayed frame
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

## Everything from Scratch

In [7]:
# read in data; both files use their 'ID' column as the row index
trainpath = "train.csv"
df_train = pd.read_csv(trainpath, index_col='ID')
testpath = "test.csv"
df_test = pd.read_csv(testpath, index_col='ID')

# concatenate dataframes to reduce redundancies in operations
# (train rows first, then test rows; the 'ID' labels are kept as read)
# NOTE(review): assumes the IDs of train.csv and test.csv do not overlap — confirm.
df = pd.concat([df_train, df_test])

In [8]:
# Data Cleaning

#Functions
def replace_string(df, c, s, r='', f='strip'):
    """Replace text in column ``c`` of ``df`` and return ``df``.

    The column is overwritten on the frame that is passed in; the same
    object is returned for convenient re-assignment.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column is rewritten.
    c : str
        Name of the column to clean.
    s : str
        Text to search for; how it is interpreted depends on ``f``.
    r : str, default ''
        Replacement text.
    f : {'strip', 'replace', 'find_replace'}, default 'strip'
        * ``'strip'`` - non-null values are cast to ``str`` and every match
          of the regular expression ``s`` is replaced by ``r``. A
          single-character ``s`` (``'$'``, ``'%'``, ``','`` ...) is always
          taken literally.
        * ``'replace'`` - whole-value replacement via ``Series.replace``.
        * ``'find_replace'`` - literal substring replacement, applied only to
          non-null rows that contain ``s``.
        Any other value leaves ``df`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object.
    """
    import re  # local import keeps the helper self-contained

    if f == 'find_replace':
        as_text = df[c].astype(str)
        # literal matching on both steps so the mask and the replacement
        # always agree, independent of the pandas default for `regex`
        mask = df[c].notnull() & as_text.str.contains(s, regex=False)
        df.loc[mask, c] = as_text[mask].str.replace(s, r, regex=False)
    elif f == 'replace':
        df[c] = df[c].replace(s, r)
    elif f == 'strip':
        # pandas < 2.0 treated one-character patterns as literals even with
        # regex=True; pandas >= 2.0 does not, so '$' would become an
        # end-of-string anchor and strip nothing. Escaping keeps the literal
        # meaning on every version.
        pattern = re.escape(s) if len(s) == 1 else s
        df[c] = df[c].dropna().astype(str).str.replace(pattern, r, regex=True)
    return df

def replace_numeric(df, c, n, r=0, f='match'):
    """Overwrite values of column ``c`` that compare against ``n`` with ``r``.

    ``f`` selects the comparison: ``'isgreater'`` (value > n), ``'isless'``
    (value < n) or ``'match'`` (value == n). Any other ``f`` leaves the frame
    untouched. The column is modified on ``df`` itself, which is returned.
    """
    column = df[c]
    if f == 'isgreater':
        hits = column > n
    elif f == 'isless':
        hits = column < n
    elif f == 'match':
        hits = column == n
    else:
        # unknown mode: nothing to replace
        return df
    df.loc[hits, c] = r
    return df

def convert_numeric(df, c, t, d=1):
    """Parse column ``c`` as numbers, cast it to dtype ``t`` and divide by ``d``.

    Values that cannot be parsed become NaN (``errors='coerce'``). The column
    is overwritten on ``df``, which is returned.
    """
    df[c] = pd.to_numeric(df[c], errors='coerce')
    # cast first, then scale; the division is a true division
    df[c] = df[c].astype(t) / d
    return df

In [9]:
# Work on a copy: the helpers below overwrite columns on the frame they
# receive, and with a plain alias a second run of this cell would divide the
# already converted rate columns by 100 again.
df2 = df.copy()

# price: drop currency symbol and thousands separator, then parse as float
for token in ('$', ','):
    df2 = replace_string(df2, 'price', token, '', 'strip')
df2 = convert_numeric(df2, 'price', 'float', 1)

# host_response_rate / host_acceptance_rate: strip '%' and scale by 1/100
for rate_col in ('host_response_rate', 'host_acceptance_rate'):
    df2 = replace_string(df2, rate_col, '%', '', 'strip')
    df2 = convert_numeric(df2, rate_col, 'float', 100)

# bathrooms: map the half-bath spellings to 0.5, then keep digits and dots only
for half_bath in ('Half-bath', 'half-bath'):
    df2 = replace_string(df2, 'bathrooms', half_bath, '0.5', 'find_replace')
# raw string: '\.' is not a valid escape in a normal string literal
df2 = replace_string(df2, 'bathrooms', r'[^0-9\.]', '', 'strip')
df2 = convert_numeric(df2, 'bathrooms', 'float', 1)

# max/min nights - replace extreme values (anything above 9000 becomes 1000)
for nights_col in ('maximum_nights',
                   'minimum_maximum_nights',
                   'maximum_maximum_nights',
                   'minimum_nights_avg_ntm',
                   'maximum_nights_avg_ntm'):
    df2 = replace_numeric(df2, nights_col, 9000, 1000, 'isgreater')

In [10]:
# Work on a copy so this cell can be re-run: the source column is removed at
# the end, which would otherwise also remove it from the previous stage.
df3 = df2.copy()

# Create new features email, phone and work_email from host_verifications.
# host_verifications holds list-like strings such as "['email', 'phone']".
# Each channel is looked up as a quoted token, so "'email'" does not match
# inside "'work_email'". Every combination of channels is handled, and rows
# without a host_verifications value stay NaN instead of raising on the
# integer cast.
verifications = df3['host_verifications']
for channel in ('email', 'phone', 'work_email'):
    has_channel = verifications.str.contains(f"'{channel}'", regex=False)
    df3[channel] = has_channel.astype(float)  # 1.0 / 0.0 indicator

df3 = df3.drop(columns=['host_verifications'])

In [11]:
# Create new features smoke_alarm, kitchen, essentials, hangers, wifi from amenities
# These are the top 5 amenities in the dataset

from collections import Counter

# new indicator column -> amenity label as it appears in the data
TOP_AMENITIES = {
    'smoke_alarm': 'Smoke alarm',
    'kitchen': 'Kitchen',
    'essentials': 'Essentials',
    'hangers': 'Hangers',
    'wifi': 'Wifi',
}


def _parse_amenities(amenities_str):
    """Split a string like '["Wifi", "Kitchen"]' into ['Wifi', 'Kitchen']."""
    items = amenities_str.strip('][').replace('"', '').split(', ')
    # an empty list '[]' would otherwise yield [''] and count as one amenity
    return [item for item in items if item]


# parse every listing once and reuse the result for all features below
amenity_lists = df3['amenities'].map(_parse_amenities)

# how often each amenity occurs over all listings
amenity_count = Counter()
for amenities_list in amenity_lists:
    amenity_count.update(amenities_list)

df_amenities = pd.DataFrame(columns=[ 'amenity_count'])
df_amenities['amenity_count'] = amenity_count
df_amenities = df_amenities.sort_values('amenity_count', ascending=False)
df_amenities.head(5)

# Recorded result of the ranking above:
#Smoke alarm	9548
#Kitchen	9383
#Essentials	9327
#Hangers	8702
#Wifi	8618

# number of amenities per listing
count_total = amenity_lists.map(len).tolist()
df3['amenity_count'] = count_total

# 0/1 indicator per top amenity, computed positionally from the parsed lists
for column, amenity in TOP_AMENITIES.items():
    df3[column] = amenity_lists.map(lambda items, amenity=amenity: int(amenity in items))

df3 = df3.drop(columns=['amenities'])

In [12]:
# Start the encoding stage from the amenity-enriched frame (plain assignment, no copy is made)
df5 = df3


#onehot encoder function
def onehot(df, c):
    """Replace each column listed in ``c`` by its dummy columns.

    For every column the dummies are created with ``drop_first=True``, joined
    to the frame, and the source column is removed. A new frame is returned;
    the frame passed in is left as it was.
    """
    for col in c:
        dummies = pd.get_dummies(df[[col]], drop_first=True)
        df = df.join(dummies).drop(columns=[col])
    return df

#encode binary classifiers (and the scrape source) as dummy columns
# 'host_is_superhost','host_has_profile_pic','host_identity_verified','has_availability','instant_bookable'
df5 = onehot(df5, ['source', 'host_is_superhost','host_has_profile_pic','host_identity_verified','has_availability','instant_bookable'])

from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()

#encode room_type as integer codes; keep the labels so codes can be decoded
df5['room_type'] = encoder.fit_transform(df5['room_type'].values)
room_type_classes = encoder.classes_


def _encode_top_n(frame, column, n=5, other='other'):
    """Label-encode the ``n`` most frequent values of ``column``.

    All remaining values are lumped into the ``other`` label before encoding.
    Returns ``(frame, classes)``: a new frame in which ``column`` holds the
    integer codes (moved to the last position) and the fitted class labels.
    """
    top_values = frame[column].value_counts().nlargest(n).index.tolist()
    top_encoder = LabelEncoder().fit(top_values + [other])
    lumped = frame[column].apply(lambda x: x if x in top_values else other)
    codes = top_encoder.transform(lumped)
    frame = frame.drop(columns=[column])
    frame[column] = codes
    return frame, top_encoder.classes_


#encode top 5 property_type and other
df5, property_type_classes = _encode_top_n(df5, 'property_type')

#encode top 5 neighbourhood_cleansed and other
df5, neighbourhood_cleansed_classes = _encode_top_n(df5, 'neighbourhood_cleansed')


# map/rank host_response_time
host_response_mapping = {'within an hour':1, 'within a few hours':2, 'within a day':3, 'a few days or more':4}
df5['host_response_time'] = df5['host_response_time'].map(host_response_mapping)

# convert host_since into days relative to a fixed reference date
from datetime import datetime
# Pinned instead of datetime.today() so the feature does not change from one
# run to the next; 2023-05-22 reproduces the host_since values shown in the
# outputs below (e.g. 2013-03-16 -> 3719 days).
today = datetime(2023, 5, 22)
# NOTE(review): the df_3.csv preview shows host_since as YYYY-MM-DD, so no
# '/'-separated format is forced here and pandas infers the layout — confirm
# against train.csv / test.csv.
df5['host_since'] = pd.to_datetime(df5['host_since'])
df5['host_since'] = (today - df5['host_since']).dt.days

In [13]:
# Columns left out of the imputation input. They are dropped in one
# non-mutating call so that df5 stays intact and this cell can be re-run.
columns_excluded = [
    # listing / host text fields
    'name', 'description', 'neighborhood_overview', 'host_name',
    'host_about', 'neighbourhood',
    'host_location', 'host_neighbourhood',
    # min/max variants of the nights columns
    'minimum_minimum_nights', 'maximum_minimum_nights',
    'minimum_maximum_nights', 'maximum_maximum_nights',
    'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm',
    # review dates
    'first_review', 'last_review',
    # calculated host listing counts
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms',
    'reviews_per_month',
]
df7 = df5.drop(columns=columns_excluded)

df7

Unnamed: 0_level_0,host_since,host_response_time,host_response_rate,host_acceptance_rate,host_listings_count,latitude,longitude,room_type,accommodates,bathrooms,bedrooms,beds,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,price,email,phone,work_email,amenity_count,smoke_alarm,kitchen,essentials,hangers,wifi,source_previous scrape,host_is_superhost_t,host_has_profile_pic_t,host_identity_verified_t,has_availability_t,instant_bookable_t,property_type,neighbourhood_cleansed
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
0,3719,1.0,1.00,0.98,2.0,-37.82030,144.99016,0,2,1.0,1.0,1.0,2,14,0,0,0,12.0,741,37,1,4.88,4.91,4.97,4.94,4.93,4.93,4.82,132.0,1.0,1.0,0.0,38,1,0,1,1,1,0,0,1,1,1,0,5,3
1,4996,2.0,1.00,0.98,1.0,-37.76606,144.97951,2,2,1.0,1.0,1.0,4,27,0,12,22,112.0,169,25,3,4.48,4.64,3.97,4.72,4.69,4.65,4.60,39.0,1.0,1.0,0.0,57,1,1,1,1,0,0,0,1,1,1,0,4,5
2,4193,2.0,1.00,0.78,2.0,-37.90546,145.39447,0,4,2.5,2.0,4.0,2,365,30,60,90,365.0,8,2,0,4.75,4.88,4.75,4.88,4.50,5.00,4.75,270.0,1.0,1.0,0.0,21,1,1,0,0,1,0,1,1,1,1,0,2,4
3,4726,3.0,0.75,0.92,4.0,-37.82163,144.96672,2,2,2.5,1.0,1.0,1,730,30,60,90,365.0,2,0,0,4.50,4.00,4.50,4.00,4.00,5.00,4.00,1000.0,1.0,1.0,0.0,13,0,1,0,0,1,0,0,1,1,1,0,4,0
4,4697,2.0,1.00,0.87,1.0,-38.05725,145.33936,0,5,1.0,3.0,3.0,1,14,17,21,51,312.0,214,39,4,4.86,4.91,4.98,4.91,4.93,4.90,4.87,116.0,1.0,1.0,1.0,49,1,1,1,1,1,0,1,1,1,1,0,2,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,3213,,,,1.0,-37.82025,145.03088,2,2,1.0,1.0,1.0,1,1125,0,0,0,0.0,2,0,0,4.50,4.00,5.00,5.00,5.00,4.50,4.50,,1.0,1.0,0.0,13,1,1,1,1,1,1,0,1,1,1,0,4,5
9996,1197,,,,36.0,-37.83624,144.99299,0,2,1.0,1.0,1.0,1,90,1,1,1,268.0,42,9,0,4.21,4.50,4.29,4.74,4.64,4.74,4.19,,1.0,1.0,1.0,40,1,1,1,1,1,0,0,1,0,1,1,2,2
9997,3119,1.0,1.00,0.92,36.0,-37.86326,144.75456,0,16,3.5,6.0,8.0,2,365,9,39,69,340.0,0,0,0,,,,,,,,,1.0,1.0,0.0,42,1,1,1,1,1,0,0,1,1,1,1,1,5
9998,1759,,,,2.0,-37.80913,144.96058,2,2,0.5,1.0,,1,1125,0,0,0,0.0,3,0,0,3.00,4.00,3.00,2.33,3.33,4.00,3.33,,0.0,1.0,0.0,10,1,1,1,1,1,1,0,1,1,1,1,4,0


In [14]:
# Keep `price` out of the imputation input; it is added back after imputing.
df_impute = df7.drop('price', axis=1)

In [15]:
df_impute.isnull().sum()

host_since                       0
host_response_time             737
host_response_rate             737
host_acceptance_rate           721
host_listings_count              0
latitude                         0
longitude                        0
room_type                        0
accommodates                     0
bathrooms                        6
bedrooms                       438
beds                            84
minimum_nights                   0
maximum_nights                   0
availability_30                  0
availability_60                  0
availability_90                  0
availability_365                67
number_of_reviews                0
number_of_reviews_ltm            0
number_of_reviews_l30d           0
review_scores_rating           263
review_scores_accuracy         321
review_scores_cleanliness      321
review_scores_checkin          322
review_scores_communication    322
review_scores_location         322
review_scores_value            322
email               

### Miss Forest Classifier

In [16]:
%%capture --no-display
imputer = MissForest()
X_imputed = imputer.fit_transform(df_impute)

In [17]:
# Rebuild a labelled frame from the imputer's array output. The original 'ID'
# index is carried over so that later label-based operations (such as adding
# `price` back from df7) line up with the right listings.
X_imputed = pd.DataFrame(X_imputed, columns=df_impute.columns, index=df_impute.index)

In [18]:
X_imputed.isnull().sum()

host_since                     0
host_response_time             0
host_response_rate             0
host_acceptance_rate           0
host_listings_count            0
latitude                       0
longitude                      0
room_type                      0
accommodates                   0
bathrooms                      0
bedrooms                       0
beds                           0
minimum_nights                 0
maximum_nights                 0
availability_30                0
availability_60                0
availability_90                0
availability_365               0
number_of_reviews              0
number_of_reviews_ltm          0
number_of_reviews_l30d         0
review_scores_rating           0
review_scores_accuracy         0
review_scores_cleanliness      0
review_scores_checkin          0
review_scores_communication    0
review_scores_location         0
review_scores_value            0
email                          0
phone                          0
work_email

In [19]:
X_imputed

Unnamed: 0,host_since,host_response_time,host_response_rate,host_acceptance_rate,host_listings_count,latitude,longitude,room_type,accommodates,bathrooms,bedrooms,beds,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,email,phone,work_email,amenity_count,smoke_alarm,kitchen,essentials,hangers,wifi,source_previous scrape,host_is_superhost_t,host_has_profile_pic_t,host_identity_verified_t,has_availability_t,instant_bookable_t,property_type,neighbourhood_cleansed
0,3719.0,1.00,1.0000,0.9800,2.0,-37.82030,144.99016,0.0,2.0,1.0,1.0,1.00,2.0,14.0,0.0,0.0,0.0,12.0,741.0,37.0,1.0,4.880,4.9100,4.9700,4.9400,4.9300,4.9300,4.820,1.0,1.0,0.0,38.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,5.0,3.0
1,4996.0,2.00,1.0000,0.9800,1.0,-37.76606,144.97951,2.0,2.0,1.0,1.0,1.00,4.0,27.0,0.0,12.0,22.0,112.0,169.0,25.0,3.0,4.480,4.6400,3.9700,4.7200,4.6900,4.6500,4.600,1.0,1.0,0.0,57.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,5.0
2,4193.0,2.00,1.0000,0.7800,2.0,-37.90546,145.39447,0.0,4.0,2.5,2.0,4.00,2.0,365.0,30.0,60.0,90.0,365.0,8.0,2.0,0.0,4.750,4.8800,4.7500,4.8800,4.5000,5.0000,4.750,1.0,1.0,0.0,21.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,2.0,4.0
3,4726.0,3.00,0.7500,0.9200,4.0,-37.82163,144.96672,2.0,2.0,2.5,1.0,1.00,1.0,730.0,30.0,60.0,90.0,365.0,2.0,0.0,0.0,4.500,4.0000,4.5000,4.0000,4.0000,5.0000,4.000,1.0,1.0,0.0,13.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,0.0
4,4697.0,2.00,1.0000,0.8700,1.0,-38.05725,145.33936,0.0,5.0,1.0,3.0,3.00,1.0,14.0,17.0,21.0,51.0,312.0,214.0,39.0,4.0,4.860,4.9100,4.9800,4.9100,4.9300,4.9000,4.870,1.0,1.0,1.0,49.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,2.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,3213.0,1.92,0.9892,0.4650,1.0,-37.82025,145.03088,2.0,2.0,1.0,1.0,1.00,1.0,1125.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,4.500,4.0000,5.0000,5.0000,5.0000,4.5000,4.500,1.0,1.0,0.0,13.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,4.0,5.0
9996,1197.0,1.00,0.9805,0.9865,36.0,-37.83624,144.99299,0.0,2.0,1.0,1.0,1.00,1.0,90.0,1.0,1.0,1.0,268.0,42.0,9.0,0.0,4.210,4.5000,4.2900,4.7400,4.6400,4.7400,4.190,1.0,1.0,1.0,40.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,2.0,2.0
9997,3119.0,1.00,1.0000,0.9200,36.0,-37.86326,144.75456,0.0,16.0,3.5,6.0,8.00,2.0,365.0,9.0,39.0,69.0,340.0,0.0,0.0,0.0,1.026,2.5765,2.6289,2.8462,2.5619,3.5439,2.299,1.0,1.0,0.0,42.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0
9998,1759.0,2.03,0.9463,0.5161,2.0,-37.80913,144.96058,2.0,2.0,0.5,1.0,1.06,1.0,1125.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,3.000,4.0000,3.0000,2.3300,3.3300,4.0000,3.330,0.0,1.0,0.0,10.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,4.0,0.0


In [21]:
# drop irrelevant column
X_imputed = X_imputed.drop(columns=['source_previous scrape'])

# add price back
# Assigned by position: X_imputed was produced from df7 minus `price`, so the
# rows are in df7's order. This avoids aligning df7's 'ID' index against a
# possibly different index on X_imputed; a length mismatch raises.
X_imputed['price'] = df7['price'].to_numpy()

# create logarithmic price column
# NOTE(review): a price of 0 would give -inf here — confirm there are no zero prices.
X_imputed['log_price'] = np.log(X_imputed['price'])

X_imputed.to_csv("missforest_imputed.csv")

## Reading in DF3

In [7]:
# Load the intermediate frame stored in df_3.csv.
# NOTE(review): this rebinds `df`, which earlier in the notebook held the raw train+test frame.
df = pd.read_csv("df_3.csv")

  df = pd.read_csv("df_3.csv")


In [8]:
df.shape[0]

10001

In [9]:
df.loc[8306:8311, ]

Unnamed: 0,ID,source,name,description,neighborhood_overview,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_neighbourhood,host_listings_count,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,beds,amenities,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,has_availability,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,price,email,phone,work_email,amenity_count,smoke_alarm,kitchen,essentials,hangers,wifi
8306,8306.0,city scrape,Marriott on Smith,Marriot on Smith accommodation is located on w...,,Kizzi,2014-04-17,"Collingwood, Australia",My name is Kizzi.\r\nMy husband's name is Mich...,within an hour,1.0,0.94,f,,1.0,t,t,,Yarra,-37.79864,144.98371,Entire townhouse,Entire home/apt,5.0,1.0,3.0,4.0,"[""Extra pillows and blankets"", ""Laundromat nea...",2,1125.0,2.0,2.0,365.0,365.0,2.0,365.0,t,8,32.0,57.0,,9.0,9.0,0.0,2022-10-12,2023-02-04,5.0,5.0,5.0,5.0,5.0,5.0,5.0,t,1.0,1.0,0.0,0.0,1.75,,1.0,1.0,0.0,59.0,1.0,1.0,1.0,1.0,0.0
8307,8307.0,city scrape,2 Bedroom Apartment in Heritage Listed City Haven,This art deco 2 bedroom apartment is full of c...,,Lauren,2012-10-20,"Melbourne, Australia",Friendly easy going young lady. Have just retu...,within a day,1.0,0.44,f,,2.0,t,t,,Melbourne,-37.814996,144.966028,Entire condo,Entire home/apt,4.0,1.0,1.0,2.0,"[""Smoke alarm"", ""Washer"", ""Wifi"", ""TV"", ""Dedic...",4,365.0,4.0,4.0,365.0,365.0,4.0,365.0,t,4,20.0,31.0,31.0,13.0,13.0,1.0,2022-06-27,2023-03-01,4.62,4.62,4.23,4.92,4.69,4.92,4.85,f,2.0,2.0,0.0,0.0,1.49,,1.0,1.0,1.0,7.0,1.0,1.0,0.0,0.0,1.0
8308,8308.0,city scrape,Beauty on Bay Street,Enjoy a stylish experience in one of THE BEST ...,Brighton is one of the most affluent suburbs i...,Suzanne,2015-04-30,"Melbourne, Australia",Air BNB Queen and super proud Mum of 3 childre...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8309,,within an hour,0.92,1.0,f,,7.0,t,t,"Brighton, Victoria, Australia",Bayside,-37.90391,144.99945,Entire condo,Entire home/apt,4,2.0,2.0,4.0,"[""Extra pillows and blankets"", ""Coffee maker: ...",2.0,365,2.0,2.0,1125.0,1125.0,2.0,1125.0,t,0.0,0.0,20.0,247.0,20.0,20.0,0.0,2022-07-08,2023-01-14,4.65,4.8,4.75,4.6,4.95,4.95,4.75,f,4.0,4.0,0.0,0.0,2.41,,1.0,1.0,0.0,48.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,
8310,8309.0,city scrape,Fernhem Cottages- Love in the Mist,Wake up to birdsong in the tree tops in this b...,"A tranquil, secluded location with beautiful t...",Holley,2016-04-21,,I’m a librarian from Melbourne who loves anima...,within an hour,1.0,1.0,f,,1.0,t,t,"Emerald, Victoria, Australia",Yarra Ranges,-37.90152,145.44471,Entire cabin,Entire home/apt,2.0,1.0,1.0,1.0,"[""Sukin conditioner"", ""Extra pillows and blank...",2,1125.0,2.0,2.0,1125.0,1125.0,2.0,1125.0,t,23,48.0,78.0,353.0,1.0,1.0,0.0,2023-01-01,2023-01-01,5.0,5.0,5.0,5.0,5.0,5.0,5.0,f,1.0,1.0,0.0,0.0,0.42,,1.0,1.0,0.0,55.0,1.0,1.0,1.0,1.0,1.0
8311,8310.0,city scrape,Stylish 2 Bedroom APT With CBD View Southbank,The property is under a very special family wh...,"There are pretty bourgeoisie all around, resid...",Myka,2022-01-08,"Doncaster, Australia",,within an hour,0.98,1.0,f,,49.0,t,t,"Southbank, Victoria, Australia",Melbourne,-37.82344,144.96687,Entire rental unit,Entire home/apt,5.0,1.0,2.0,2.0,"[""Extra pillows and blankets"", ""Microwave"", ""E...",2,60.0,1.0,4.0,60.0,60.0,1.8,60.0,t,10,29.0,49.0,49.0,27.0,27.0,2.0,2022-06-10,2023-02-24,4.59,4.63,4.59,4.37,4.63,4.89,4.63,f,48.0,48.0,0.0,0.0,2.91,,1.0,1.0,0.0,32.0,1.0,1.0,1.0,1.0,1.0


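There is a spurious extra row at index position 8309, apparently created when the CSV was saved. Row 8308 is cut off after `host_about`, and row 8309 contains the remaining values of entry 8308 (from `host_response_time` onwards), shifted left so that they start in the `source` column. As a side effect, `ID` is one lower than the row index for all following rows.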

In [10]:
# Get the values from index 8309
# In the spurious row the data that belongs to 8308 sits in the columns
# 'source' .. 'email', i.e. shifted left relative to where it belongs.
values_to_insert = df.loc[8309, 'source':'email'].values

# Fill the missing values at index 8308 with the values from index 8309
# 'host_response_time' .. last column spans the same number of columns (60)
# as 'source' .. 'email', so the values land in their proper columns.
df.loc[8308, 'host_response_time':] = values_to_insert

In [11]:
# delete index 8309 (the spurious row; its values now live in row 8308)
df.drop(index=8309, inplace=True)

In [12]:
#reset index so the row labels run 0..n-1 again without a gap at 8309
df.reset_index(drop=True, inplace=True)

In [13]:
df.head()

Unnamed: 0,ID,source,name,description,neighborhood_overview,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_neighbourhood,host_listings_count,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,beds,amenities,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,has_availability,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,price,email,phone,work_email,amenity_count,smoke_alarm,kitchen,essentials,hangers,wifi
0,0.0,city scrape,"The Stables, Richmond",Superbly located hotel style accommodation in ...,Richmond is a great neighbourhood. A beautifu...,Ione,2013-03-16,"Melbourne, Australia",I'm a working mum who loves being able to shar...,within an hour,1.0,0.98,f,Richmond,2.0,t,t,"Richmond, Victoria, Australia",Yarra,-37.8203,144.99016,Entire guesthouse,Entire home/apt,2.0,1.0,1.0,1.0,"[""Sukin conditioner"", ""Extra pillows and blank...",2,14.0,2.0,2.0,1125.0,1125.0,2.0,1125.0,t,0,0.0,0.0,12.0,741.0,37.0,1.0,2013-03-29,2023-02-18,4.88,4.91,4.97,4.94,4.93,4.93,4.82,f,2.0,2.0,0.0,0.0,6.11,132.0,1.0,1.0,0.0,38.0,1.0,0.0,1.0,1.0,1.0
1,1.0,city scrape,Room in Cool Deco Apartment in Brunswick East,A large air conditioned room with firm queen s...,This hip area is a crossroads between two grea...,Lindsay,2009-09-16,"Melbourne, Australia",As an artist working in animation and video I ...,within a few hours,1.0,0.98,f,Brunswick,1.0,t,t,"Brunswick East, Victoria, Australia",Moreland,-37.76606,144.97951,Private room in rental unit,Private room,2.0,1.0,1.0,1.0,"[""Extra pillows and blankets"", ""Laundromat nea...",4,27.0,4.0,4.0,27.0,27.0,4.0,27.0,t,0,12.0,22.0,112.0,169.0,25.0,3.0,2013-01-12,2023-03-08,4.48,4.64,3.97,4.72,4.69,4.65,4.6,f,1.0,0.0,1.0,0.0,1.37,39.0,1.0,1.0,0.0,57.0,1.0,1.0,1.0,1.0,0.0
2,2.0,city scrape,The Suite @ Angelus Retreat,<b>The space</b><br />Welcome to ANGELUS Retre...,,Margaret Jiin,2011-11-28,"Melbourne, Australia",I have very special interests in Life and Life...,within a few hours,1.0,0.78,t,,2.0,t,t,,Yarra Ranges,-37.90546,145.39447,Entire rental unit,Entire home/apt,4.0,2.5,2.0,4.0,"[""Microwave"", ""Hot tub"", ""Conditioner"", ""Smoke...",2,365.0,2.0,2.0,365.0,365.0,2.0,365.0,t,30,60.0,90.0,365.0,8.0,2.0,0.0,2015-07-06,2022-06-13,4.75,4.88,4.75,4.88,4.5,5.0,4.75,f,2.0,2.0,0.0,0.0,0.09,270.0,1.0,1.0,0.0,21.0,1.0,1.0,0.0,0.0,1.0
3,3.0,city scrape,Million Dollar Views Over Melbourne,<b>The space</b><br /><b>Enjoy Million Dollar ...,,Paul,2010-06-13,"Melbourne, Australia",Professional couple who enjoy entertaining in ...,within a day,0.75,0.92,f,Southbank,4.0,t,t,,Melbourne,-37.82163,144.96672,Private room in rental unit,Private room,2.0,2.5,1.0,1.0,"[""Hot tub"", ""Gym"", ""Washer"", ""Dryer"", ""Kitchen...",1,730.0,1.0,1.0,730.0,730.0,1.0,730.0,t,30,60.0,90.0,365.0,2.0,0.0,0.0,2011-10-16,2012-01-27,4.5,4.0,4.5,4.0,4.0,5.0,4.0,f,1.0,0.0,1.0,0.0,0.01,1000.0,1.0,1.0,0.0,13.0,0.0,1.0,0.0,0.0,1.0
4,4.0,city scrape,Melbourne - Old Trafford Apartment,After hosting many guests from all over the wo...,Our street is quiet & secluded but within walk...,Daryl & Dee,2010-07-12,"Berwick, Australia",We are an active couple who work from home and...,within a few hours,1.0,0.87,t,,1.0,t,t,"Berwick, Victoria, Australia",,-38.05725,145.33936,Entire rental unit,Entire home/apt,5.0,1.0,3.0,3.0,"[""Laundromat nearby"", ""Private patio or balcon...",1,14.0,1.0,1.0,14.0,14.0,1.0,14.0,t,17,21.0,51.0,312.0,214.0,39.0,4.0,2010-11-24,2023-03-03,4.86,4.91,4.98,4.91,4.93,4.9,4.87,f,1.0,1.0,0.0,0.0,1.43,116.0,1.0,1.0,1.0,49.0,1.0,1.0,1.0,1.0,1.0


In [14]:
df.columns

Index(['ID', 'source', 'name', 'description', 'neighborhood_overview',
       'host_name', 'host_since', 'host_location', 'host_about',
       'host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_neighbourhood', 'host_listings_count',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'latitude', 'longitude', 'property_type',
       'room_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
       'amenities', 'minimum_nights', 'maximum_nights',
       'minimum_minimum_nights', 'maximum_minimum_nights',
       'minimum_maximum_nights', 'maximum_maximum_nights',
       'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm', 'has_availability',
       'availability_30', 'availability_60', 'availability_90',
       'availability_365', 'number_of_reviews', 'number_of_reviews_ltm',
       'number_of_reviews_l30d', 'first_review', 'last_review',
       'review_scores_rating', 'review_score

In [15]:
# Columns removed before splitting into categorical / continuous features;
# every column not listed here is kept.
columns_to_drop = [
    'ID', 'source', 'name', 'description', 'neighborhood_overview',
    'host_name', 'host_since', 'host_location', 'host_about',
    'host_neighbourhood',
    'neighbourhood',
    'latitude', 'longitude',
    'amenities',
    'first_review', 'last_review',
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms',
]
df.drop(columns=columns_to_drop, inplace=True)

In [16]:
df.columns

Index(['host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_listings_count', 'host_has_profile_pic',
       'host_identity_verified', 'neighbourhood_cleansed', 'property_type',
       'room_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_maximum_nights', 'minimum_nights_avg_ntm',
       'maximum_nights_avg_ntm', 'has_availability', 'availability_30',
       'availability_60', 'availability_90', 'availability_365',
       'number_of_reviews', 'number_of_reviews_ltm', 'number_of_reviews_l30d',
       'review_scores_rating', 'review_scores_accuracy',
       'review_scores_cleanliness', 'review_scores_checkin',
       'review_scores_communication', 'review_scores_location',
       'review_scores_value', 'instant_bookable', 'reviews_per_month', 'price',
       'email', 'phone', 'work_email', '

In [17]:
# create df with categorical / continuous variables
# Columns treated as continuous; everything else in df goes to df_cat.
columns_cont = [
    'host_response_time', 'host_response_rate', 'host_acceptance_rate',
    'host_listings_count',
    'accommodates', 'bathrooms', 'bedrooms', 'beds',
    'minimum_nights', 'maximum_nights',
    'minimum_minimum_nights', 'maximum_minimum_nights',
    'minimum_maximum_nights', 'maximum_maximum_nights',
    'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm',
    'availability_30', 'availability_60', 'availability_90', 'availability_365',
    'number_of_reviews', 'number_of_reviews_ltm', 'number_of_reviews_l30d',
    'review_scores_rating', 'review_scores_accuracy',
    'review_scores_cleanliness', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_location',
    'review_scores_value',
    'reviews_per_month', 'price',
    'amenity_count',
]
df_cat = df.drop(columns=columns_cont)
df_cont = df[columns_cont]

### Categorical Feature Imputation

In [18]:
df_cat.isnull().sum().sort_values(ascending=False)

room_type                 175
neighbourhood_cleansed    150
property_type             123
host_is_superhost           2
host_has_profile_pic        0
host_identity_verified      0
has_availability            0
instant_bookable            0
email                       0
phone                       0
work_email                  0
smoke_alarm                 0
kitchen                     0
essentials                  0
hangers                     0
wifi                        0
dtype: int64

Since the benefit of imputing only a small number of missing values with an algorithm is limited, we focus only on the features with many missing values.

#### Room Type

In [19]:
# import previously cleaned dataframe to use as training data
# NOTE(review): this rebinds `df7` to the contents of df_7.csv, replacing the
# frame of the same name built earlier in the notebook.
df7 = pd.read_csv("df_7.csv")
df7.columns

Index(['ID', 'host_since', 'host_response_time', 'host_response_rate',
       'host_acceptance_rate', 'host_listings_count', 'latitude', 'longitude',
       'room_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_maximum_nights', 'minimum_nights_avg_ntm',
       'maximum_nights_avg_ntm', 'availability_30', 'availability_60',
       'availability_90', 'availability_365', 'number_of_reviews',
       'number_of_reviews_ltm', 'number_of_reviews_l30d',
       'review_scores_rating', 'review_scores_accuracy',
       'review_scores_cleanliness', 'review_scores_checkin',
       'review_scores_communication', 'review_scores_location',
       'review_scores_value', 'reviews_per_month', 'price', 'email', 'phone',
       'work_email', 'amenity_count', 'smoke_alarm', 'kitchen', 'essentials',
       'hangers', 'wifi', 'source_previous scrape', 'host_is

In [20]:
# impute price with median
from sklearn.impute import SimpleImputer

def impute_missing(df, c, s='most_frequent'):
    """Fill missing values in selected columns of ``df`` with ``SimpleImputer``.

    Every column is fitted and imputed on its own, so the fill value of one
    column never depends on the contents of another.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to impute. It is modified in place and also returned.
    c : str or iterable of str
        Column name(s) to impute. A single name may be given as a plain string.
    s : str, default 'most_frequent'
        Forwarded unchanged as ``SimpleImputer(strategy=...)``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the requested columns imputed.
    """
    # A bare string would otherwise be iterated character by character.
    columns = [c] if isinstance(c, str) else c
    for col in columns:
        imputer = SimpleImputer(missing_values=np.nan, strategy=s)
        # Double brackets keep the 2-D shape that the imputer works with.
        df[[col]] = imputer.fit_transform(df[[col]])
    return df

# fill the gaps in 'price' with the column median
df7 = impute_missing(df7, c=['price'], s='median')

In [21]:
# Row labels of df_cat whose 'room_type' is missing; these are the rows to impute.
missing_index = df_cat[df_cat['room_type'].isna()].index.tolist()
# Set copy of the same labels so each membership test below is a hash lookup
# instead of a scan over the whole list.
missing_lookup = set(missing_index)

# Predictor columns read from df7, in the order they appear in every feature row.
# Defined once so the training and test rows always share the same layout.
room_feature_columns = [
    'neighbourhood_cleansed', 'property_type', 'host_is_superhost_t', 'phone',
    'host_identity_verified_t', 'bedrooms', 'review_scores_value', 'price',
    'beds', 'bathrooms', 'amenity_count', 'accommodates',
]


def _room_features(i):
    """Return the df7 predictor values for row label ``i`` as a flat list."""
    return [df7[col][i] for col in room_feature_columns]


# NOTE(review): this assumes df, df_cat and df7 all use the integer labels
# 0..len(df)-1 for the same rows — confirm before relying on the split.
known_rows = [i for i in range(df.shape[0]) if i not in missing_lookup]
unknown_rows = [i for i in range(df.shape[0]) if i in missing_lookup]

# get training data --> predictor values of rows whose room type is known
room_X_train = [_room_features(i) for i in known_rows]

# get target data --> room type of the same rows
room_Y_train = [df.room_type[i] for i in known_rows]

# get test data --> predictor values of rows whose room type is missing
room_X_test = [_room_features(i) for i in unknown_rows]

##### Stochastic Gradient Descent Classifier

##### Random Forest Classifier

##### Miss Forest Classifier

In [22]:
# display the combined frame `df`; this cell performs no imputation
df

Unnamed: 0,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_has_profile_pic,host_identity_verified,neighbourhood_cleansed,property_type,room_type,accommodates,bathrooms,bedrooms,beds,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,has_availability,availability_30,availability_60,availability_90,availability_365,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,reviews_per_month,price,email,phone,work_email,amenity_count,smoke_alarm,kitchen,essentials,hangers,wifi
0,within an hour,1.0,0.98,f,2.0,t,t,Yarra,Entire guesthouse,Entire home/apt,2.0,1.0,1.0,1.0,2,14.0,2.0,2.0,1125.0,1125.0,2.0,1125.0,t,0,0.0,0.0,12.0,741.0,37.0,1.0,4.88,4.91,4.97,4.94,4.93,4.93,4.82,f,6.11,132.0,1.0,1.0,0.0,38.0,1.0,0.0,1.0,1.0,1.0
1,within a few hours,1.0,0.98,f,1.0,t,t,Moreland,Private room in rental unit,Private room,2.0,1.0,1.0,1.0,4,27.0,4.0,4.0,27.0,27.0,4.0,27.0,t,0,12.0,22.0,112.0,169.0,25.0,3.0,4.48,4.64,3.97,4.72,4.69,4.65,4.6,f,1.37,39.0,1.0,1.0,0.0,57.0,1.0,1.0,1.0,1.0,0.0
2,within a few hours,1.0,0.78,t,2.0,t,t,Yarra Ranges,Entire rental unit,Entire home/apt,4.0,2.5,2.0,4.0,2,365.0,2.0,2.0,365.0,365.0,2.0,365.0,t,30,60.0,90.0,365.0,8.0,2.0,0.0,4.75,4.88,4.75,4.88,4.50,5.00,4.75,f,0.09,270.0,1.0,1.0,0.0,21.0,1.0,1.0,0.0,0.0,1.0
3,within a day,0.75,0.92,f,4.0,t,t,Melbourne,Private room in rental unit,Private room,2.0,2.5,1.0,1.0,1,730.0,1.0,1.0,730.0,730.0,1.0,730.0,t,30,60.0,90.0,365.0,2.0,0.0,0.0,4.50,4.00,4.50,4.00,4.00,5.00,4.0,f,0.01,1000.0,1.0,1.0,0.0,13.0,0.0,1.0,0.0,0.0,1.0
4,within a few hours,1.0,0.87,t,1.0,t,t,,Entire rental unit,Entire home/apt,5.0,1.0,3.0,3.0,1,14.0,1.0,1.0,14.0,14.0,1.0,14.0,t,17,21.0,51.0,312.0,214.0,39.0,4.0,4.86,4.91,4.98,4.91,4.93,4.90,4.87,f,1.43,116.0,1.0,1.0,1.0,49.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,,,,f,1.0,t,t,Boroondara,Private room in rental unit,Private room,2.0,1.0,1.0,1.0,1,1125.0,1.0,1.0,1125.0,1125.0,1.0,1125.0,t,0,0.0,0.0,0.0,2.0,0.0,0.0,4.50,4.00,5.00,5.00,5.00,4.50,4.5,f,0.02,,1.0,1.0,0.0,13.0,1.0,1.0,1.0,1.0,1.0
9996,,,,f,36.0,t,f,Stonnington,Entire rental unit,Entire home/apt,2.0,1.0,1.0,1.0,1,90.0,3.0,7.0,90.0,90.0,4.0,90.0,t,1,1.0,1.0,268.0,42.0,9.0,0.0,4.21,4.50,4.29,4.74,4.64,4.74,4.19,t,0.86,,1.0,1.0,1.0,40.0,1.0,1.0,1.0,1.0,1.0
9997,within an hour,1.0,0.92,f,36.0,t,t,Wyndham,Entire home,Entire home/apt,16.0,3.5,6.0,8.0,2,365.0,2.0,2.0,365.0,365.0,2.0,365.0,t,9,39.0,69.0,340.0,0.0,0.0,0.0,,,,,,,,t,,,1.0,1.0,0.0,42.0,1.0,1.0,1.0,1.0,1.0
9998,,,,f,2.0,t,t,Melbourne,Private room in rental unit,Private room,2.0,0.5,1.0,,1,1125.0,1.0,1.0,1125.0,1125.0,1.0,1125.0,t,0,0.0,0.0,0.0,3.0,0.0,0.0,3.00,4.00,3.00,2.33,3.33,4.00,3.33,t,0.08,,0.0,1.0,0.0,10.0,1.0,1.0,1.0,1.0,1.0


#### Neighbourhood

#### Property Type

### Continuous Feature Imputation

In [23]:
# number of missing entries per column of df_cont, largest first
df_cont.isna().sum().sort_values(ascending=False)

price                          3000
host_response_time              737
host_response_rate              737
host_acceptance_rate            721
bedrooms                        438
review_scores_value             322
review_scores_location          322
review_scores_communication     322
review_scores_checkin           322
review_scores_cleanliness       321
review_scores_accuracy          321
reviews_per_month               263
review_scores_rating            263
beds                             84
availability_365                 67
maximum_maximum_nights           55
minimum_minimum_nights           55
bathrooms                         6
number_of_reviews_ltm             0
number_of_reviews_l30d            0
availability_30                   0
number_of_reviews                 0
availability_90                   0
availability_60                   0
maximum_nights_avg_ntm            0
minimum_nights_avg_ntm            0
minimum_maximum_nights            0
maximum_minimum_nights      

### Basic Feature Imputation for Features with a Low Number of Missing Values