# SVM Exploration

In [1]:
# Necessary Imports
import pandas as pd
import numpy as np

from sklearn.svm import SVC

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

## SVM on Full Data Set

In [2]:
# Load the cleaned listings (the data set without interaction features)
df = pd.read_json(path_or_buf='./rawData/cleaned.json')
df.columns

Index(['Balcony', 'Cats_Allowed', 'Common_Outdoor_Space', 'Dining_Room',
       'Dishwasher', 'Dogs_Allowed', 'Doorman', 'Elevator', 'Exclusive',
       'Fitness_Center', 'Garden_Patio', 'Hardwood_Floors',
       'High_Speed_Internet', 'Laundry_in_Unit', 'Loft', 'New_Construction',
       'No_Fee', 'Outdoor_Space', 'Pre_War', 'Roof_Deck', 'Swimming_Pool',
       'Terrace', 'Wheelchair_Access', 'bathrooms', 'bedrooms', 'building_id',
       'created', 'description', 'display_address', 'features', 'interestVal',
       'interest_level', 'latitude', 'laundry_in_building', 'listing_id',
       'longitude', 'manager_id', 'price', 'street_address'],
      dtype='object')

In [3]:
# Hold out 30% of the listings for testing; the fixed seed keeps the split repeatable
X_tr, X_test, Y_tr, Y_test = train_test_split(
    df.drop(['interest_level'], axis=1),
    df['interest_level'],
    test_size=0.3,
    random_state=42
)

# Keep only the non-object columns and remove interestVal
# (presumably a numeric encoding of the target -- TODO confirm)
X_tr_wo_obj = X_tr.select_dtypes(exclude=['object']).drop(['interestVal'], axis=1)
X_test_wo_obj = X_test.select_dtypes(exclude=['object']).drop(['interestVal'], axis=1)

# Fit an SVM with default settings on the data WITHOUT interaction terms
# NOTE(review): features are passed in unscaled; scikit-learn's SVM guide
# recommends scaling inputs -- consider adding a scaler before comparing models
svm_model = SVC()
svm_fit = svm_model.fit(X_tr_wo_obj, Y_tr)

In [4]:
# Show the feature columns left in the test set after removing
# object-dtype columns and interestVal
X_test_wo_obj.columns

Index(['Balcony', 'Cats_Allowed', 'Common_Outdoor_Space', 'Dining_Room',
       'Dishwasher', 'Dogs_Allowed', 'Doorman', 'Elevator', 'Exclusive',
       'Fitness_Center', 'Garden_Patio', 'Hardwood_Floors',
       'High_Speed_Internet', 'Laundry_in_Unit', 'Loft', 'New_Construction',
       'No_Fee', 'Outdoor_Space', 'Pre_War', 'Roof_Deck', 'Swimming_Pool',
       'Terrace', 'Wheelchair_Access', 'bathrooms', 'bedrooms', 'latitude',
       'laundry_in_building', 'listing_id', 'longitude', 'price'],
      dtype='object')

In [5]:
# Evaluate the fitted SVM on the held-out test set
# (SVC.score reports mean accuracy on the given data and labels)
test_accuracy = svm_model.score(X_test_wo_obj, Y_test)
print('Score of SVM with no interaction terms = %s' % test_accuracy)

Score of SVM with no interaction terms = 0.698075699746


In [6]:
# Load the data set that includes the engineered interaction features
df2 = pd.read_json(path_or_buf='./typedData/withInteraction.json')
df2.columns

Index(['Balcony', 'Cats_Allowed', 'Common_Outdoor_Space', 'Dining_Room',
       'Dishwasher', 'Dogs_Allowed', 'Doorman', 'Elevator', 'Exclusive',
       'Fitness_Center', 'Garden_Patio', 'Hardwood_Floors',
       'High_Speed_Internet', 'Laundry_in_Unit', 'Loft', 'New_Construction',
       'No_Fee', 'Outdoor_Space', 'Pre_War', 'Roof_Deck', 'Swimming_Pool',
       'Terrace', 'Wheelchair_Access', 'ada', 'bathrooms', 'bedrooms',
       'building_id', 'created', 'description', 'display_address', 'door_excl',
       'features', 'fitness_oriented', 'interestVal', 'interest_level',
       'latitude', 'laundry_in_building', 'listing_id', 'longitude',
       'lux_score', 'manager_id', 'num_features', 'num_luxury',
       'outdoor_score', 'pets_allowed', 'price', 'street_address'],
      dtype='object')

In [8]:
# Hold out 30% of the listings for testing; the fixed seed keeps the split repeatable
X_tr_2, X_test_2, Y_tr_2, Y_test_2 = train_test_split(
    df2.drop(['interest_level'], axis=1),
    df2['interest_level'],
    test_size=0.3,
    random_state=42
)

# Keep only the non-object columns and remove interestVal
# (presumably a numeric encoding of the target -- TODO confirm)
X_tr_wo_obj_2 = X_tr_2.select_dtypes(exclude=['object']).drop(['interestVal'], axis=1)
X_test_wo_obj_2 = X_test_2.select_dtypes(exclude=['object']).drop(['interestVal'], axis=1)

# Fit an SVM with default settings on the data WITH interaction terms
svm_model_2 = SVC()
svm_fit_2 = svm_model_2.fit(X_tr_wo_obj_2, Y_tr_2)

In [9]:
# Evaluate the SVM trained on the interaction-term data set on its held-out
# test set (SVC.score reports mean accuracy).
# The label previously read "no interaction terms", which mislabelled this
# result as belonging to the model without interaction terms.
print('Score of SVM with interaction terms = %s' % svm_model_2.score(X_test_wo_obj_2, Y_test_2))

Score of SVM with no interaction terms = 0.697996183206


## SVM on Typed Data Set

In [None]:
# Load the typed data set (intended: the variant without interaction terms)
# NOTE(review): this path is the same 'withInteraction.json' file loaded into
# df2 earlier in the notebook, so df3 currently duplicates df2 -- confirm the
# correct file name for the typed data without interaction terms.
df3 = pd.read_json(path_or_buf='./typedData/withInteraction.json')
df3.columns

In [None]:
# Split the typed data set (df3) into training and test sets.
# The split previously used df2 (copy-paste from the earlier section), so the
# frame loaded for this section was never used.
# The *_2 names are reused here and overwrite the earlier interaction-term
# results; the scoring cell that follows reads svm_model_2 / X_test_wo_obj_2.
X_tr_2, X_test_2, Y_tr_2, Y_test_2 = train_test_split(df3.drop(['interest_level'],axis=1), df3['interest_level'], test_size=0.3, random_state=42)

# Keep only the non-object columns and remove interestVal
# (presumably a numeric encoding of the target -- TODO confirm)
X_tr_wo_obj_2 = X_tr_2.select_dtypes(exclude=['object'])
X_tr_wo_obj_2 = X_tr_wo_obj_2.drop(['interestVal'],axis=1)
X_test_wo_obj_2 = X_test_2.select_dtypes(exclude=['object'])
X_test_wo_obj_2 = X_test_wo_obj_2.drop(['interestVal'],axis=1)

# Fit an SVM with default settings to the typed data set
svm_model_2 = SVC()
svm_fit_2 = svm_model_2.fit(X_tr_wo_obj_2, Y_tr_2)

In [None]:
# Evaluate the most recently fitted svm_model_2 on its held-out test set
# (SVC.score reports mean accuracy on the given data and labels)
typed_test_accuracy = svm_model_2.score(X_test_wo_obj_2, Y_test_2)
print('Score of SVM with no interaction terms = %s' % typed_test_accuracy)