In [1]:
# Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from math import sqrt
from scipy import stats
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# read the house data spreadsheet
dataset = pd.read_csv('titanic.csv')

In [3]:
# impute missing values (y)
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer.fit(dataset[['Age']])
dataset['Age'] = imputer.transform(dataset[['Age']]).ravel()

In [4]:
# convert binary categorical data (waterfront, renovated)
le = LabelEncoder()
dataset['Sex'] = le.fit_transform(dataset['Sex'])

In [5]:
# split the dataset into features and the prediction
data = dataset.iloc[:,].values
X = dataset.iloc[:,2:7].values
y = dataset.iloc[:,7].values

In [6]:
# split the sqft_living into test and train data
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.25, random_state = 0)

In [7]:
# feature scaling (if necessary)
#sc = StandardScaler()
#X_train = sc.fit_transform(X_train)
#X_test = sc.transform(X_test)

In [8]:
# Logistical Regression
model = LogisticRegression(solver='liblinear', random_state=0)
model.fit(X_train, y_train)

LogisticRegression(random_state=0, solver='liblinear')

In [9]:
accuracy = model.score(X_test,y_test)
accuracy

0.7847533632286996

In [10]:

# logistical regression predictons
k_predict1 = model.predict([[1, 4, 0, 0, 3]])
print(k_predict1)
k_predict2 = model.predict([[1, 4, 4, 0, 3]])
print(k_predict2)
k_predict3 = model.predict([[1, 4, 0, 5, 3]])
print(k_predict3)
k_predict4 = model.predict([[1, 4, 0, 0, 1]])
print(k_predict4)
k_predict5 = model.predict([[1, 40, 0, 0, 3]])
print(k_predict5)
k_predict6 = model.predict([[1, 40, 4, 0, 3]])
print(k_predict6)
k_predict7 = model.predict([[1, 40, 0, 5, 3]])
print(k_predict7)
k_predict8 = model.predict([[1, 40, 0, 0, 1]])
print(k_predict8)
k_predict9 = model.predict([[0, 4, 0, 0, 3]])
print(k_predict9)
k_predict10 = model.predict([[0, 4, 4, 0, 3]])
print(k_predict10)
k_predict11 = model.predict([[0, 4, 0, 5, 3]])
print(k_predict11)
k_predict12 = model.predict([[0, 4, 0, 0, 1]])
print(k_predict12)
k_predict13 = model.predict([[0, 40, 0, 0, 3]])
print(k_predict13)
k_predict14 = model.predict([[0, 40, 4, 0, 3]])
print(k_predict14)
k_predict15 = model.predict([[0, 40, 0, 5, 3]])
print(k_predict15)
k_predict16 = model.predict([[0, 40, 0, 0, 1]])
print(k_predict16)

[0]
[0]
[0]
[1]
[0]
[0]
[0]
[0]
[1]
[1]
[1]
[1]
[1]
[0]
[1]
[1]


In [11]:
# k-nearest neighbors
classifier = KNeighborsClassifier(n_neighbors = 7, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=7)

In [12]:
# predict k-nearest neighbors
k_predict = classifier.predict(X_test)
print(np.concatenate((k_predict.reshape(len(k_predict),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 1]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 1]
 [0 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]

In [13]:
# confusion matrix
cm = confusion_matrix(y_test, k_predict)
print(cm)

[[123  16]
 [ 33  51]]


In [14]:
# accuracy score
accuracy_score(y_test, k_predict)

0.7802690582959642

In [15]:
# k-nearest neighbor predictons
k_predict1 = classifier.predict([[1, 4, 0, 0, 3]])
print(k_predict1)
k_predict2 = classifier.predict([[1, 4, 4, 0, 3]])
print(k_predict2)
k_predict3 = classifier.predict([[1, 4, 0, 5, 3]])
print(k_predict3)
k_predict4 = classifier.predict([[1, 4, 0, 0, 1]])
print(k_predict4)
k_predict5 = classifier.predict([[1, 40, 0, 0, 3]])
print(k_predict5)
k_predict6 = classifier.predict([[1, 40, 4, 0, 3]])
print(k_predict6)
k_predict7 = classifier.predict([[1, 40, 0, 5, 3]])
print(k_predict7)
k_predict8 = classifier.predict([[1, 40, 0, 0, 1]])
print(k_predict8)
k_predict9 = classifier.predict([[0, 4, 0, 0, 3]])
print(k_predict9)
k_predict10 = classifier.predict([[0, 4, 4, 0, 3]])
print(k_predict10)
k_predict11 = classifier.predict([[0, 4, 0, 5, 3]])
print(k_predict11)
k_predict12 = classifier.predict([[0, 4, 0, 0, 1]])
print(k_predict12)
k_predict13 = classifier.predict([[0, 40, 0, 0, 3]])
print(k_predict13)
k_predict14 = classifier.predict([[0, 40, 4, 0, 3]])
print(k_predict14)
k_predict15 = classifier.predict([[0, 40, 0, 5, 3]])
print(k_predict15)
k_predict16 = classifier.predict([[0, 40, 0, 0, 1]])
print(k_predict16)

[1]
[0]
[1]
[1]
[0]
[0]
[0]
[0]
[1]
[0]
[1]
[1]
[0]
[0]
[0]
[1]


In [16]:
# support vector machines
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)
svm_predict = classifier.predict(X_test)

In [17]:
# confusion matrix
cm = confusion_matrix(y_test, svm_predict)
cm

array([[115,  24],
       [ 25,  59]])

In [18]:
accuracy_score(y_test, svm_predict)

0.7802690582959642

In [20]:
# support vector machine predictions 
svm_predict1 = classifier.predict([[1, 4, 0, 0, 3]])
print(svm_predict1)
svm_predict2 = classifier.predict([[1, 4, 4, 0, 3]])
print(svm_predict2)
svm_predict3 = classifier.predict([[1, 4, 0, 5, 3]])
print(svm_predict3)
svm_predict4 = classifier.predict([[1, 4, 0, 0, 1]])
print(svm_predict4)
svm_predict5 = classifier.predict([[1, 40, 0, 0, 3]])
print(svm_predict5)
svm_predict6 = classifier.predict([[1, 40, 4, 0, 3]])
print(svm_predict6)
svm_predict7 = classifier.predict([[1, 40, 0, 5, 3]])
print(svm_predict7)
svm_predict8 = classifier.predict([[1, 40, 0, 0, 1]])
print(svm_predict8)
svm_predict9 = classifier.predict([[0, 4, 0, 0, 3]])
print(svm_predict9)
svm_predict10 = classifier.predict([[0, 4, 4, 0, 3]])
print(svm_predict10)
svm_predict11 = classifier.predict([[0, 4, 0, 5, 3]])
print(svm_predict11)
svm_predict12 = classifier.predict([[0, 4, 0, 0, 1]])
print(svm_predict12)
svm_predict13 = classifier.predict([[0, 40, 0, 0, 3]])
print(svm_predict13)
svm_predict14 = classifier.predict([[0, 40, 4, 0, 3]])
print(svm_predict14)
svm_predict15 = classifier.predict([[0, 40, 0, 5, 3]])
print(svm_predict15)
svm_predict16 = classifier.predict([[0, 40, 0, 0, 1]])
print(svm_predict16)

[0]
[0]
[0]
[0]
[0]
[0]
[0]
[0]
[1]
[1]
[1]
[1]
[1]
[1]
[1]
[1]


In [21]:
# kernel svm 
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(X_train, y_train)

SVC(random_state=0)

In [22]:
ksvm_predict = classifier.predict(X_test)
print(np.concatenate((ksvm_predict.reshape(len(ksvm_predict),1), y_test.reshape(len(y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 1]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 1]
 [0 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 1]
 [0 0]
 [0 1]
 [0 1]
 [0 1]
 [0 1]
 [0 0]
 [0 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]

In [23]:
# confusion matrix
cm = confusion_matrix(y_test, ksvm_predict)
cm

array([[138,   1],
       [ 75,   9]])

In [24]:
accuracy_score(y_test, ksvm_predict)

0.6591928251121076

In [25]:
# kernel svm predictons
ksvm_predict1 = classifier.predict([[1, 4, 0, 0, 3]])
print(ksvm_predict1)
ksvm_predict2 = classifier.predict([[1, 4, 4, 0, 3]])
print(ksvm_predict2)
ksvm_predict3 = classifier.predict([[1, 4, 0, 5, 3]])
print(ksvm_predict3)
ksvm_predict4 = classifier.predict([[1, 4, 0, 0, 1]])
print(ksvm_predict4)
ksvm_predict5 = classifier.predict([[1, 40, 0, 0, 3]])
print(ksvm_predict5)
ksvm_predict6 = classifier.predict([[1, 40, 4, 0, 3]])
print(ksvm_predict6)
ksvm_predict7 = classifier.predict([[1, 40, 0, 5, 3]])
print(ksvm_predict7)
ksvm_predict8 = classifier.predict([[1, 40, 0, 0, 1]])
print(ksvm_predict8)
ksvm_predict9 = classifier.predict([[0, 4, 0, 0, 3]])
print(ksvm_predict9)
ksvm_predict10 = classifier.predict([[0, 4, 4, 0, 3]])
print(ksvm_predict10)
ksvm_predict11 = classifier.predict([[0, 4, 0, 5, 3]])
print(ksvm_predict11)
ksvm_predict12 = classifier.predict([[0, 4, 0, 0, 1]])
print(ksvm_predict12)
ksvm_predict13 = classifier.predict([[0, 40, 0, 0, 3]])
print(ksvm_predict13)
ksvm_predict14 = classifier.predict([[0, 40, 4, 0, 3]])
print(ksvm_predict14)
ksvm_predict15 = classifier.predict([[0, 40, 0, 5, 3]])
print(ksvm_predict15)
ksvm_predict16 = classifier.predict([[0, 40, 0, 0, 1]])
print(ksvm_predict16)

[1]
[1]
[1]
[1]
[0]
[0]
[0]
[0]
[1]
[1]
[1]
[1]
[0]
[0]
[0]
[0]


In [26]:
# naive bayes
classifier = GaussianNB()
classifier.fit(X_train, y_train)

GaussianNB()

In [27]:
nb_predict = classifier.predict(X_test)

In [28]:
# confusion matrix
cm = confusion_matrix(y_test, nb_predict)
print(cm)

[[111  28]
 [ 22  62]]


In [29]:
# accuracy score
accuracy_score(y_test, nb_predict)

0.7757847533632287

In [30]:
# kernel svm predictons
ksvm_predict1 = classifier.predict([[1, 4, 0, 0, 3]])
print(ksvm_predict1)
ksvm_predict2 = classifier.predict([[1, 4, 4, 0, 3]])
print(ksvm_predict2)
ksvm_predict3 = classifier.predict([[1, 4, 0, 5, 3]])
print(ksvm_predict3)
ksvm_predict4 = classifier.predict([[1, 4, 0, 0, 1]])
print(ksvm_predict4)
ksvm_predict5 = classifier.predict([[1, 40, 0, 0, 3]])
print(ksvm_predict5)
ksvm_predict6 = classifier.predict([[1, 40, 4, 0, 3]])
print(ksvm_predict6)
ksvm_predict7 = classifier.predict([[1, 40, 0, 5, 3]])
print(ksvm_predict7)
ksvm_predict8 = classifier.predict([[1, 40, 0, 0, 1]])
print(ksvm_predict8)
ksvm_predict9 = classifier.predict([[0, 4, 0, 0, 3]])
print(ksvm_predict9)
ksvm_predict10 = classifier.predict([[0, 4, 4, 0, 3]])
print(ksvm_predict10)
ksvm_predict11 = classifier.predict([[0, 4, 0, 5, 3]])
print(ksvm_predict11)
ksvm_predict12 = classifier.predict([[0, 4, 0, 0, 1]])
print(ksvm_predict12)
ksvm_predict13 = classifier.predict([[0, 40, 0, 0, 3]])
print(ksvm_predict13)
ksvm_predict14 = classifier.predict([[0, 40, 4, 0, 3]])
print(ksvm_predict14)
ksvm_predict15 = classifier.predict([[0, 40, 0, 5, 3]])
print(ksvm_predict15)
ksvm_predict16 = classifier.predict([[0, 40, 0, 0, 1]])
print(ksvm_predict16)

[0]
[0]
[0]
[1]
[0]
[0]
[0]
[0]
[1]
[0]
[1]
[1]
[1]
[0]
[1]
[1]


In [30]:
# decision tree
decisionregressor = DecisionTreeRegressor(random_state = 0)
decisionregressor.fit(X,y)

DecisionTreeRegressor(random_state=0)

In [31]:
dt_predict = classifier.predict(X_test)

In [32]:
# accuracy score
accuracy_score(y_test, dt_predict)

0.7757847533632287

In [33]:
# confusion matrix
cm = confusion_matrix(y_test, dt_predict)
print(cm)

[[111  28]
 [ 22  62]]


In [34]:
d1 = decisionregressor.predict([[1, 4, 0, 0, 3]])
print(d1)
d2 = decisionregressor.predict([[1, 4, 4, 0, 3]])
print(d2)
d3 = decisionregressor.predict([[1, 4, 0, 5, 3]])
print(d3)
d4 = decisionregressor.predict([[1, 4, 0, 0, 1]])
print(d4)
d5 = decisionregressor.predict([[1, 40, 0, 0, 3]])
print(d5)
d6 = decisionregressor.predict([[1, 40, 4, 0, 3]])
print(d6)
d7 = decisionregressor.predict([[1, 40, 0, 5, 3]])
print(d7)
d8 = decisionregressor.predict([[1, 40, 0, 0, 1]])
print(d8)
d9 = decisionregressor.predict([[0, 4, 0, 0, 3]])
print(d9)
d10 = decisionregressor.predict([[0, 4, 4, 0, 3]])
print(d10)
d11 = decisionregressor.predict([[0, 4, 0, 5, 3]])
print(d11)
d12 = decisionregressor.predict([[0, 4, 0, 1, 1]])
print(d12)
d13 = decisionregressor.predict([[0, 40, 0, 0, 3]])
print(d13)
d14 = decisionregressor.predict([[0, 40, 4, 0, 3]])
print(d14)
d15 = decisionregressor.predict([[0, 40, 0, 5, 3]])
print(d15)
d16 = decisionregressor.predict([[0, 40, 0, 1, 1]])
print(d16)

[1.]
[0.]
[1.]
[1.]
[0.]
[0.]
[0.]
[0.33333333]
[1.]
[1.]
[1.]
[1.]
[0.]
[1.]
[0.]
[1.]


In [35]:
accuracy_score(y_test, dt_predict)

0.7757847533632287

In [36]:
# Random Forest Regression
forestregressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
forestregressor.fit(X,y)

RandomForestRegressor(n_estimators=10, random_state=0)

In [37]:
rf_predict = classifier.predict(X_test)

In [38]:
# accuracy score
accuracy_score(y_test, rf_predict)

0.7757847533632287

In [39]:
d1 = forestregressor.predict([[1, 4, 0, 0, 3]])
print(d1)
d2 = forestregressor.predict([[1, 4, 4, 0, 3]])
print(d2)
d3 = forestregressor.predict([[1, 4, 0, 5, 3]])
print(d3)
d4 = forestregressor.predict([[1, 4, 0, 0, 1]])
print(d4)
d5 = forestregressor.predict([[1, 40, 0, 0, 3]])
print(d5)
d6 = forestregressor.predict([[1, 40, 4, 0, 3]])
print(d6)
d7 = forestregressor.predict([[1, 40, 0, 5, 3]])
print(d7)
d8 = forestregressor.predict([[1, 40, 0, 0, 1]])
print(d8)
d9 = forestregressor.predict([[0, 4, 0, 0, 3]])
print(d9)
d10 = forestregressor.predict([[0, 4, 4, 0, 3]])
print(d10)
d11 = forestregressor.predict([[0, 4, 0, 5, 3]])
print(d11)
d12 = forestregressor.predict([[0, 4, 0, 1, 1]])
print(d12)
d13 = forestregressor.predict([[0, 40, 0, 0, 3]])
print(d13)
d14 = forestregressor.predict([[0, 40, 4, 0, 3]])
print(d14)
d15 = forestregressor.predict([[0, 40, 0, 5, 3]])
print(d15)
d16 = forestregressor.predict([[0, 40, 0, 1, 1]])
print(d16)

[0.9]
[0.]
[1.]
[0.9]
[0.]
[0.]
[0.]
[0.22666667]
[0.95]
[0.4]
[0.7]
[1.]
[0.]
[0.2]
[0.]
[1.]
