Eun Ju Jong

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display
import os
import pandas as pd
import matplotlib as mpl
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
np.random.seed(42)
from sklearn.model_selection import train_test_split
#from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
import time

In [2]:
housing = pd.read_csv("housing.csv")

housing_input = housing.drop("median_house_value", axis=1)
housing_output = housing["median_house_value"].copy()
encoded_input = pd.get_dummies(housing_input)

mean_input = encoded_input.fillna(encoded_input.mean())

X_train, X_test, y_train, y_test = train_test_split(mean_input, housing_output, random_state=42)

In [4]:
# Finding a max_iter that converges
# Used time() to find a convergent max_iter that does not take too much run time
start = time.time()
mlp = MLPRegressor(max_iter=1500, random_state=42)
mlp.fit(X_train, y_train)
end = time.time()
print("Accuracy on training set: {:.3f}".format(mlp.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test, y_test)))
print(end - start, "sec")

Accuracy on training set: 0.694
Accuracy on test set: 0.683
96.17496275901794 sec


In [5]:
# Rescaling features
# tried up to max_iter=2000, but the function still did not converge.
# The performance did not improve by large with max_iter=2000.
# So, will use max_iter=1500
mean_on_train = X_train.mean(axis=0)
var_on_train = X_train.var(axis=0)

X_train_scaled = (X_train - mean_on_train)/var_on_train
X_test_scaled = (X_test - mean_on_train)/var_on_train

mlp = MLPRegressor(max_iter=1500, random_state=42)
mlp.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))

Accuracy on training set: 0.605
Accuracy on test set: 0.602




In [18]:
# Tuning the level of regularization
for i in np.linspace(0.0001, 10, 10):
    mlp = MLPRegressor(max_iter=1500, alpha=i, random_state=42)
    mlp.fit(X_train_scaled, y_train)
    print(i, " Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))



0.0001  Accuracy on training set: 0.605  Accuracy on test set: 0.602




1.1112  Accuracy on training set: 0.602  Accuracy on test set: 0.600




2.2223  Accuracy on training set: 0.604  Accuracy on test set: 0.601




3.3334  Accuracy on training set: 0.606  Accuracy on test set: 0.603




4.4445  Accuracy on training set: 0.603  Accuracy on test set: 0.601




5.5556  Accuracy on training set: 0.603  Accuracy on test set: 0.600




6.6667  Accuracy on training set: 0.604  Accuracy on test set: 0.601




7.777799999999999  Accuracy on training set: 0.603  Accuracy on test set: 0.601




8.8889  Accuracy on training set: 0.603  Accuracy on test set: 0.600
10.0  Accuracy on training set: 0.603  Accuracy on test set: 0.600




In [None]:
mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[100], random_state=42)
mlp.fit(X_train_scaled, y_train)
print(" Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))

In [15]:
# Tuning the # of nodes in a hidden layer
for i in np.linspace(1, 100, 10):
    mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[int(i)], random_state=42)
    mlp.fit(X_train_scaled, y_train)
    print(i, " Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))



1.0  Accuracy on training set: -3.209  Accuracy on test set: -3.213




12.0  Accuracy on training set: 0.466  Accuracy on test set: 0.467




23.0  Accuracy on training set: 0.563  Accuracy on test set: 0.559




34.0  Accuracy on training set: 0.577  Accuracy on test set: 0.571




45.0  Accuracy on training set: 0.596  Accuracy on test set: 0.591




56.0  Accuracy on training set: 0.585  Accuracy on test set: 0.583




67.0  Accuracy on training set: 0.601  Accuracy on test set: 0.597




78.0  Accuracy on training set: 0.601  Accuracy on test set: 0.599




89.0  Accuracy on training set: 0.606  Accuracy on test set: 0.603




100.0  Accuracy on training set: 0.605  Accuracy on test set: 0.602


In [16]:
#2 hidden layers
mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[100, 100], random_state=42)
mlp.fit(X_train_scaled, y_train)
print(i, " Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))



100.0  Accuracy on training set: 0.702  Accuracy on test set: 0.697


In [17]:
#3 hidden layers
mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[100, 100, 100], random_state=42)
mlp.fit(X_train_scaled, y_train)
print(i, " Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))

100.0  Accuracy on training set: 0.739  Accuracy on test set: 0.726


In [18]:
#2 hidden layers
for i in np.linspace(1, 100, 10):
    mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[int(i), int(i)], random_state=42)
    mlp.fit(X_train_scaled, y_train)
    print(i, " Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))



1.0  Accuracy on training set: -3.209  Accuracy on test set: -3.213




12.0  Accuracy on training set: 0.636  Accuracy on test set: 0.636




23.0  Accuracy on training set: 0.654  Accuracy on test set: 0.656




34.0  Accuracy on training set: 0.663  Accuracy on test set: 0.664




45.0  Accuracy on training set: 0.673  Accuracy on test set: 0.674




56.0  Accuracy on training set: 0.670  Accuracy on test set: 0.668




67.0  Accuracy on training set: 0.690  Accuracy on test set: 0.688




78.0  Accuracy on training set: 0.653  Accuracy on test set: 0.655




89.0  Accuracy on training set: -1.612  Accuracy on test set: -1.601
100.0  Accuracy on training set: -1.057  Accuracy on test set: -1.047




In [20]:
#3 hidden layers
for i in np.linspace(1, 101, 10):
    mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[int(i), int(i), int(i)], random_state=42)
    mlp.fit(X_train_scaled, y_train)
    print(i, " Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))  



1.0  Accuracy on training set: -3.212  Accuracy on test set: -3.217




12.11111111111111  Accuracy on training set: 0.487  Accuracy on test set: 0.486




23.22222222222222  Accuracy on training set: 0.590  Accuracy on test set: 0.586




34.33333333333333  Accuracy on training set: 0.413  Accuracy on test set: 0.415




45.44444444444444  Accuracy on training set: -2.403  Accuracy on test set: -2.394




56.55555555555556  Accuracy on training set: -3.209  Accuracy on test set: -3.214
67.66666666666666  Accuracy on training set: -3.212  Accuracy on test set: -3.217




78.77777777777777  Accuracy on training set: -3.212  Accuracy on test set: -3.217
89.88888888888889  Accuracy on training set: -3.212  Accuracy on test set: -3.217




KeyboardInterrupt: 

In [21]:
mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[200], random_state=42)
mlp.fit(X_train_scaled, y_train)
print(" Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))  



101.0  Accuracy on training set: 0.616  Accuracy on test set: 0.615


In [22]:
mlp = MLPRegressor(max_iter=1500, alpha=0.0001, hidden_layer_sizes=[200, 200], random_state=42)
mlp.fit(X_train_scaled, y_train)
print(" Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)), " Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))  

101.0  Accuracy on training set: 0.714  Accuracy on test set: 0.708
