### Neural Networks

#### 0. Data

In [2]:
import pandas as pd

In [3]:
house = pd.read_excel("http://byungwan.com/class/House_Prices2.xlsx")
house.head()

Unnamed: 0,HomeID,Price,SqFt,Bedrooms,Bathrooms,Offers,House_Class,Brick_Yes,Neighborhood_North,Neighborhood_West
0,1,114300,1790,2,2,2,0,0,0,0
1,2,114200,2030,4,2,3,0,0,0,0
2,3,114800,1740,3,2,1,0,0,0,0
3,4,94700,1980,3,2,3,0,0,0,0
4,5,119800,2130,3,3,3,0,0,0,0


In [5]:
y = house.iloc[:, 6]
x = house.iloc[:, [2,3,4,5,7,8,9]]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

### 0.1 Scaling

In [21]:
x.head()

Unnamed: 0,SqFt,Bedrooms,Bathrooms,Offers,Brick_Yes,Neighborhood_North,Neighborhood_West
0,1790,2,2,2,0,0,0
1,2030,4,2,3,0,0,0
2,1740,3,2,1,0,0,0
3,1980,3,2,3,0,0,0
4,2130,3,3,3,0,0,0


In [9]:
x.describe()

Unnamed: 0,SqFt,Bedrooms,Bathrooms,Offers,Brick_Yes,Neighborhood_North,Neighborhood_West
count,128.0,128.0,128.0,128.0,128.0,128.0,128.0
mean,2000.9375,3.023438,2.445312,2.578125,0.328125,0.34375,0.304688
std,211.572431,0.725951,0.514492,1.069324,0.471376,0.476825,0.462084
min,1450.0,2.0,2.0,1.0,0.0,0.0,0.0
25%,1880.0,3.0,2.0,2.0,0.0,0.0,0.0
50%,2000.0,3.0,2.0,3.0,0.0,0.0,0.0
75%,2140.0,3.0,3.0,3.0,1.0,1.0,1.0
max,2590.0,5.0,4.0,6.0,1.0,1.0,1.0


In [10]:
from sklearn.preprocessing import StandardScaler

In [11]:
scaler = StandardScaler()

In [12]:
scaler_model = scaler.fit(x)

In [13]:
scaled_x = scaler_model.transform(x)

In [17]:
pd.DataFrame(scaled_x, columns=x.columns).describe()

Unnamed: 0,SqFt,Bedrooms,Bathrooms,Offers,Brick_Yes,Neighborhood_North,Neighborhood_West
count,128.0,128.0,128.0,128.0,128.0,128.0,128.0
mean,-6.938894e-18,3.8163920000000003e-17,4.8572260000000006e-17,1.387779e-17,-6.938894e-18,-2.775558e-17,-2.775558e-17
std,1.003929,1.003929,1.003929,1.003929,1.003929,1.003929,1.003929
min,-2.614246,-1.415327,-0.8689388,-1.481614,-0.6988362,-0.7237469,-0.6619685
25%,-0.5738588,-0.03241208,-0.8689388,-0.5427694,-0.6988362,-0.7237469,-0.6619685
50%,-0.004448518,-0.03241208,-0.8689388,0.396075,-0.6988362,-0.7237469,-0.6619685
75%,0.6598635,-0.03241208,1.082362,0.396075,1.43095,1.381699,1.510646
max,2.795152,2.733419,3.033663,3.212608,1.43095,1.381699,1.510646


In [18]:
# std is almost 1, mean is almost 0 -> scaling is successful
# having similar scale for all features helps many machine learning algorithms perform better

### 1. Neural Networks (MLP: Multi-Layer Preception)

In [19]:
from sklearn.neural_network import MLPClassifier

In [20]:
mlp = MLPClassifier(activation='relu', hidden_layer_sizes=(1), max_iter=2000, random_state=0)

# chose what activation function to use
# relu is the most popular one among 3 options ('sigmoid', 'tanh', 'relu')

In [23]:
scaled_x_train = scaler_model.transform(x_train)

In [26]:
scaled_x_test = scaler_model.transform(x_test)

In [24]:
mlp_model = mlp.fit(scaled_x_train, y_train)

In [27]:
y_pred = mlp_model.predict(scaled_x_test)

In [28]:
from sklearn import metrics

In [29]:
metrics.accuracy_score(y_test, y_pred)

0.8205128205128205

#### 1.1 Number of nodes

In [30]:
from sklearn.model_selection import cross_val_score
import numpy as np

In [32]:
nums = list(range(1, 11))

In [33]:
for n in nums:
    mlp_n = MLPClassifier(activation='relu', hidden_layer_sizes=(n), max_iter=2000, random_state=0)
    scores = cross_val_score(mlp_n, scaled_x, y, cv=5)
    mean = np.mean(scores)
    print(str(n) + ": " + str(mean))

1: 0.8744615384615384
2: 0.8427692307692307
3: 0.9061538461538461
4: 0.8975384615384614
5: 0.8975384615384614
6: 0.8901538461538461
7: 0.8978461538461537
8: 0.8895384615384614
9: 0.8735384615384614
10: 0.8821538461538461


#### 1.2 Multiple hidden layers

In [34]:
mlp = MLPClassifier(activation='relu', hidden_layer_sizes=(1,1,1), max_iter=3000, random_state=0)

In [35]:
mlp_model = mlp.fit(scaled_x_train, y_train)

In [36]:
y_pred = mlp_model.predict(scaled_x_test)

In [37]:
metrics.accuracy_score(y_test, y_pred)

0.8717948717948718

In [38]:
x.head()

Unnamed: 0,SqFt,Bedrooms,Bathrooms,Offers,Brick_Yes,Neighborhood_North,Neighborhood_West
0,1790,2,2,2,0,0,0
1,2030,4,2,3,0,0,0
2,1740,3,2,1,0,0,0
3,1980,3,2,3,0,0,0
4,2130,3,3,3,0,0,0


In [39]:
import itertools

In [40]:
a = b = c = range(1, 4)

In [43]:
abc = list(itertools.product(a, b, c))
print(abc)

[(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 2, 1), (1, 2, 2), (1, 2, 3), (1, 3, 1), (1, 3, 2), (1, 3, 3), (2, 1, 1), (2, 1, 2), (2, 1, 3), (2, 2, 1), (2, 2, 2), (2, 2, 3), (2, 3, 1), (2, 3, 2), (2, 3, 3), (3, 1, 1), (3, 1, 2), (3, 1, 3), (3, 2, 1), (3, 2, 2), (3, 2, 3), (3, 3, 1), (3, 3, 2), (3, 3, 3)]


In [45]:
for n in abc:
    mlp_n = MLPClassifier(activation='relu', hidden_layer_sizes=n, max_iter=3000, random_state=0)
    scores = cross_val_score(mlp_n, scaled_x, y, cv=5)
    mean = np.mean(scores)
    print(str(n) + ": " + str(mean))

(1, 1, 1): 0.8975384615384614
(1, 1, 2): 0.8904615384615384
(1, 1, 3): 0.8978461538461537
(1, 2, 1): 0.5
(1, 2, 2): 0.8978461538461537
(1, 2, 3): 0.6824615384615385
(1, 3, 1): 0.5
(1, 3, 2): 0.8901538461538461
(1, 3, 3): 0.8824615384615384
(2, 1, 1): 0.8750769230769231
(2, 1, 2): 0.8984615384615384
(2, 1, 3): 0.8584615384615384
(2, 2, 1): 0.8981538461538461
(2, 2, 2): 0.5
(2, 2, 3): 0.8901538461538461
(2, 3, 1): 0.5
(2, 3, 2): 0.9058461538461537
(2, 3, 3): 0.8430769230769231
(3, 1, 1): 0.5
(3, 1, 2): 0.8590769230769231
(3, 1, 3): 0.8587692307692307
(3, 2, 1): 0.5
(3, 2, 2): 0.8747692307692307
(3, 2, 3): 0.8824615384615384
(3, 3, 1): 0.8898461538461537
(3, 3, 2): 0.8744615384615384
(3, 3, 3): 0.8898461538461537
