# Neural Network 실습

## 사용할 데이터

1. Car(ToyotaCorolla) - regression 예제
2. wdbc - classification 예제

### 실습 모듈 import


In [None]:
%matplotlib inline

import mglearn
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize,StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier,MLPRegressor
from sklearn.datasets import make_moons, make_circles, make_classification

### 분류 성능 지표 함수 정의 
1. Confusion matrix
2. Accuracy
3. Recall
4. Precision
5. F-1 measure
6. Balanced measure

In [None]:
##########################################################################
# 분류기 성능 지표 산출 함수
##########################################################################
def evaluation(y_true, y_pred):
    """Compute binary-classification metrics from a confusion matrix.

    The confusion matrix is indexed as ``cfm[true_label, predicted_label]``
    (scikit-learn convention), and only the entries for labels 0 and 1 are
    read, so the function is meant for two-class problems with class 1 as
    the positive class.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        A list ``[cfm, acc, precision, recall, f1, bcr]`` where
        ``precision = TP / (all predicted positive)`` and
        ``recall = TP / (all actually positive)``. The values and their
        positions are the same as before; only the local names were
        corrected (they used to be swapped).
    """
    cfm = confusion_matrix(y_true=y_true, y_pred=y_pred)
    true_neg, true_pos = cfm[0, 0], cfm[1, 1]

    acc = (true_neg + true_pos) / np.sum(cfm)
    # Column 1 collects every sample predicted as positive -> precision.
    precision = true_pos / np.sum(cfm[:, 1])
    # Row 1 collects every sample that is actually positive -> recall.
    recall = true_pos / np.sum(cfm[1, :])
    f1 = 2 * (precision * recall) / (precision + recall)
    # Geometric mean of the positive-class and negative-class hit rates.
    bcr = np.sqrt(recall * true_neg / np.sum(cfm[0, :]))
    return [cfm, acc, precision, recall, f1, bcr]

### 기본 Neural Network model 함수 정의

**sklearn.neural_network.MLPClassifier** : 'http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html'

In [None]:
##########################################################################
# Neural network model
##########################################################################
def NN_classifier(train_x, train_y, hidden_nodes, plot=False):
    """Fit a single-hidden-layer MLP classifier and optionally plot it.

    Args:
        train_x: Training features. When ``plot`` is true, columns 0 and 1
            are used as the two plotting axes.
        train_y: Training labels.
        hidden_nodes: Number of nodes in the single hidden layer.
        plot: When true, draw the decision regions and the training points.

    Returns:
        The fitted ``MLPClassifier``.
    """
    model = MLPClassifier(
        hidden_layer_sizes=[hidden_nodes],
        solver='lbfgs',
        random_state=0,
        verbose=True,
    )
    model.fit(X=train_x, y=train_y)

    if not plot:
        return model

    # Shade the decision regions first, then overlay the training samples.
    mglearn.plots.plot_2d_separator(model, train_x, fill=True, alpha=.3)
    mglearn.discrete_scatter(train_x[:, 0], train_x[:, 1], train_y)
    plt.xlabel("특성 0")
    plt.ylabel("특성 1")
    plt.show()
    return model


In [None]:
# Plot three common activation functions (tanh, ReLU, sigmoid) on [-3, 3].
line = np.linspace(-3, 3, 100)
plt.plot(line, np.tanh(line), label="tanh")
# ReLU: element-wise max(x, 0).
plt.plot(line, np.maximum(line, 0), linestyle='--', label="relu")
# Logistic sigmoid: 1 / (1 + exp(-x)).
plt.plot(line,1 / (1 + np.exp(-line)),linestyle='-.',label='sigmoid')
plt.legend(loc="best")
plt.xlabel("x")
plt.ylabel("relu(x), tanh(x), sigmoid(x)")
plt.grid()

## 예제 데이터를 활용한 비선형 분류 모델의 역할 수행

In [None]:
# How a non-linear classifier behaves as the number of hidden nodes changes.
# Generate a noisy two-moons data set (not linearly separable).
X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
# Stratified split; test_size is not given, so the library default applies.
X_train, X_test, y_train, y_test = \
        train_test_split(X, y, stratify=y,random_state=42)

### 히든노드수가 2개인경우

In [None]:
# One hidden layer with 2 nodes.
mlp_2 = MLPClassifier(solver='lbfgs', random_state=0,  # activation not set: default relu
                      hidden_layer_sizes=[2],verbose=True)
mlp_2.fit(X_train, y_train)
# Shade the fitted decision regions, then overlay the training points.
mglearn.plots.plot_2d_separator(mlp_2, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Attribute 0")
plt.ylabel("Attribute 1")
plt.show()

### 히든노드수가 10개인경우

In [None]:
# One hidden layer with 10 nodes.
# The split is repeated with the same arguments (stratify=y, random_state=42)
# so this cell can be run on its own once X and y exist.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,random_state=42)
mlp_10 = MLPClassifier(solver='lbfgs', random_state=0, 
                       hidden_layer_sizes=[10],verbose=True)
mlp_10.fit(X_train, y_train)
# Shade the fitted decision regions, then overlay the training points.
mglearn.plots.plot_2d_separator(mlp_10, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Attribute 0")
plt.ylabel("Attribute 1")


### 10개의 히든노드로 구성된 2개의 은닉층을 구성한 경우

In [None]:
# Two hidden layers with 10 units each (activation left at its default).
mlp = MLPClassifier(solver='lbfgs', random_state=0,
                    hidden_layer_sizes=[10, 10])
mlp.fit(X_train, y_train)
# Shade the fitted decision regions, then overlay the training points.
mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Attribute 0")
plt.ylabel("Attribute 1")

### Activation 함수를 'tanh'로 바꿨을 경우

In [None]:
# Two hidden layers with 10 units each, using the tanh activation.
mlp = MLPClassifier(solver='lbfgs', activation='tanh',
                    random_state=0, hidden_layer_sizes=[10, 10])
mlp.fit(X_train, y_train)
# Shade the fitted decision regions, then overlay the training points.
mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Attribute 0")
plt.ylabel("Attribute 1")


### Activation 함수를 'sigmoid'로 바꿨을 경우

In [None]:
# Two hidden layers with 10 units each, using the logistic (sigmoid) activation.
mlp = MLPClassifier(solver='lbfgs', activation='logistic', # sigmoid
                    random_state=0, hidden_layer_sizes=[10, 10])
mlp.fit(X_train, y_train)
# Shade the fitted decision regions, then overlay the training points.
mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Attribute 0")
plt.ylabel("Attribute 1")

In [None]:
# IPython-only help lookup (not valid in plain Python): shows the MLPClassifier docs.
MLPClassifier?

### Neural Network의 학습 iteration에 따른 분류 결정경계면

In [None]:
# Decision boundary as a function of the optimiser's iteration budget.
# The hidden-layer size is held in one variable so the subplot titles always
# report the size the model was actually built with (the title used to say 5
# while the model used 10).
n_hidden = 10
max_iters = [10, 20, 40, 50, 100, 200]

fig, axes = plt.subplots(1, len(max_iters), figsize=(24, 4))
for ax, n_maxiter in zip(axes, max_iters):
    mlp = MLPClassifier(solver='lbfgs', random_state=0,
                        hidden_layer_sizes=n_hidden, max_iter=n_maxiter)
    mlp.fit(X_train, y_train)
    # Shade the fitted decision regions, then overlay the training points.
    mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
    ax.set_title("n_hidden=[{}]\nmax_iteration={}".format(
                  n_hidden, n_maxiter))

### Neural Network의 hidden node의 수와 regularize 파라미터 alpha의 변화에 따른 분류 경계면


In [None]:
# Decision boundaries for a grid of settings: one row of subplots per
# hidden-node count, one column per regularisation strength (alpha).
fig, axes = plt.subplots(4, 4, figsize=(20, 20))
for axx, n_hidden_nodes in zip(axes, [2,4,8,10]):
    for ax, alpha in zip(axx, [0.0001, 0.01, 0.1, 0.5]):
        mlp = MLPClassifier(solver='lbfgs', random_state=0,
                            hidden_layer_sizes=[n_hidden_nodes],
                            alpha=alpha)
        mlp.fit(X_train, y_train)
        # The alpha=.3 below is a fixed plotting argument, not the model's alpha.
        mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
        mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
        ax.set_title("n_hidden=[{}]\nalpha={:.4f}".format(
                      n_hidden_nodes, alpha))

### Neural Network의 hidden node의 수와 regularize 파라미터 alpha의 변화에 따른 분류 경계면 (2개의 은닉층)

In [None]:
# Same grid as the single-layer case, but with two hidden layers of equal size:
# one row of subplots per layer width, one column per alpha.
fig, axes = plt.subplots(4, 4, figsize=(20, 20))
for axx, n_hidden_nodes in zip(axes, [2,4,6,8]):
    for ax, alpha in zip(axx, [0.0001, 0.01, 0.1, 0.5]):
        mlp = MLPClassifier(solver='lbfgs', random_state=0,
                            hidden_layer_sizes=[n_hidden_nodes, n_hidden_nodes],
                            alpha=alpha)
        mlp.fit(X_train, y_train)
        # The alpha=.3 below is a fixed plotting argument, not the model's alpha.
        mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
        mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
        ax.set_title("n_hidden=[{}, {}]\nalpha={:.4f}".format(
            n_hidden_nodes, n_hidden_nodes, alpha))


### Neural Network의 regularize 파라미터 alpha의 변화에 따른 다양한 데이터의 분류 경계면 시각화


In [None]:
#### Decision boundaries under different L2 penalties (alpha) for three data shapes
h = .02  # step size in the mesh
alphas = np.logspace(-5, 3, 5)
names = ['alpha ' + str(alpha) for alpha in alphas]

# One classifier per alpha; each one is re-fitted on every data set below.
classifiers = [MLPClassifier(hidden_layer_sizes=(100, ),
                             alpha=alpha, random_state=1)
               for alpha in alphas]

# Third data set: a two-feature classification problem shifted by uniform noise.
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=0, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            linearly_separable]

# The colour maps do not depend on the data set, so build them once.
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])

figure = plt.figure(figsize=(17, 9))
n_rows, n_cols = len(datasets), len(classifiers) + 1
i = 1  # running 1-based subplot index (row-major)

# iterate over datasets
for X, y in datasets:
    # preprocess dataset, split into training and test part
    X = StandardScaler().fit_transform(X)
    # Seed the split so the plotted scores are reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.4, random_state=42)

    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    # Mesh points as an (n_points, 2) array, shared by all classifiers in this row.
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    # First column of the row: the raw data only.
    ax = plt.subplot(n_rows, n_cols, i)
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test,
               cmap=cm_bright, alpha=0.6)
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(n_rows, n_cols, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Colour every mesh point by the classifier's output: the decision
        # function when the estimator has one, otherwise P(class 1).
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(grid_points)
        else:
            Z = clf.predict_proba(grid_points)[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                   edgecolors='black', s=25)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6, edgecolors='black', s=25)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(name)
        # Test-set accuracy in the lower-right corner, without the leading zero.
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

figure.subplots_adjust(left=.02, right=.98)
plt.show()

### 실제 분류 데이터를 활용하여 Neural Network 학습 1

In [None]:
# Neural-network training on a real data set: classification example.
# Load the WDBC table from a CSV file located relative to the working directory.
wdbc = pd.read_csv('./data/wdbc.csv', delimiter=',')
print(wdbc.shape)

# Last expression of the cell: the notebook displays the first rows.
wdbc.head()

In [None]:
# Switch from a DataFrame to a plain array so columns can be addressed by position.
wdbc = np.array(wdbc)

# Binary target taken from column 30: 1 where the entry equals 'M', else 0.
# (Original author's note: the two classes are split roughly 4:6.)
wdbc_y = np.array([int(label == 'M') for label in wdbc[:, 30]])
positive_share = round(sum(wdbc_y) / len(wdbc_y), 4)
print('Label Balance : {0}'.format(positive_share))

In [None]:
# Feature matrix: every column except column 30 (the column the label is read from).
wdbc_x = np.delete(wdbc, [30], axis=1)

# Rescale each column (axis=0) using the 'max' norm.
# NOTE(review): scaling is computed on all rows before the train/test split.
wdbc_x = normalize(wdbc_x, axis=0, norm='max')

# Split into training and test data (30% held out, fixed seed).
wdbc_train_x, wdbc_test_x, wdbc_train_y, wdbc_test_y = \
        train_test_split(wdbc_x, wdbc_y, test_size=0.3, random_state=0)

### Cross-Validation을 통한 인공 신경망 학습 파라미터 tuning


**sklearn.model_selection.GridSearchCV** : 'http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html'

In [None]:
### Cross validation을 통한 최적의 hidden node & max iteration 찾기
def NN_CV_classifier(train_x, train_y, max_nodes, CV_N, plot=False):
    """Grid-search an MLP classifier's hidden-layer size and max_iter, then refit.

    Candidate hidden-layer sizes are 1, 11, 21, ... (step 10) below
    ``max_nodes``; candidate ``max_iter`` values are 100..500 in steps of 100.
    The best combination found by cross-validation is printed and used to fit
    a fresh ``MLPClassifier`` on the full training data.

    Args:
        train_x: Training features.
        train_y: Training labels.
        max_nodes: Exclusive upper bound for the hidden-layer sizes tried.
        CV_N: Cross-validation setting passed to ``GridSearchCV`` as ``cv``.
        plot: Unused; kept so existing calls keep working.

    Returns:
        An ``MLPClassifier`` fitted with the selected parameters.
    """
    parameters = {
        'hidden_layer_sizes': np.arange(start=1, stop=max_nodes, step=10).tolist(),
        'max_iter': [100, 200, 300, 400, 500],
    }
    clf = GridSearchCV(MLPClassifier(), parameters, cv=CV_N)
    clf.fit(train_x, train_y)

    print( clf.best_params_)
    # Look the winners up by key rather than by position in the dict, so the
    # result does not depend on the order in which best_params_ lists them.
    best_hidden = clf.best_params_['hidden_layer_sizes']
    best_max_iter = clf.best_params_['max_iter']

    print("The optimal number of hidden nodes : {}\n  & max iteration : {}".format(best_hidden, best_max_iter))

    # Fit the final classifier with the selected parameters.
    opt_NN = MLPClassifier(hidden_layer_sizes=best_hidden,
                           max_iter=best_max_iter)
    opt_NN.fit(X=train_x, y=train_y)

    return opt_NN

### 기존 LDA와의 분류 성능비교

In [None]:
### LDA  분류기와 성능 비교
##########################################################################
# LDA 분류 모델
##########################################################################
def LDA_classifier(train_x, train_y, plot=False):
    """Fit a linear discriminant analysis model and optionally plot its projection.

    Args:
        train_x: Training features.
        train_y: Training labels; the plot selects rows where the label
            equals 0 and rows where it equals 1.
        plot: When true, draw overlaid histograms of the LDA-transformed
            training data, one per class.

    Returns:
        The fitted ``LinearDiscriminantAnalysis`` model.
    """
    model = LinearDiscriminantAnalysis()
    model.fit(X=train_x, y=train_y)

    if not plot:
        return model

    # Project the training data with the fitted model, then histogram the
    # projected values class by class.
    projected = model.transform(X=train_x)
    per_class_style = ((0, 'b', 'Class 0'), (1, 'r', 'Class 1'))
    for class_value, colour, legend_text in per_class_style:
        plt.hist(projected[train_y == class_value], bins=100,
                 color=colour, label=legend_text)
    plt.title("Transposed data Histogram")
    plt.xlabel("Value")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()
    return model

# LDA: fit on the training split, predict the test split, report the metrics.
wdbc_lda_classifier = LDA_classifier(train_x=wdbc_train_x,
                                     train_y=wdbc_train_y, plot=True)
wdbc_lda_pred = wdbc_lda_classifier.predict(X=wdbc_test_x)
wdbc_lda_pred_prob = wdbc_lda_classifier.predict_proba(X=wdbc_test_x)
# evaluation() returns TP / predicted-positives (precision) in position 2 and
# TP / actual-positives (recall) in position 3, so *_pre / *_rec are named
# correctly here.
wdbc_lda_cfm, wdbc_lda_acc, wdbc_lda_pre, wdbc_lda_rec, wdbc_lda_f1, wdbc_lda_bcr =\
                    evaluation(y_true=wdbc_test_y, y_pred=wdbc_lda_pred)

print('==== LDA - Classifier ================')
# Labels now follow the argument order (the "recall"/"precision" captions used
# to be attached to the wrong values).
print('accuracy:{}\nprecision:{}\nrecall:{}\nF1:{}\nBCR:{}'.format(round(wdbc_lda_acc,4),
                                                                   round(wdbc_lda_pre,4),
                                                                   round(wdbc_lda_rec,4),
                                                                   round(wdbc_lda_f1,4),
                                                                   round(wdbc_lda_bcr,4)))
print('=======================================')

### Neural Network 학습


In [None]:
# Baseline network (10 hidden nodes) and a cross-validated, tuned network.
wdbc_NN_clssifier = NN_classifier(train_x=wdbc_train_x, train_y=wdbc_train_y,
                                  hidden_nodes=10)

wdbc_optNN_classifier = NN_CV_classifier(train_x=wdbc_train_x, train_y=wdbc_train_y,
                                         max_nodes=100,CV_N=5)

wdbc_NN_pred = wdbc_NN_clssifier.predict(X=wdbc_test_x)
# evaluation() returns TP / predicted-positives (precision) in position 2 and
# TP / actual-positives (recall) in position 3.
wdbc_nn_cfm, wdbc_nn_acc, wdbc_nn_pre, wdbc_nn_rec, wdbc_nn_f1, wdbc_nn_bcr =\
                evaluation(y_true=wdbc_test_y, y_pred=wdbc_NN_pred)


print('==== NN - Classifier ================')
# Labels now follow the argument order (the "recall"/"precision" captions used
# to be attached to the wrong values).
print('accuracy:{}\nprecision:{}\nrecall:{}\nF1:{}\nBCR:{}'.format(round(wdbc_nn_acc,4),
                                                                   round(wdbc_nn_pre,4),
                                                                   round(wdbc_nn_rec,4),
                                                                   round(wdbc_nn_f1,4),
                                                                   round(wdbc_nn_bcr,4)))
print('====================================')

### 단순 LDA와 Neural Network 분류 성능 비교

In [None]:
# Evaluate the tuned network on the test split.
wdbc_optNN_pred = wdbc_optNN_classifier.predict(X=wdbc_test_x)
# evaluation() returns TP / predicted-positives (precision) in position 2 and
# TP / actual-positives (recall) in position 3.
wdbc_onn_cfm, wdbc_onn_acc, wdbc_onn_pre, wdbc_onn_rec, wdbc_onn_f1, wdbc_onn_bcr =\
                evaluation(y_true=wdbc_test_y, y_pred=wdbc_optNN_pred)

print('==== NN - Optimized Classifier ================')
# Labels now follow the argument order (the "recall"/"precision" captions used
# to be attached to the wrong values).
print('accuracy:{}\nprecision:{}\nrecall:{}\nF1:{}\nBCR:{}'.format(round(wdbc_onn_acc,4),
                                                                   round(wdbc_onn_pre,4),
                                                                   round(wdbc_onn_rec,4),
                                                                   round(wdbc_onn_f1,4),
                                                                   round(wdbc_onn_bcr,4)))
print('==============================================')

print('')

# Side-by-side comparison of the three models, one row per model.
summary_tb = pd.DataFrame({'Model':['LDA','Base Neural Network','Optimized Neural Network'],
                           'Accuracy': [round(wdbc_lda_acc, 4), round(wdbc_nn_acc, 4), round(wdbc_onn_acc, 4)],
                           'Precision': [round(wdbc_lda_pre, 4), round(wdbc_nn_pre, 4), round(wdbc_onn_pre, 4)],
                           'Recall': [round(wdbc_lda_rec, 4), round(wdbc_nn_rec, 4), round(wdbc_onn_rec, 4)],
                           'F1-measure': [round(wdbc_lda_f1, 4), round(wdbc_nn_f1, 4), round(wdbc_onn_f1, 4)],
                           'BCR': [round(wdbc_lda_bcr, 4), round(wdbc_nn_bcr, 4), round(wdbc_onn_bcr, 4)]})
# Fix the column order explicitly.
summary_tb = summary_tb[['Model', 'Accuracy', 'Precision', 'Recall', 'F1-measure', 'BCR']]
print(summary_tb)

# Last expression of the cell: the notebook also renders the table.
summary_tb

### 실제 회귀 데이터를 활용하여 Neural Network 학습 2

In [None]:
##############
# Regression example: load the ToyotaCorolla table from a CSV file located
# relative to the working directory.
##############
car = pd.read_csv('./data/ToyotaCorolla.csv', delimiter=',')

print(car.shape)
# Last expression of the cell: the notebook displays the first rows.
car.head()

In [None]:
# Switch to a plain array; column 0 is the regression target, the rest are features.
car = np.array(car)
car_y = np.array(car[:, 0])
car_x = np.delete(car, [0], axis=1)

# Rescale each feature column (axis=0) using the 'max' norm.
# NOTE(review): scaling is computed on all rows before the train/test split.
car_x = normalize(car_x, axis=0, norm='max')

# Split into training and test data (30% held out, fixed seed).
car_train_x, car_test_x, car_train_y, car_test_y =\
            train_test_split(car_x, car_y, test_size=0.3, random_state=0)

### Neural Network 학습


**sklearn.neural_network.MLPRegressor** : 'http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html'


In [None]:
# IPython-only help lookup (not valid in plain Python): shows the MLPRegressor docs.
MLPRegressor?

In [None]:
def NN_CV_regresser(train_x, train_y, max_nodes, CV_N, plot=False):
    """Grid-search an MLP regressor's hidden-layer size and max_iter, then refit.

    Candidate hidden-layer sizes are 1, 11, 21, ... (step 10) below
    ``max_nodes``; candidate ``max_iter`` values are 200, 400, 800, 1000 and
    2000. The best combination found by cross-validation is printed and used
    to fit a fresh ``MLPRegressor`` on the full training data.

    Args:
        train_x: Training features.
        train_y: Training targets.
        max_nodes: Exclusive upper bound for the hidden-layer sizes tried.
        CV_N: Cross-validation setting passed to ``GridSearchCV`` as ``cv``.
        plot: Unused; kept so existing calls keep working.

    Returns:
        An ``MLPRegressor`` fitted with the selected parameters.
    """
    # Shared by the search and the final model: the selected max_iter is only
    # meaningful for the learning rate it was tuned with.
    learning_rate_init = 0.01

    parameters = {
        'hidden_layer_sizes': np.arange(start=1, stop=max_nodes, step=10).tolist(),
        'max_iter': [200, 400, 800, 1000, 2000],
    }
    clf = GridSearchCV(MLPRegressor(learning_rate_init=learning_rate_init),
                       parameters, cv=CV_N)
    clf.fit(train_x, train_y)

    print( clf.best_params_)
    # Look the winners up by key rather than by position in the dict, so the
    # result does not depend on the order in which best_params_ lists them.
    best_hidden = clf.best_params_['hidden_layer_sizes']
    best_max_iter = clf.best_params_['max_iter']

    print("The optimal number of hidden nodes : {}\n  & max iteration : {}".format(best_hidden,
                                                                                   best_max_iter))

    # Fit the final regressor with the selected parameters and the same
    # learning rate the search used.
    opt_NN = MLPRegressor(hidden_layer_sizes=best_hidden,
                          max_iter=best_max_iter,
                          learning_rate_init=learning_rate_init)
    opt_NN.fit(X=train_x, y=train_y)

    return opt_NN

### Neural Network 학습

In [None]:
# Baseline regressor: all hyper-parameters left at the library defaults.
base_NNR = MLPRegressor()
base_NNR.fit(car_train_x, car_train_y)
# Predictions on the held-out test split.
car_base_NNR_pred=base_NNR.predict(car_test_x)

In [None]:
# Train with the cross-validated hidden-node count and max iteration,
# then predict the held-out test split.
car_optNNR=NN_CV_regresser(train_x=car_train_x, train_y=car_train_y,
                           max_nodes=100,CV_N=5)
car_optNNR_pred = car_optNNR.predict(X=car_test_x)

# Test RMSE of the baseline regressor (square root of the mean squared error).
NN_base_regression_rmse = np.sqrt(mean_squared_error(y_true=car_test_y, 
                                                     y_pred=car_base_NNR_pred))

# Test RMSE of the tuned regressor.
NN_opt_regression_rmse = np.sqrt(mean_squared_error(y_true=car_test_y, 
                                                    y_pred=car_optNNR_pred))

### Neural Network for regression 성능 비교

In [None]:
# Report both test RMSE values, rounded to four decimals.
# (Fixes the "Defualt" misspelling and the doubled colon in the printed text.)
print("Default neural network regressor RMSE: {}".format(round(NN_base_regression_rmse,4)))
print("Optimal neural network regressor RMSE: {}".format(round(NN_opt_regression_rmse,4)))