# NN6: Zjawisko przeuczenia + regularyzacja (L2)
Adrianna Grudzień

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import random
import copy

from py_files.activation_functions import Sigmoid, Linear, Softmax, Tanh, ReLU
from py_files.metrics import mse, f_score, cross_entropy
from py_files.prepare_data import read_classification_data, read_regression_data
from py_files.network import NN

In [2]:
def cv_network(seeds=[123, 1, 2, 23, 42], build_args=None, fit_args=None):
    scores_test = []
    scores_train = []
    nns  = []
    for s in seeds:
        nn = NN(**build_args, seed=s)
        last_fa = None
        for fa in fit_args:
            nn.fit(**fa)
            last_fa = fa
        nns.append(nn)
        scores_test.append(last_fa['metric'](last_fa['y_test'], nn.predict(last_fa['x_test'])))
        scores_train.append(last_fa['metric'](last_fa['y_train'], nn.predict(last_fa['x_train'])))

    return scores_train, scores_test, nns

In [3]:
df_results = pd.DataFrame(columns=['metric', 'mean metric train', 'mean metric test'])

# Zbiór `multimodal-sparse`

In [4]:
ms_x_train, ms_y_train = read_regression_data('../data/regression/multimodal-sparse-training.csv', index_col=None)
ms_x_test, ms_y_test = read_regression_data('../data/regression/multimodal-sparse-test.csv', index_col=None)

# Brak regularyzacji
regularization_rate = 0

In [9]:
ms_no_reg_build = {'input_shape': ms_x_train.shape, 'neurons_num': [32, 64, 32, 1], 'activations': [ReLU(), ReLU(), ReLU(), Linear()]}
ms_no_reg_fit = [{'x_train': ms_x_train, 'y_train': ms_y_train, 'batch_size': 4, 'n_epochs': 700, 'learning_rate': 0.0003, 'x_test': ms_x_test, 'y_test': ms_y_test, 'loss': mse, 'metric': mse, 'verbose_step': 400, 'regularization_rate': 0},
                 {'x_train': ms_x_train, 'y_train': ms_y_train, 'batch_size': 4, 'n_epochs': 700, 'learning_rate': 0.0001, 'x_test': ms_x_test, 'y_test': ms_y_test, 'loss': mse, 'metric': mse, 'verbose_step': 400, 'regularization_rate': 0},
                 {'x_train': ms_x_train, 'y_train': ms_y_train, 'batch_size': 4, 'n_epochs': 700, 'learning_rate': 0.00005, 'x_test': ms_x_test, 'y_test': ms_y_test, 'loss': mse, 'metric': mse, 'verbose_step': 400, 'regularization_rate': 0}]
results_train, results_test, _ = cv_network(build_args=ms_no_reg_build, fit_args=ms_no_reg_fit)

Epoch number 400/700
Loss on training set: 350.5856932506304, loss on test set: 729.8738387687808
Epoch number 400/700
Loss on training set: 17.25277971590778, loss on test set: 165.21724835335326
Epoch number 400/700
Loss on training set: 9.511991995327717, loss on test set: 114.6597245731323
Epoch number 400/700
Loss on training set: 293.7130669238128, loss on test set: 455.9817262361263
Epoch number 400/700
Loss on training set: 120.32831585930244, loss on test set: 290.32511402625977
Epoch number 400/700
Loss on training set: 10.095370741804379, loss on test set: 125.59158751190051
Epoch number 400/700
Loss on training set: 1029.4693684301321, loss on test set: 1171.1318731111598
Epoch number 400/700
Loss on training set: 162.99555374176265, loss on test set: 396.7926317154539
Epoch number 400/700
Loss on training set: 131.8795544486496, loss on test set: 285.54571279021445
Epoch number 400/700
Loss on training set: 428.40413615607076, loss on test set: 828.6685952059753
Epoch numb

In [10]:
df_results.loc['multimodal-sparse-no-reg'] = ['mse', round(np.mean(results_train)), round(np.mean(results_test))]

In [11]:
df_results

Unnamed: 0,metric,mean metric train,mean metric test
multimodal-sparse-no-reg,mse,57,161


# Regularyzacja
regularization_rate = 0.01

In [12]:
ms_l2_build = {'input_shape': ms_x_train.shape, 'neurons_num': [32, 64, 32, 1], 'activations': [ReLU(), ReLU(), ReLU(), Linear()]}
ms_l2_fit = [{'x_train': ms_x_train, 'y_train': ms_y_train, 'batch_size': 4, 'n_epochs': 700, 'learning_rate': 0.0003, 'x_test': ms_x_test, 'y_test': ms_y_test, 'loss': mse, 'metric': mse, 'verbose_step': 400, 'regularization_rate': 0.01},
                 {'x_train': ms_x_train, 'y_train': ms_y_train, 'batch_size': 4, 'n_epochs': 700, 'learning_rate': 0.0001, 'x_test': ms_x_test, 'y_test': ms_y_test, 'loss': mse, 'metric': mse, 'verbose_step': 400, 'regularization_rate': 0.01},
                 {'x_train': ms_x_train, 'y_train': ms_y_train, 'batch_size': 4, 'n_epochs': 700, 'learning_rate': 0.00005, 'x_test': ms_x_test, 'y_test': ms_y_test, 'loss': mse, 'metric': mse, 'verbose_step': 400, 'regularization_rate': 0.01}]
results_train, results_test , _ = cv_network(build_args=ms_l2_build, fit_args=ms_l2_fit)

Epoch number 400/700
Loss on training set: 347.2434667545519, loss on test set: 791.1746711908073
Epoch number 400/700
Loss on training set: 95.92488180988421, loss on test set: 245.45541107421445
Epoch number 400/700
Loss on training set: 21.04913718562114, loss on test set: 135.46835043401282
Epoch number 400/700
Loss on training set: 364.96524716743124, loss on test set: 447.0007632333909
Epoch number 400/700
Loss on training set: 31.80025251555884, loss on test set: 180.0717883133276
Epoch number 400/700
Loss on training set: 9.90972140665591, loss on test set: 153.72047560004503
Epoch number 400/700
Loss on training set: 323.03997737491045, loss on test set: 515.1251823609941
Epoch number 400/700
Loss on training set: 137.74506252344602, loss on test set: 295.7853958423769
Epoch number 400/700
Loss on training set: 127.55183833300521, loss on test set: 226.16863259597983
Epoch number 400/700
Loss on training set: 373.916924257357, loss on test set: 500.5152181150997
Epoch number 4

In [13]:
df_results.loc['multimodal-sparse-l2-0.01'] = ['mse', round(np.mean(results_train), 2), round(np.mean(results_test), 2)]

In [14]:
df_results

Unnamed: 0,metric,mean metric train,mean metric test
multimodal-sparse-no-reg,mse,57.0,161.0
multimodal-sparse-l2-0.01,mse,33.22,137.25


Dzięki regularyzacji średnie MSE zmalało.

# Zbiór `rings5-sparse`

In [31]:
r5_x_train, r5_y_train = read_classification_data('../data/classification/rings5-sparse-training.csv')
r5_x_test, r5_y_test = read_classification_data('../data/classification/rings5-sparse-test.csv')

In [34]:
r5_no_reg_build = {'input_shape': r5_x_train.shape, 'neurons_num': [40, 40, 5], 'activations': [ReLU(), ReLU(), Softmax()]}
r5_no_reg_fit = [{'x_train': r5_x_train, 'y_train': r5_y_train, 'batch_size': 4, 'n_epochs': 800, 'learning_rate': 0.00005, 'x_test': r5_x_test, 'y_test': r5_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 400, 'regularization_rate': 0},
                 {'x_train': r5_x_train, 'y_train': r5_y_train, 'batch_size': 4, 'n_epochs': 400, 'learning_rate': 0.00001, 'x_test': r5_x_test, 'y_test': r5_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 400, 'regularization_rate': 0}]
                 
results_train, results_test, _ = cv_network(build_args=r5_no_reg_build, fit_args=r5_no_reg_fit)

  return np.exp(x) / np.sum(np.exp(x))
  return np.exp(x) / np.sum(np.exp(x))


Epoch number 400/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 800/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 400/400
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856


  return np.exp(x) / np.sum(np.exp(x))
  return np.exp(x) / np.sum(np.exp(x))


Epoch number 400/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 800/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 400/400
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 400/800
Loss on training set: 1.183169074040114 f_score on training set: 0.5265588808622863, loss on test set: 2.360917912181012 f_score on test set: 0.3653873143192332
Epoch number 800/800
Loss on training set: 0.6231349430560033 f_score on training set: 0.5811168479730204, loss on test set: 1.7938822859239498 f_score on test set: 0.4248904786890268
Epoch number 400/400
Loss on training set: 0.2381615132168083 f_score on training set: 0.7261048755166403, loss on test set: 1.3611452098925685 f_score on test set: 0

  return np.exp(x) / np.sum(np.exp(x))
  return np.exp(x) / np.sum(np.exp(x))


Epoch number 400/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 800/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 400/400
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856


  return np.exp(x) / np.sum(np.exp(x))
  return np.exp(x) / np.sum(np.exp(x))


Epoch number 400/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 800/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 400/400
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856


In [35]:
df_results.loc['rings5-sparse-no-reg'] = ['mse', round(np.mean(results_train)), round(np.mean(results_test))]

In [None]:
r5_l2_build = {'input_shape': r5_x_train.shape, 'neurons_num': [40, 40, 5], 'activations': [ReLU(), ReLU(), Softmax()]}
r5_l2_fit = [{'x_train': r5_x_train, 'y_train': r5_y_train, 'batch_size': 4, 'n_epochs': 800, 'learning_rate': 0.00005, 'x_test': r5_x_test, 'y_test': r5_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 10, 'regularization_rate': 0.1},
                 {'x_train': r5_x_train, 'y_train': r5_y_train, 'batch_size': 4, 'n_epochs': 400, 'learning_rate': 0.00001, 'x_test': r5_x_test, 'y_test': r5_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 10, 'regularization_rate': 0.1}]
results_train, results_test, _ = cv_network(build_args=r5_no_reg_build, fit_args=r5_no_reg_fit)

  return np.exp(x) / np.sum(np.exp(x))
  return np.exp(x) / np.sum(np.exp(x))


Epoch number 400/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 800/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856
Epoch number 400/400
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856


  return np.exp(x) / np.sum(np.exp(x))
  return np.exp(x) / np.sum(np.exp(x))


Epoch number 400/800
Loss on training set: nan f_score on training set: 0.06666666666666668, loss on test set: nan f_score on test set: 0.00448688602765856


In [None]:
df_results.loc['rings5-sparse-l2-0.1'] = ['mse', round(np.mean(results_train)), round(np.mean(results_test))]

In [None]:
df_results

In [None]:
def average_loss(nns):
    n = len(nns) - 1
    sum_loss_train = np.array(nns[1].history['loss_train'])
    sum_loss_test = np.array(nns[1].history['loss_test'])
    for i in range(2, len(nns)):
        sum_loss_train = np.add(sum_loss_train, np.array(nns[i].history['loss_train']))
        sum_loss_test = np.add(sum_loss_test, np.array(nns[i].history['loss_test']))
    return sum_loss_train / n, sum_loss_test / n

In [None]:
sum_loss_train = np.array(r5_no_reg_nns[1].history['loss_train'])
sum_loss_test = np.array(r5_no_reg_nns[1].history['loss_test'])
for i in range(2, 5):
    sum_loss_train = np.add(sum_loss_train, np.array(r5_no_reg_nns[i].history['loss_train']))
    sum_loss_test = np.add(sum_loss_test, np.array(r5_no_reg_nns[i].history['loss_test']))

In [None]:
avg_loss_r5_no_reg = average_loss(r5_no_reg_nns)
avg_loss_r5_l2 = average_loss(r5_l2_nns)

In [None]:
plt.figure(figsize=[12, 4])
plt.subplot(1, 2, 1)
plt.plot(np.arange(1200), avg_loss_r5_no_reg[0])
plt.ylim(0, 5)
plt.title('Uśredniona funkcja straty bez regularyzacji')
plt.plot(np.arange(1200), avg_loss_r5_no_reg[1], c='red')
plt.subplot(1, 2, 2)
plt.plot(np.arange(270), avg_loss_r5_l2[0])
plt.ylim(0, 5)
plt.title('Uśredniona funkcja straty z regularyzacją L2')
plt.plot(np.arange(270), avg_loss_r5_l2[1], c='red')
plt.show()

# Zbiór `rings3-balance`

In [None]:
r3_x_train, r3_y_train = read_classification_data('../data/classification/rings3-balance-training.csv')
r3_x_test, r3_y_test = read_classification_data('../data/classification/rings3-balance-test.csv')

In [None]:
r3_no_reg_build = {'input_shape': r3_x_train.shape, 'neurons_num': [40, 40, 3], 'activations': [ReLU(), ReLU(), Softmax()]}
r3_no_reg_fit = [{'x_train': r3_x_train, 'y_train': r3_y_train, 'batch_size': 4, 'n_epochs': 800, 'learning_rate': 0.00005, 'x_test': r3_x_test, 'y_test': r3_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 400, 'regularization_rate': 0},
                 {'x_train': r3_x_train, 'y_train': r3_y_train, 'batch_size': 4, 'n_epochs': 400, 'learning_rate': 0.00001, 'x_test': r3_x_test, 'y_test': r3_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 400, 'regularization_rate': 0}]
                 
results_train, results_test, _ = cv_network(build_args=r3_no_reg_build, fit_args=r3_no_reg_fit)

# Zbiór `xor3-balance`

In [None]:
x3_x_train, x3_y_train = read_classification_data('../data/classification/xor3-balance-training.csv')
x3_x_test, x3_y_test = read_classification_data('../data/classification/xor3-balance-test.csv')

In [None]:
x3_no_reg_build = {'input_shape': x3_x_train.shape, 'neurons_num': [40, 40, 3], 'activations': [ReLU(), ReLU(), Softmax()]}
x3_no_reg_fit = [{'x_train': x3_x_train, 'y_train': x3_y_train, 'batch_size': 4, 'n_epochs': 800, 'learning_rate': 0.00005, 'x_test': x3_x_test, 'y_test': x3_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 400, 'regularization_rate': 0},
                 {'x_train': x3_x_train, 'y_train': x3_y_train, 'batch_size': 4, 'n_epochs': 400, 'learning_rate': 0.00001, 'x_test': x3_x_test, 'y_test': x3_y_test, 'loss': cross_entropy, 'metric': f_score, 'verbose_step': 400, 'regularization_rate': 0}]
                 
results_train, results_test, _ = cv_network(build_args=x3_no_reg_build, fit_args=x3_no_reg_fit)