# BSK Dataset

**Package**

In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

**Loading and Processing Data**

In [2]:
# Read the three pre-split CSV files (train, test, validation) into DataFrames.
# NOTE(review): the saved output of this cell is a FileNotFoundError for train.csv;
# the paths are relative, so confirm the dataset location / working directory.
dftraining, dftest, dfvalidation = (
    pd.read_csv(csv_path)
    for csv_path in (
        "datasets/nncoursedataset/bks/train.csv",
        "datasets/nncoursedataset/bks/test.csv",
        "datasets/nncoursedataset/bks/validation.csv",
    )
)

FileNotFoundError: [Errno 2] No such file or directory: 'datasets/nncoursedataset/bks/train.csv'

In [None]:
# Preview the first rows of the training frame
dftraining.head()

In [None]:
# Per-column summary statistics of the training frame
dftraining.describe()

In [None]:
# Show the training frame as a raw array; the [:-1] row slice leaves out the last row.
# NOTE(review): confirm that dropping the final row is intentional.
dftraining[:-1].values

In [None]:
# Histogram of the training frame's columns, written to disk and shown inline.
# The original cell called `save_fig`, which is not defined anywhere in this
# notebook (NameError), so the figure is saved with matplotlib's own savefig.
# matplotlib is already imported and set to inline mode in the first cell.
dftraining.hist(bins=50, figsize=(20, 15))
plt.tight_layout()
plt.savefig("attribute_histogram_plots.png", dpi=300)
plt.show()

In [None]:
# Turn each DataFrame into a plain NumPy array.
# NOTE(review): only the training frame is cut with [:-1], which leaves out its
# last row; validation and test keep every row -- confirm this is intended.
training = dftraining.iloc[:-1].to_numpy()
validation = dfvalidation.to_numpy()
test = dftest.to_numpy()

In [None]:
# Report the array shape of each split (label spelling corrected: "dimensions")
print(f"Training dimensions: {training.shape}")
print(f"Validation dimensions: {validation.shape}")
print(f"Test dimensions: {test.shape}")

**Preparing data and targets**

In [None]:
# Column-wise split of every array: all columns except the last are the model
# inputs (X), the last column is the target (Y).
Xtrain, Ytrain = training[:, :-1], training[:, -1]
Xtest, Ytest = test[:, :-1], test[:, -1]
Xvalidation, Yvalidation = validation[:, :-1], validation[:, -1]

In [None]:
# Shape of the training inputs (all columns but the last)
Xtrain.shape

In [None]:
# Shape of the training targets (last column only)
Ytrain.shape

**Defining Model**

In [None]:
# Scaled model: standardise the inputs, then train a ReLU network with one
# hidden layer of 150 units using SGD.
# NOTE(review): `mlp` is not used for prediction or scoring in the cells shown below.
mlp = make_pipeline(
    StandardScaler(),
    MLPRegressor(
        # architecture
        hidden_layer_sizes=(150,),
        activation='relu',
        # optimiser
        solver='sgd',
        alpha=0.0001,
        batch_size='auto',
        learning_rate='constant',
        learning_rate_init=0.001,
        power_t=0.5,
        momentum=0.9,
        nesterovs_momentum=True,
        # training loop / stopping
        max_iter=200,
        shuffle=True,
        random_state=None,
        tol=0.0001,
        n_iter_no_change=10,
        early_stopping=False,
        validation_fraction=0.1,
        verbose=True,
        warm_start=True,
        # settings that belong to the other solvers (adam / lbfgs)
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        max_fun=15000,
    ),
)

mlp.fit(Xtrain, Ytrain)

In [None]:
# Stand-alone ReLU network (150 hidden units, SGD, initial learning rate 0.001).
# NOTE(review): unlike the `mlp` pipeline, this model is fitted on unscaled inputs.
mlp1 = MLPRegressor(
    # architecture
    hidden_layer_sizes=(150,),
    activation='relu',
    # optimiser
    solver='sgd',
    alpha=0.0001,
    batch_size='auto',
    learning_rate='constant',
    learning_rate_init=0.001,
    power_t=0.5,
    momentum=0.9,
    nesterovs_momentum=True,
    # training loop / stopping
    max_iter=200,
    shuffle=True,
    random_state=None,
    tol=0.0001,
    n_iter_no_change=10,
    early_stopping=False,
    validation_fraction=0.1,
    verbose=True,
    warm_start=True,
    # settings that belong to the other solvers (adam / lbfgs)
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    max_fun=15000,
)

mlp1.fit(Xtrain, Ytrain)

In [None]:
# Stand-alone logistic-activation network (150 hidden units, SGD, initial
# learning rate 0.01).
# NOTE(review): like mlp1, this model is fitted on unscaled inputs.
mlp2 = MLPRegressor(
    # architecture
    hidden_layer_sizes=(150,),
    activation='logistic',
    # optimiser
    solver='sgd',
    alpha=0.0001,
    batch_size='auto',
    learning_rate='constant',
    learning_rate_init=0.01,
    power_t=0.5,
    # training loop / stopping
    max_iter=200,
    shuffle=True,
    random_state=None,
    tol=0.0001,
    n_iter_no_change=10,
    early_stopping=False,
    validation_fraction=0.1,
    verbose=True,
    warm_start=True,
    # settings that belong to the other solvers (adam / lbfgs)
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    max_fun=15000,
)

mlp2.fit(Xtrain, Ytrain)

In [None]:
# Test-set predictions from both stand-alone networks (mlp2 is evaluated first)
results2, results1 = (network.predict(Xtest) for network in (mlp2, mlp1))

In [None]:
# Number of test predictions. The original referenced `results`, which is never
# defined (NameError); results1 and results2 both come from Xtest, so either
# one gives the count.
len(results1)

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Test-set mean squared error of each stand-alone network.
# mean_squared_error is documented as (y_true, y_pred); the value is the same
# either way, but the call now follows the documented argument order.
lin_mse1 = mean_squared_error(Ytest, results1)
lin_mse2 = mean_squared_error(Ytest, results2)
# Labels corrected from "mpl" to "mlp" to match the model names
print(f"Error mlp1: {lin_mse1}\nError mlp2: {lin_mse2}")

In [None]:
# Display the fitted mlp2 estimator
mlp2

In [None]:
def plot_learning_curves(hidden_sizes=range(10, 100, 10)):
    """Plot training and validation RMSE against hidden-layer size.

    For every size in ``hidden_sizes`` a new single-hidden-layer MLPRegressor
    (logistic activation, SGD) is fitted on the notebook-level ``Xtrain`` /
    ``Ytrain`` and scored on both the training split and the validation split
    (``Xvalidation`` / ``Yvalidation``).

    Parameters
    ----------
    hidden_sizes : iterable of int, optional
        Hidden-layer sizes to evaluate. Defaults to 10, 20, ..., 90.

    Returns
    -------
    None
        The two curves are drawn on the current matplotlib axes.
    """
    hidden_sizes = list(hidden_sizes)
    train_errors, val_errors = [], []

    for m in hidden_sizes:
        model = MLPRegressor(hidden_layer_sizes=(m,), activation='logistic', solver='sgd', alpha=0.0001,
                             batch_size='auto', learning_rate='constant', learning_rate_init=0.01,
                             power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001,
                             verbose=False, warm_start=True, early_stopping=False, validation_fraction=0.1,
                             beta_1=0.9, beta_2=0.999,
                             epsilon=1e-08, n_iter_no_change=10, max_fun=15000)
        model.fit(Xtrain, Ytrain)
        train_errors.append(mean_squared_error(Ytrain, model.predict(Xtrain)))
        # The "val" curve is scored on the validation split; the original used
        # the test split here, leaving the validation data unused.
        val_errors.append(mean_squared_error(Yvalidation, model.predict(Xvalidation)))

    # Plot against the actual hidden-layer sizes rather than the list index
    plt.plot(hidden_sizes, np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(hidden_sizes, np.sqrt(val_errors), "b-", linewidth=3, label="val")
    plt.xlabel("Hidden layer size")
    plt.ylabel("RMSE")
    # Without legend() the labels passed to plot() are never shown
    plt.legend()

plot_learning_curves()