In [1]:
########## Calling relevant libraries ##########
%matplotlib inline
import numpy as np
from sklearn.metrics import mean_squared_error
import scipy

import tensorflow as tf

import pandas as pd
import rampy as rp

import sklearn
import sklearn.model_selection as model_selection
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingRegressor

from sklearn.externals import joblib
import sqlite3

In [2]:
# Load the whole `viscosity` table from the local SQLite database into a DataFrame.
con = sqlite3.connect('./data/viscosity.sqlite')
try:
    dataset = pd.read_sql_query("SELECT * from viscosity", con)
finally:
    # Close the connection even when the query raises, so a failed run does not
    # leave the database handle open in the long-lived notebook kernel.
    con.close()



In [3]:
# Two-stage split keyed on the 'Name' column (rampy helper): the first call
# separates the test subset, the second divides the remainder into the
# validation and training subsets.
tv_sub, test_sub, idxtv_sub, idxtest_sub = rp.chemical_splitting(dataset, 'Name', 0.15)
valid_sub, train_sub, idxvalid_sub, idxtrain_sub = rp.chemical_splitting(tv_sub, 'Name', 0.85)

# Features: every column from "sio2" through "T" (label-based slice).
X_train, X_valid, X_test = (
    subset.loc[:, "sio2":"T"].values
    for subset in (train_sub, valid_sub, test_sub)
)

# Targets: the "viscosity" column, reshaped into a single-column 2-D array.
y_train, y_valid, y_test = (
    subset["viscosity"].values.reshape(-1, 1)
    for subset in (train_sub, valid_sub, test_sub)
)

In [4]:
# Options forwarded to rp.mlregressor as `param_neurons`.
# NOTE(review): scikit-learn documents `early_stopping` as effective only with the
# 'sgd' and 'adam' solvers, so it presumably has no effect with 'lbfgs' — confirm.
prm_nn = {
    "hidden_layer_sizes": (10, 5),
    "solver": 'lbfgs',
    "activation": 'relu',
    "early_stopping": True,
}

# Options forwarded to rp.mlregressor as `param_bagging`.
param_b = {
    "n_estimators": 10,
    "max_samples": 1.0,
    "max_features": 1.0,
    "bootstrap": True,
    "bootstrap_features": False,
    "oob_score": False,
    "warm_start": False,
    "n_jobs": 1,
    "verbose": 0,
}

# Build the bagged neural-network regressor; the test subset is handed over
# explicitly and the random state is fixed to 100.
model = rp.mlregressor(
    X_train,
    y_train,
    algorithm="BaggingNeuralNet",
    X_test=X_test,
    y_test=y_test,
    scaler="StandardScaler",
    param_neurons=prm_nn,
    param_bagging=param_b,
    rand_state=100,
)

model.fit()
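# The scalers held by the rp.mlregressor object (model.X_scaler, model.Y_scaler)
# are reused in the next cell to scale the training, validation and test subsets.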

In [5]:
# Scale each feature matrix with the X scaler carried by the model.
X_train_sc, X_valid_sc, X_test_sc = (
    model.X_scaler.transform(features)
    for features in (X_train, X_valid, X_test)
)

# Scale each target with the Y scaler; reshape(-1, 1) hands the scaler a
# single-column 2-D array.
y_train_sc, y_valid_sc, y_test_sc = (
    model.Y_scaler.transform(target.reshape(-1, 1))
    for target in (y_train, y_valid, y_test)
)

In [6]:
# Root-mean-square error (square root of the mean squared error) between the
# observed targets and the model predictions, for the training and testing subsets.
RMSE_NN_train = np.sqrt(mean_squared_error(y_train, model.prediction_train))
RMSE_NN_test = np.sqrt(mean_squared_error(y_test, model.prediction_test))

# Backward-compatible aliases: the historical names say "MSE" although the
# stored values are RMSE. Kept so that any cell using the old names still works.
MSE_NN_train = RMSE_NN_train
MSE_NN_test = RMSE_NN_test

print("training and testing RMSE with neural nets")
print(RMSE_NN_train)
print(RMSE_NN_test)


training and testing RMSE with neural nets
0.40678684672309756
0.5218150877142917


In [7]:
# Show the shape of each feature matrix before and after scaling
# (header, unscaled shape, scaled shape — one item per line).
print("X training set shape, unscaled and scaled", X_train.shape, X_train_sc.shape, sep="\n")
print("X valid set shape, unscaled and scaled", X_valid.shape, X_valid_sc.shape, sep="\n")
print("X test set shape, unscaled and scaled", X_test.shape, X_test_sc.shape, sep="\n")

X training set shape, unscaled and scaled
(4296, 15)
(4296, 15)
X valid set shape, unscaled and scaled
(835, 15)
(835, 15)
X test set shape, unscaled and scaled
(792, 15)
(792, 15)


In [8]:
# Persist every subset (raw and scaled) and the model's X scaler with joblib.
# Each entry pairs the object to save with its destination file.
saved_objects = (
    (X_train, "./data/X_train.pkl"),
    (X_valid, "./data/X_valid.pkl"),
    (X_test, "./data/X_test.pkl"),
    (X_train_sc, "./data/X_train_sc.pkl"),
    (X_valid_sc, "./data/X_valid_sc.pkl"),
    (X_test_sc, "./data/X_test_sc.pkl"),
    (y_train, "./data/y_train.pkl"),
    (y_valid, "./data/y_valid.pkl"),
    (y_test, "./data/y_test.pkl"),
    (y_train_sc, "./data/y_train_sc.pkl"),
    (y_valid_sc, "./data/y_valid_sc.pkl"),
    (y_test_sc, "./data/y_test_sc.pkl"),
    (model.X_scaler, "./data/X_scaler.pkl"),
)
for obj, path in saved_objects:
    joblib.dump(obj, path)

# The Y scaler is dumped as the final bare expression so the cell still
# displays the returned list of written file names.
joblib.dump(model.Y_scaler, "./data/y_scaler.pkl")

['./data/y_scaler.pkl']