### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.layers import Dropout
from keras.constraints import maxnorm
#from keras.wrappers.scikit_learn import KerasRegressor

from math import sqrt
import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [2]:
df1 = pd.read_csv('traincleaned.csv')

In [None]:
df1.head()

In [None]:
df1.info()

In [None]:
df1.describe()

In [3]:
array = df1.values

In [4]:
type(array)

numpy.ndarray

In [5]:
X = array[:,0:12]
y = array[:,12]

In [6]:
X

array([[ 0.        ,  1.        ,  0.        , ...,  0.        ,
        -0.02507689,  0.04203268],
       [ 0.        ,  1.        ,  0.        , ...,  0.        ,
         0.1222304 , -0.29808789],
       [ 1.        ,  0.        ,  0.        , ...,  0.        ,
         0.17834747, -0.37582973],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  1.        ,
        -0.53013048, -0.16203966],
       [ 0.        ,  1.        ,  0.        , ...,  1.        ,
         1.31471803, -0.16203966],
       [ 0.        ,  1.        ,  0.        , ...,  0.        ,
        -0.92294993, -0.02599143]])

In [7]:
y

array([1., 2., 3., ..., 1., 1., 6.])

In [8]:
X.shape

(200000, 12)

In [9]:
y.shape

(200000,)

### Use base Keras model

In [10]:
base = Sequential()
base.add(Dense(units=12,input_dim=12,activation='relu',kernel_constraint=maxnorm(3)))
base.add(Dense(1,activation='linear'))

In [11]:
base.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 12)                156       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 13        
Total params: 169
Trainable params: 169
Non-trainable params: 0
_________________________________________________________________


### Using Grid Search to find best parameters

In [12]:
params_grid = {'batch_size': [10,20],
               'epochs':[10,20]
              }

In [13]:
grid_search = GridSearchCV(base,params_grid,scoring='r2',n_jobs=-1,verbose=1)

In [14]:
grid_search.fit(X,y)

TypeError: Cannot clone object '<keras.engine.sequential.Sequential object at 0x000001AC8EF252C8>' (type <class 'keras.engine.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' methods.

### Create model and training

In [None]:
model = Sequential()
model.add(Dense(units=12,input_dim=12,activation='relu',kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(units=100,activation='relu',kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(units=100,activation='relu',kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(units=100,activation='relu',kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(units=100,activation='relu',kernel_constraint=maxnorm(3)))
model.add(Dropout(0.2))
model.add(Dense(1,activation='linear'))

In [None]:
model.summary()

In [None]:
model.compile(optimizer='adam',loss='mean_squared_error',metrics=['accuracy'])

In [None]:
trainresult = model.fit(X,y,epochs=50,validation_split=0.2,batch_size=1000)

### Model evaluation

In [None]:
trainresult.history.keys()

In [None]:
plt.plot(trainresult.history['loss'])
plt.plot(trainresult.history['val_loss'])
plt.title('Model Loss Progress During Training')
plt.xlabel('Epoch')
plt.ylabel('Training and Validation Loss')
plt.legend(['Training Loss', 'Validation Loss'])
plt.show()

# Model Prediction

In [None]:
X_test = pd.read_csv('testmodified.csv')

In [None]:
X_test.head()

In [None]:
X_test.tail()

In [None]:
X_test.shape

In [None]:
y_predict = model.predict(X_test,verbose=True)

In [None]:
y_predict

In [None]:
plt.plot(y, y_predict)
plt.xlabel('Model Predictions')
plt.ylabel('True Values')
plt.show()

In [None]:
y_predict.shape

In [None]:
prediction = pd.DataFrame(y_predict)

In [None]:
prediction

In [None]:
prediction.to_csv('kerasresult.csv')

In [None]:
MSE = mean_squared_error(y,y_predict)
print("Mean Squared Error is", MSE)

In [None]:
RMSE = format(np.sqrt(MSE),'.3f')
print("RMSE is", RMSE)

In [None]:
MAE = mean_absolute_error(y,y_predict)
print("MAE is", MAE)

In [None]:
r2 = r2_score(y,y_predict)
print("R2 score is", r2)

Result is 0.4925