In [145]:

# Common imports
import numpy as np
import os

# Seed NumPy's global RNG so NumPy-based randomness is repeatable across runs.
# NOTE(review): this call only seeds NumPy. The Keras models trained further
# down presumably draw from the backend's own RNG, so their results may still
# differ between runs -- confirm, and seed the backend as well if needed.
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Font sizes for every figure: axis labels slightly larger than tick labels.
mpl.rcParams.update({
    "axes.labelsize": 14,
    "xtick.labelsize": 12,
    "ytick.labelsize": 12,
})

# Output location for figures: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "end_to_end_project"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)

# Create the folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under ``IMAGES_PATH``.

    Parameters
    ----------
    fig_id : str
        Base file name (without extension); also echoed in the log message.
    tight_layout : bool, default True
        When true, call ``plt.tight_layout()`` before saving.
    fig_extension : str, default "png"
        File extension; also passed to ``plt.savefig`` as the output format.
    resolution : int, default 300
        DPI passed to ``plt.savefig``.
    """
    filename = fig_id + "." + fig_extension
    target = os.path.join(IMAGES_PATH, filename)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

In [146]:
import pandas as pd

# Folder that holds train.csv and test.csv. Set the GROUP_PROJECT_DATA_DIR
# environment variable to run the notebook on another machine; when it is
# unset, the original author's local folder is used so existing runs behave
# exactly as before.
DATA_DIR = os.environ.get(
    "GROUP_PROJECT_DATA_DIR",
    r"C:\Users\alesi\Documents\group_project_ml",
)

test = pd.read_csv(os.path.join(DATA_DIR, "test.csv"))
train = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))

In [147]:
# Separate the target column "y" from the training frame.
y = train["y"].copy()
financial_train = train.drop(columns="y")

# Model inputs: the remaining columns with "w" removed as well.
financial_train2 = financial_train.drop(columns="w")


In [148]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every feature column to [0, 1]. The scaler is fitted on the training
# features only and kept around so the same mapping can be applied to test data.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(financial_train2)
fin_array = scaler.transform(financial_train2)


In [149]:
# Add a trailing axis of length 1: (rows, columns) -> (rows, columns, 1),
# the 3-D layout passed to the LSTM layers below.
n_rows, n_cols = fin_array.shape[0], fin_array.shape[1]
features_set = fin_array.reshape(n_rows, n_cols, 1)

In [150]:
# Inspect the reshaped array (NumPy abbreviates the printout with "...").
features_set


array([[[0.77543941],
        [0.73641688],
        [0.66819053],
        ...,
        [0.81500202],
        [0.80270631],
        [0.70407194]],

       [[0.76573625],
        [0.69986236],
        [0.62592815],
        ...,
        [0.85070807],
        [0.82899471],
        [0.84460308]],

       [[0.7421511 ],
        [0.7122393 ],
        [0.76833876],
        ...,
        [0.61630346],
        [0.62767519],
        [0.68464607]],

       ...,

       [[0.84564578],
        [0.83921647],
        [0.83300523],
        ...,
        [0.78301731],
        [0.77935849],
        [0.78419072]],

       [[0.79846584],
        [0.79272539],
        [0.79541808],
        ...,
        [0.77660452],
        [0.77457619],
        [0.78108037]],

       [[0.91306063],
        [0.90600368],
        [0.89643146],
        ...,
        [0.77566848],
        [0.7717734 ],
        [0.77295746]]])

In [151]:
# Sanity-check the 3-D shape produced by the reshape: (rows, columns, 1).
features_set.shape

(7326, 50, 1)

In [152]:
from keras.models import Sequential
from keras.layers import Dense, Activation, TimeDistributed, RepeatVector
from keras.layers import LSTM, GRU
from keras.layers import Dropout
from keras.callbacks import EarlyStopping

In [153]:
# Early-stopping callback: watch the validation loss, count an epoch as an
# improvement only if val_loss drops by at least 0.004, give up after 10
# epochs without improvement, and ask Keras to restore the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=10,
    min_delta=0.004,
    restore_best_weights=True,
)

In [154]:
# Targets as a plain NumPy array; the trailing expression displays its shape.
labels = np.array(y)
labels.shape


(7326,)

In [165]:
# Stacked-LSTM regressor: three LSTM layers (50 units each, each followed by
# 20% dropout), a 100-unit dense layer, and a single linear output unit,
# trained with mean squared error.
model = Sequential()

# return_sequences takes a real boolean. A string such as "True" (or even
# "False") is merely truthy, so it would only enable sequence output by accident.
# input_shape is given on the first layer only; later layers infer their input
# shape from the layer before them.
model.add(LSTM(units=50, activation="relu", return_sequences=True,
               input_shape=(features_set.shape[1], 1)))
model.add(Dropout(0.2))

model.add(LSTM(units=50, activation="relu", return_sequences=True))
model.add(Dropout(0.2))

# The last recurrent layer keeps the default return_sequences=False so that it
# emits one vector per sample for the dense head.
model.add(LSTM(units=50, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(100, activation="relu"))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

# First training pass: 20 epochs, 20% of the rows held out for validation.
model.fit(features_set, labels, epochs=20, validation_split=0.2, batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1e4c11297f0>

In [167]:

# Second training pass on the same `model` object (fit() continues from the
# current weights): large batches of 3663 rows, 10% validation split, and the
# early-stopping callback defined earlier.
model.fit(
    features_set,
    labels,
    epochs=50,
    validation_split=0.1,
    batch_size=3663,
    callbacks=[early_stopping],
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


<tensorflow.python.keras.callbacks.History at 0x1e4c40ddcd0>

In [49]:
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from sklearn.model_selection import GridSearchCV

def build_classifier(optimizer):
    """Build and compile the single-layer LSTM model used by the grid search.

    Despite the (historical) name, the network is a regressor: it has one
    linear output unit and is trained with mean squared error.

    Parameters
    ----------
    optimizer : str or keras optimizer
        Optimizer passed to ``compile``; supplied by the grid search.

    Returns
    -------
    A compiled ``Sequential`` model expecting inputs of shape (50, 1).
    """
    net = Sequential()
    net.add(LSTM(units=64, input_shape=(50, 1)))
    net.add(Dropout(0.4))
    net.add(Dense(1))

    net.compile(loss='mse', optimizer=optimizer, metrics=['mean_squared_error'])
    return net

# The targets are continuous and the search is scored with
# neg_mean_squared_error, so the scikit-learn wrapper must be the regressor.
# KerasClassifier treats y as class labels (it re-encodes them to integer
# class indices before fitting and predicts class labels), which makes the
# cross-validated scores meaningless for this task.
grid_model = KerasRegressor(build_fn=build_classifier)

parameters = {
    'batch_size': [32],
    'epochs': [20, 50],
    'optimizer': ['adam'],
}

grid_search = GridSearchCV(
    estimator=grid_model,
    param_grid=parameters,
    scoring='neg_mean_squared_error',
    cv=2,
)

grid_search = grid_search.fit(features_set, labels)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [50]:
# Best hyper-parameter combination found by the grid search.
best_parameters = grid_search.best_params_
# NOTE: the search is scored with 'neg_mean_squared_error', so best_score_ is
# a negated mean squared error (closer to 0 is better), not an accuracy.
best_accuracy = grid_search.best_score_
best_parameters

{'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}

In [38]:

# Apply the scaler that was fitted on the training features
# (transform only -- the scaler is not refitted on the test data).
test_scaled = scaler.transform(test)
test_scaled


array([[0.2234717 , 0.16163201, 0.20019499, ..., 0.42314219, 0.39647356,
        0.38653022],
       [0.54778469, 0.53651845, 0.59138859, ..., 0.64188457, 0.57389185,
        0.54498805],
       [0.81151686, 0.75810207, 0.75106067, ..., 0.7945658 , 0.83358227,
        0.91174531],
       ...,
       [0.96338652, 0.95140427, 0.94364447, ..., 0.76946144, 0.76684189,
        0.77329558],
       [0.78042029, 0.78092329, 0.77810149, ..., 0.79887116, 0.79436882,
        0.79631411],
       [0.87834256, 0.87097847, 0.86740449, ..., 0.79194238, 0.78574633,
        0.7925877 ]])

In [39]:

# Add a trailing axis of length 1 so the test data has the same 3-D layout
# as the training features: (rows, columns) -> (rows, columns, 1).
test_features = test_scaled.reshape(-1, test_scaled.shape[1], 1)
test_features.shape

(3141, 50, 1)

In [159]:
# Predict the target for every test row with the stacked-LSTM `model`.
# NOTE(review): this cell's execution count (159) is lower than that of the
# cells that build and train `model` (165, 167), so the output shown here
# presumably came from an earlier model state -- re-run the notebook top to
# bottom to confirm the predictions match the current model.
predictions = model.predict(test_features)
predictions

array([[-0.43264723],
       [ 0.28264827],
       [ 1.5591255 ],
       ...,
       [ 1.0240332 ],
       [ 1.0181649 ],
       [ 1.0232127 ]], dtype=float32)

In [163]:
# Write the predictions to a text file, one row of `predictions` per line.
np.savetxt("submissions23.txt", predictions, delimiter=",")