<a href="https://colab.research.google.com/github/gorzanskik-ai/short-term-stocks-price-prediction/blob/main/classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so the dataset and model paths under /content/drive resolve (Colab only).
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import plotly.express as px

# Seeds NumPy's global random generator only.
# NOTE(review): no TensorFlow/Keras seed is set in this cell, so weight initialisation
# is presumably not reproducible between runs — confirm and seed TensorFlow if needed.
np.random.seed(42)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, Flatten, LSTM
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score

Mounted at /content/drive


In [None]:
# Location of the daily classification dataset on the mounted Drive.
path = "/content/drive/MyDrive/master's thesis/day/classification.csv"

# Read the CSV and promote the 'Date' column to the index (the column itself is removed).
df_raw = pd.read_csv(path).set_index('Date')

# Work on a copy so the frame as loaded stays available under df_raw.
df = df_raw.copy()

In [None]:
# Class balance of the binary label.
df['TARGET'].value_counts()

1.0    9962
0.0    8297
Name: TARGET, dtype: int64

Train Test Split

In [None]:
# Separate the binary label from the predictors.
y = df['TARGET'].copy()
X = df.drop(columns='TARGET').copy()

# Random 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Columns rescaled to [0, 1]; the scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
features = ['Day', 'Month', 'Year', 'RSI', 'MFI', 'CMO', 'UO', 'ADX']
scaler = MinMaxScaler()
X_train[features] = scaler.fit_transform(X_train[features])
X_test[features] = scaler.transform(X_test[features])

# Insert a length-1 timestep axis: (samples, features) -> (samples, 1, features).
X_train = X_train.to_numpy()[:, None, :]
X_test = X_test.to_numpy()[:, None, :]

# Labels as arrays; only the training labels are reshaped, to (samples, 1, 1).
y_test = y_test.to_numpy()
y_train = y_train.to_numpy().reshape(-1, 1, 1)

In [None]:
def build_model_rnn():
    """Build and compile a single-layer LSTM binary classifier.

    The input shape is (1, n_features); n_features is read from the global
    ``X_train`` at call time, so ``X_train`` must already be a 3-D array.

    Returns:
        A compiled ``Sequential`` model: LSTM(64, relu) returning only its
        last output, followed by a one-unit sigmoid ``Dense`` layer. It is
        compiled with Adam, binary cross-entropy and the accuracy metric.
    """
    n_features = X_train.shape[2]
    network = Sequential([
        LSTM(64, input_shape=(1, n_features), activation='relu', return_sequences=False),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [None]:
# Build the classifier with build_model_rnn() and print its layer/parameter summary.
model = build_model_rnn()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                22528     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 22,593
Trainable params: 22,593
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Stop training once validation accuracy has gone 10 epochs without improving.
es = EarlyStopping(monitor='val_accuracy', patience=10, mode='max', verbose=0)

# Train silently for at most 50 epochs; a quarter of the training arrays is held out for validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.25,
    epochs=50,
    batch_size=32,
    callbacks=[es],
    verbose=0,
)

# Per-epoch metrics as a table; display only the final epoch's row.
metrics = pd.DataFrame(history.history)
metrics.tail(1)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
14,0.515235,0.732177,0.514172,0.730832


In [None]:
# Sigmoid outputs for the test set, flattened to one probability per sample.
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(len(y_pred))
y_true = np.array(y_test, copy=True)

# Two-column score matrix [|p - 1|, p]; the larger column gives the predicted class.
pred = np.column_stack((np.absolute(y_pred - 1), y_pred))
y_pred = pred.argmax(axis=1)
print(f'Accuracy score: {accuracy_score(y_true, y_pred)}')

Accuracy score: 0.726998904709748


In [None]:
# Save the trained model to Google Drive as an HDF5 (.h5) file.
model.save("/content/drive/MyDrive/master's thesis/day/classification.h5")

In [None]:
def plot_hist(history):
    """Plot the training curves stored in a Keras ``History`` object.

    Two Plotly figures are shown one after the other, each with a logarithmic
    y-axis: training vs. validation accuracy, then training vs. validation loss.

    Args:
        history: Object exposing ``history`` (per-epoch metric lists under the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss') and
            ``epoch`` (the epoch numbers), as returned by ``model.fit``.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    # (training metric, validation metric, figure title), in display order.
    panels = [
        ('accuracy', 'val_accuracy', 'accuracy vs. val accuracy'),
        ('loss', 'val_loss', 'loss vs. val loss'),
    ]
    for train_key, val_key, title in panels:
        fig = go.Figure()
        for key in (train_key, val_key):
            fig.add_trace(go.Scatter(x=hist['epoch'], y=hist[key], name=key, mode='markers+lines'))
        # The x-axis label 'Epoki' is Polish for "epochs"; the y-axis is labelled with the training metric.
        fig.update_layout(width=1000, height=500, title=title, xaxis_title='Epoki', yaxis_title=train_key, yaxis_type='log')
        fig.show()

plot_hist(history)

In [None]:
from sklearn.metrics import confusion_matrix
import plotly.figure_factory as ff

# Confusion matrix of the most recent predictions (y_true / y_pred from the evaluation cell).
cm = confusion_matrix(y_true, y_pred)

def plot_confusion_matrix(cm):
    """Show a 2x2 confusion matrix as an annotated Plotly heatmap.

    The rows of ``cm`` are reversed before plotting, so the first heatmap row
    is labelled 'positive' and the second 'negative'; columns are labelled
    'negative' then 'positive'.

    Args:
        cm: 2x2 array of counts, e.g. from ``sklearn.metrics.confusion_matrix``
            (rows are true classes, columns are predicted classes).
    """
    column_labels = ['negative', 'positive']
    row_labels = ['positive', 'negative']
    flipped = pd.DataFrame(cm[::-1], columns=column_labels, index=row_labels)

    fig = ff.create_annotated_heatmap(
        z=flipped.values,
        x=list(flipped.columns),
        y=list(flipped.index),
        colorscale='ice',
        showscale=True,
        reversescale=True,
    )
    fig.update_layout(width=400, height=400, title='Confusion Matrix', font_size=16)
    fig.show()

plot_confusion_matrix(cm)

By Time

In [None]:
# Chronological hold-out: the first 80% of the rows train the model and the
# remaining 20% are used for testing (assumes df rows are in date order — confirm).
X = df.drop('TARGET', axis=1).copy()
y = df['TARGET'].copy()

length = df.shape[0]
# Named split_idx rather than `range` so the builtin range() is not shadowed
# for the rest of the kernel session.
split_idx = int(round(0.8 * length))

# Explicit positional slices, copied so the column assignments below write to
# independent frames instead of slices of X (avoids SettingWithCopyWarning).
X_train = X.iloc[:split_idx].copy()
X_test = X.iloc[split_idx:].copy()
y_train = y.iloc[:split_idx]
y_test = y.iloc[split_idx:]

# Min-max scaling is fitted on the training rows only and reused for the test rows.
features = ['Day', 'Month', 'Year', 'RSI', 'MFI', 'CMO', 'UO', 'ADX']
scaler = MinMaxScaler()
scaler.fit(X_train[features])
X_train[features] = scaler.transform(X_train[features])
X_test[features] = scaler.transform(X_test[features])

X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

# Add a length-1 timestep axis: (samples, features) -> (samples, 1, features).
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Only the training labels are reshaped, to (samples, 1, 1).
y_train = y_train.reshape(y_train.shape[0], 1, 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


In [None]:
def build_model_rnn():
    """Build and compile a single-layer, 128-unit LSTM binary classifier.

    The input shape is (1, n_features); n_features is read from the global
    ``X_train`` at call time, so ``X_train`` must already be a 3-D array.

    Returns:
        A compiled ``Sequential`` model: LSTM(128, relu) returning only its
        last output, followed by a one-unit sigmoid ``Dense`` layer. It is
        compiled with Adam, binary cross-entropy and the accuracy metric.
    """
    n_features = X_train.shape[2]
    network = Sequential([
        LSTM(128, input_shape=(1, n_features), activation='relu', return_sequences=False),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [None]:
# Build a fresh classifier for the chronological split and print its layer/parameter summary.
model = build_model_rnn()
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 128)               77824     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 77,953
Trainable params: 77,953
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Stop training once validation accuracy has gone 10 epochs without improving.
es = EarlyStopping(monitor='val_accuracy', patience=10, mode='max', verbose=0)

# Train silently for at most 500 epochs; a quarter of the training arrays is held out for validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.25,
    epochs=500,
    batch_size=32,
    callbacks=[es],
    verbose=0,
)

# Per-epoch metrics as a table; display only the final epoch's row.
metrics = pd.DataFrame(history.history)
metrics.tail(1)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
24,0.50359,0.733638,0.49604,0.750274


In [None]:
# Sigmoid outputs for the test set, flattened to one probability per sample.
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(len(y_pred))
y_true = np.array(y_test, copy=True)

# Two-column score matrix [|p - 1|, p]; the larger column gives the predicted class.
pred = np.column_stack((np.absolute(y_pred - 1), y_pred))
y_pred = pred.argmax(axis=1)
print(f'Accuracy score: {accuracy_score(y_true, y_pred)}')

Accuracy score: 0.7171412924424972


Time Series Split

In [None]:
# Train/test preparation using the final fold of a 10-way TimeSeriesSplit
# (assumes df rows are in chronological order — confirm).
X = df.drop('TARGET', axis=1).copy()
y = df['TARGET'].copy()

features = ['Day', 'Month', 'Year', 'RSI', 'MFI', 'CMO', 'UO', 'ADX']
targets = y

# TimeSeriesSplit yields expanding-window folds in row order. Only the last
# (largest) fold is used downstream, so take it directly instead of looping
# over every fold and overwriting the result each time.
timesplit = TimeSeriesSplit(n_splits=10)
train_index, test_index = list(timesplit.split(X))[-1]

# Min/max statistics come from the training rows only, so no information from
# the test fold leaks into the preprocessing. Scaled test values may therefore
# fall slightly outside [0, 1].
train_min = X.iloc[train_index][features].min()
train_max = X.iloc[train_index][features].max()

# Scale into a copy so the unscaled predictors in X are left untouched.
normal_inputs = X.copy()
normal_inputs[features] = (X[features] - train_min) / (train_max - train_min)

X_train, X_test = normal_inputs.iloc[train_index], normal_inputs.iloc[test_index]
y_train, y_test = targets.iloc[train_index].values.ravel(), targets.iloc[test_index].values.ravel()

X_train = np.array(X_train)
X_test = np.array(X_test)

# Add a length-1 timestep axis: (samples, features) -> (samples, 1, features).
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Training labels reshaped to (samples, 1, 1).
y_train = y_train.reshape(y_train.shape[0], 1, 1)

In [None]:
def build_model_rnn():
    """Build and compile a three-layer stacked LSTM binary classifier.

    The input shape is (1, n_features); n_features is read from the global
    ``X_train`` at call time, so ``X_train`` must already be a 3-D array.
    Every LSTM layer returns its full sequence, so the final sigmoid ``Dense``
    layer is applied per timestep.

    Returns:
        A compiled ``Sequential`` model: LSTM(128, relu), LSTM(128), LSTM(128)
        and a one-unit sigmoid ``Dense`` layer. It is compiled with Adam,
        binary cross-entropy and the accuracy metric.
    """
    n_features = X_train.shape[2]
    network = Sequential([
        LSTM(128, input_shape=(1, n_features), activation='relu', return_sequences=True),
        LSTM(128, return_sequences=True),
        LSTM(128, return_sequences=True),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [None]:
# Build a fresh stacked-LSTM classifier for the TimeSeriesSplit fold and print its summary.
model = build_model_rnn()
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 1, 128)            77824     
                                                                 
 lstm_7 (LSTM)               (None, 1, 128)            131584    
                                                                 
 lstm_8 (LSTM)               (None, 1, 128)            131584    
                                                                 
 dense_2 (Dense)             (None, 1, 1)              129       
                                                                 
Total params: 341,121
Trainable params: 341,121
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Stop training once validation accuracy has gone 10 epochs without improving.
es = EarlyStopping(monitor='val_accuracy', patience=10, mode='max', verbose=0)

# Train silently for at most 500 epochs; a quarter of the training arrays is held out for validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.25,
    epochs=500,
    batch_size=32,
    callbacks=[es],
    verbose=0,
)

# Per-epoch metrics as a table; display only the final epoch's row.
metrics = pd.DataFrame(history.history)
metrics.tail(1)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
11,0.503482,0.736787,0.523728,0.733012


In [None]:
# Sigmoid outputs for the test set, flattened to one probability per sample.
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(len(y_pred))
y_true = np.array(y_test, copy=True)

# Two-column score matrix [|p - 1|, p]; the larger column gives the predicted class.
pred = np.column_stack((np.absolute(y_pred - 1), y_pred))
y_pred = pred.argmax(axis=1)
print(f'Accuracy score: {accuracy_score(y_true, y_pred)}')

Accuracy score: 0.6980108499095841
