In [66]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math
import tensorflow as tf
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.layers import GRU, Dense, Flatten, Conv1D, BatchNormalization, LeakyReLU, ELU, ReLU
from tensorflow.keras import Sequential, regularizers

In [52]:
# Load the table from CSV, parsing the 'Date' column as datetimes, then show it.
dataset = pd.read_csv('data.csv', parse_dates=['Date'])
display(dataset)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,pct_change,log_change,7ma,...,21lower,7rsv,14rsv,21rsv,FT_3components,FT_6components,FT_9components,FT_27components,FT_81components,FT_100components
0,2010-01-04,6.812500,6.830500,6.657000,6.695000,6.695000,151998000,,,6.695000,...,,,,,66.793522,50.193968,48.359400,53.895564,50.741649,51.699533
1,2010-01-05,6.671500,6.774000,6.590500,6.734500,6.734500,177038000,0.005900,0.005883,6.717571,...,,,,,66.521917,49.814025,47.852070,52.506692,46.681271,46.062446
2,2010-01-06,6.730000,6.736500,6.582500,6.612500,6.612500,143576000,-0.018116,-0.018282,6.672135,...,,,,,66.250508,49.435137,47.346233,51.110480,42.640874,40.471992
3,2010-01-07,6.600500,6.616000,6.440000,6.500000,6.500000,220604000,-0.017013,-0.017160,6.609183,...,,,,,65.979299,49.057314,46.841923,49.708200,38.647976,35.000315
4,2010-01-08,6.528000,6.684000,6.451500,6.676000,6.676000,196610000,0.027077,0.026717,6.631085,...,,,,,65.708294,48.680566,46.339174,48.301138,34.729693,29.716822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3330,2023-03-28,98.110001,98.440002,96.290001,97.239998,97.239998,38720100,-0.008160,-0.008193,97.889734,...,89.726903,0.000000,65.890651,65.890651,68.154377,52.109153,50.917241,60.686761,70.395837,78.078143
3331,2023-03-29,98.690002,100.419998,98.559998,100.250000,100.250000,49783300,0.030954,0.030485,98.479801,...,89.733520,89.317498,96.356268,96.356268,67.881840,51.724087,50.402963,59.352930,66.638093,73.269059
3332,2023-03-30,101.550003,103.040001,101.010002,102.000000,102.000000,53633400,0.017456,0.017306,99.359851,...,89.928514,100.000000,100.000000,100.000000,67.609483,51.340025,49.890005,58.005724,62.768809,68.163145
3333,2023-03-31,102.160004,103.489998,101.949997,103.290001,103.290001,56704300,0.012647,0.012568,100.342388,...,90.122199,100.000000,100.000000,100.000000,67.337309,50.956978,49.378402,56.646301,58.812370,62.821341


In [53]:
# Treat every exact 0 in the table as a missing value, then save a snapshot
# of the result (without the row index) to "dataset.csv".
# NOTE(review): this also blanks zeros that may be genuine readings (e.g. a
# 0.0 in '7rsv') — confirm that is intended.
dataset = dataset.replace(to_replace=0, value=np.nan)
dataset.to_csv("dataset.csv", index=False)

In [54]:
# Count missing values per column. The result is not shown because this is
# not the last expression of the cell.
dataset.isnull().sum()

# Fill gaps in every column except the first ('Date'): stack the forward-filled
# and backward-filled copies, then average the two rows that share an index
# label. The mean skips NaN, so a gap at the very start or end of a column
# takes the single neighbour that exists.
dataset.iloc[:, 1:] = (
    pd.concat([dataset.iloc[:, 1:].ffill(), dataset.iloc[:, 1:].bfill()])
    .groupby(level=0)
    .mean()
)
print(dataset.columns)

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'pct_change', 'log_change', '7ma', '14ma', '21ma', '7macd', '14macd',
       '7rsi', '14rsi', '21rsi', '7atr', '14atr', '21atr', '7upper', '7lower',
       '14upper', '14lower', '21upper', '21lower', '7rsv', '14rsv', '21rsv',
       'FT_3components', 'FT_6components', 'FT_9components', 'FT_27components',
       'FT_81components', 'FT_100components'],
      dtype='object')


In [55]:
# Index the table by date: build a DatetimeIndex from the 'Date' column,
# install it as the row index, order the rows chronologically, and drop the
# now-redundant 'Date' column.
datetime_series = pd.to_datetime(dataset['Date'])
datetime_index = pd.DatetimeIndex(datetime_series.values)
dataset = (
    dataset
    .set_index(datetime_index)
    .sort_values(by='Date')
    .drop(columns='Date')
)

In [56]:
# Get features and target
# Features: every column of the table (this includes 'Close' itself).
# An explicit copy keeps later edits to x_value from touching `dataset`.
x_value = dataset.copy()
# Target: the closing price, selected by name rather than by position so a
# change in column order cannot silently pick a different series.
y_value = dataset[['Close']]

In [57]:
# Show the target frame to confirm it holds the single 'Close' column.
display(y_value)

Unnamed: 0,Close
2010-01-04,6.695000
2010-01-05,6.734500
2010-01-06,6.612500
2010-01-07,6.500000
2010-01-08,6.676000
...,...
2023-03-28,97.239998
2023-03-29,100.250000
2023-03-30,102.000000
2023-03-31,103.290001


In [58]:
# Normalize the data: one min-max scaler for the features and one for the
# target, each mapping its columns into the range [-1, 1].
# NOTE(review): both scalers are fitted on the full series, before any
# train/test split, so test-period extremes influence the scaling — confirm
# this is acceptable.
x_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(x_value)
y_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(y_value)
y_scaler

MinMaxScaler(feature_range=(-1, 1))

In [59]:
# Scale features and target. fit_transform fits each scaler on the frame it
# is given and returns the scaled values as a NumPy array.
x_scale_dataset, y_scale_dataset = (
    x_scaler.fit_transform(x_value),
    y_scaler.fit_transform(y_value),
)

In [60]:
# Reshape the data
# Window sizes for the supervised samples: n_steps_in consecutive rows of
# features are used to predict the following n_steps_out row(s) of the target.
# The windowed inputs are laid out as (samples, n_steps_in, n_features), the
# (batch, time steps, features) layout that recurrent layers take.
n_steps_in = 3
n_features = x_value.shape[1]
n_steps_out = 1

In [62]:
# Get X/y dataset
def get_X_y(x_data, y_data, steps_in=None, steps_out=None):
    """Slice scaled feature/target arrays into sliding-window samples.

    For every start row ``i`` three slices are taken:

    * ``x``  - ``x_data[i : i + steps_in]``, the input feature window;
    * ``y``  - first column of ``y_data[i + steps_in : i + steps_in + steps_out]``,
      the values to predict;
    * ``yc`` - ``y_data[i : i + steps_in]``, the target history aligned with ``x``.

    Only complete windows are kept, i.e. those with exactly ``steps_in`` input
    rows and ``steps_out`` output rows.

    Args:
        x_data: 2-D NumPy array of features, one row per time step.
        y_data: 2-D NumPy array of targets, one row per time step.
        steps_in: Input window length. Defaults to the notebook-level
            ``n_steps_in`` when omitted.
        steps_out: Output window length. Defaults to the notebook-level
            ``n_steps_out`` when omitted.

    Returns:
        Tuple ``(x, y, yc)`` of NumPy arrays with shapes
        ``(samples, steps_in, x_data.shape[1])``, ``(samples, steps_out)`` and
        ``(samples, steps_in, y_data.shape[1])``.
    """
    # Fall back to the notebook-level window sizes when none are passed in.
    steps_in = n_steps_in if steps_in is None else steps_in
    steps_out = n_steps_out if steps_out is None else steps_out

    x, y, yc = [], [], []
    for start in range(len(x_data)):
        split = start + steps_in
        x_window = x_data[start:split][:, :]
        y_window = y_data[split:split + steps_out][:, 0]
        yc_window = y_data[start:split][:, :]
        # Slices near the end of the series come back short; compare against
        # the configured sizes (not literals) so any window length works.
        if len(x_window) == steps_in and len(y_window) == steps_out:
            x.append(x_window)
            y.append(y_window)
            yc.append(yc_window)
    return np.array(x), np.array(y), np.array(yc)

In [63]:
# get the train test predict index
def predict_index(dataset, x_train, n_steps_in, n_steps_out):
    """Return the row labels of `dataset` that line up with train/test predictions.

    The first `n_steps_in` rows are skipped in the train index. Positions are
    taken with `iloc`, so they refer to row order, not to label values.

    Args:
        dataset: DataFrame whose index supplies the labels.
        x_train: Array of training windows; only `x_train.shape[0]` is used.
        n_steps_in: Number of input steps per window.
        n_steps_out: Number of output steps per window.

    Returns:
        Tuple `(train_predict_index, test_predict_index)` of index objects.
    """
    n_train = x_train.shape[0]

    # Train labels start after the first input window.
    train_stop = n_train + n_steps_in + n_steps_out - 1
    # Test labels run from here to the end of the data.
    test_start = n_train + n_steps_in

    train_rows = dataset.iloc[n_steps_in:train_stop, :]
    test_rows = dataset.iloc[test_start:, :]
    return train_rows.index, test_rows.index


In [64]:
# Split train/test dataset
def split_train_test(data, train_ratio=0.7):
    """Split a sequence chronologically into a leading train part and a trailing test part.

    The split point is computed from the length of `data` itself, so the
    function does not depend on any notebook-level variable.

    Args:
        data: Sliceable sequence (e.g. a NumPy array) ordered in time.
        train_ratio: Fraction of items placed in the train part (default 0.7).

    Returns:
        Tuple `(data_train, data_test)`.
    """
    train_size = round(len(data) * train_ratio)
    return data[:train_size], data[train_size:]

In [65]:
# Build the windowed samples, split each array into train/test parts, and
# derive the date labels that match the train and test predictions.
x, y, yc = get_X_y(x_scale_dataset, y_scale_dataset)
(x_train, x_test), (y_train, y_test), (yc_train, yc_test) = map(
    split_train_test, (x, y, yc)
)
index_train, index_test = predict_index(dataset, x_train, n_steps_in, n_steps_out)

In [67]:
# Define the generator
def Generator(input_dim, output_dim, feature_size) -> tf.keras.models.Model:
    """Build the generator: two stacked GRU layers followed by a dense head.

    Args:
        input_dim: Number of time steps in each input window.
        output_dim: Number of units in the final Dense layer.
        feature_size: Number of features per time step.

    Returns:
        An uncompiled Sequential model taking inputs of shape
        (batch, input_dim, feature_size). No Dense layer sets an activation,
        so the dense head is linear.
    """
    layers = [
        # First GRU returns the full sequence so the second GRU can consume it.
        GRU(units=256,
            return_sequences=True,
            input_shape=(input_dim, feature_size),
            recurrent_dropout=0.02,
            recurrent_regularizer=regularizers.l2(1e-3)),
        # Second GRU returns only its last output.
        GRU(units=128,
            recurrent_dropout=0.02,
            recurrent_regularizer=regularizers.l2(1e-3)),
        Dense(64, kernel_regularizer=regularizers.l2(1e-3)),
        Dense(32, kernel_regularizer=regularizers.l2(1e-3)),
        Dense(units=output_dim),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)
    return model

In [68]:
# Define the discriminator
def Discriminator(input_steps: int = 4) -> tf.keras.models.Model:
    """Build the discriminator: three strided Conv1D layers and a dense head.

    Args:
        input_steps: Length of the single-channel sequence fed to the model.
            Defaults to 4, the value previously hard-coded.
            NOTE(review): 4 presumably equals n_steps_in + n_steps_out
            (3 history values + 1 predicted value) — confirm against the
            training loop and pass the sum explicitly if the window changes.

    Returns:
        An uncompiled Sequential model taking inputs of shape
        (batch, input_steps, 1). The final Dense(1) has no activation, so the
        model outputs one unbounded score per sample.
    """
    model = Sequential()
    # Each convolution uses stride 2 with "same" padding.
    model.add(Conv1D(32, input_shape=(input_steps, 1), kernel_size=3, strides=2,
                     padding="same", activation=LeakyReLU(alpha=0.01)))
    model.add(Conv1D(64, kernel_size=3, strides=2, padding="same",
                     activation=LeakyReLU(alpha=0.01)))
    model.add(Conv1D(128, kernel_size=3, strides=2, padding="same",
                     activation=LeakyReLU(alpha=0.01)))
    model.add(Flatten())
    model.add(Dense(220, use_bias=True))
    model.add(LeakyReLU())
    model.add(Dense(220, use_bias=True))
    model.add(ReLU())
    model.add(Dense(1))
    return model