In [1]:
import pywt
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense , Input , LSTM, Bidirectional, Dropout
from tensorflow.keras.models import Model , Sequential
from tensorflow.keras import regularizers

In [2]:
def wavelet_transform(df):
    ca , cb, cc , cd = pywt.wavedec(df['price'].values, 'haar', level = 3)
    cat = pywt.threshold(ca, np.std(ca), mode = 'soft')
    cbt = pywt.threshold(cb, np.std(cb), mode = 'soft')
    cct = pywt.threshold(cc, np.std(cc), mode = 'soft')
    cdt = pywt.threshold(cd, np.std(cd), mode = 'soft')
    coeff = [cat , cbt, cct , cdt]
    return pywt.waverec(coeff, 'haar')

def get_sample(df, length, temporal_horizon):

    temporal_horizon = temporal_horizon - 1
    last_possible = df.shape[0] - temporal_horizon - length

    random_start = np.random.randint(0, last_possible)
    X_sample = df.drop(columns = 'price')[random_start: random_start+length].values
    y_sample = df['price'][random_start+length: random_start+length+temporal_horizon+1]
    
   # if y_sample != y_sample:
        #X_sample, y_sample = get_sample(df, length, temporal_horizon)
    
    return X_sample, y_sample

def get_X_y(df, temporal_horizon, length_of_sequences):
    X, y = [], []

    for len_ in length_of_sequences:
        xi, yi = get_sample(df, len_, temporal_horizon)
        X.append(xi)
        y.append(yi)
        
    return X, np.array(y)


def autoencoder(features):
    input_data = Input(shape=(1, features))
    encoded1 = Dense(features, activation="relu", activity_regularizer=regularizers.l2(0))(input_data)
    one_l = Dense(1, activation="relu", activity_regularizer=regularizers.l2(0))(encoded1)
    decoded = Dense(features, activation="linear", activity_regularizer=regularizers.l2(0))(one_l)
    autoencoder = Model(inputs=input_data, outputs=decoded)
    encoder = Model(input_data, one_l)
    autoencoder.compile(loss = 'mse', optimizer = 'rmsprop',metrics = ['mae'])
    return autoencoder , encoder

## DATA

In [3]:
df  = pd.read_csv('ma_ema.csv')
df.set_index('date', inplace = True)
df_test = df[2970:3361]
df = df[:3000]

In [4]:
df['price'] = wavelet_transform(df)
df.dropna(inplace = True)

## AUTOENCODER

In [5]:
autoencoder , encoder = encoder(14)

NameError: name 'encoder' is not defined

In [None]:
X = np.array(df)
X = X.reshape(len(X), 1, 14)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
es = EarlyStopping(monitor = 'val_loss',mode = 'min' , verbose = 1, patience = 20, restore_best_weights = True)
history = autoencoder.fit(X,X,
                    validation_split = 0.3,
                   callbacks = [es], 
                   epochs = 1000, 
                   batch_size = 64,
                   shuffle = True)

In [None]:
def plot_loss(history):
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Mean Square Error - Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='best')
    plt.show()
    
    plt.plot(history.history['mae'])
    plt.plot(history.history['val_mae'])
    plt.title('Model loss')
    plt.ylabel('Mean Absolute Error')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='best')
    plt.show()
    
plot_loss(history)

In [None]:
X_encode = autoencoder.predict(X)
X_encode.shape = (X_encode.shape[0], X_encode.shape[2])
new_df = pd.DataFrame(X_encode)
df.reset_index(inplace = True)
new_df['price'] = df['price']

In [None]:
n_days = 30
length = 30
length_of_sequences = [length for x in range(3500)]

X_train, y_train = get_X_y(new_df, n_days, length_of_sequences)

In [None]:
def init_model() :
    model = Sequential()               
    model.add(LSTM(150,activation = 'tanh',input_shape=(length, 14),return_sequences = True))
    model.add(Dropout(0.5))  
    model.add(Bidirectional(LSTM(120, activation = 'tanh',return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(Bidirectional(LSTM(100,activation = 'tanh',return_sequences = True)))
    model.add(Dropout(0.5))
    model.add(LSTM(80,activation = 'tanh'))
    model.add(Dense(60,activation = 'relu'))     
    model.add(Dense(n_days,activation = 'linear'))

    model.compile(loss = 'mse', optimizer = 'rmsprop',metrics = ['mae'])

    return model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
model = init_model()
es = EarlyStopping(monitor = 'val_loss',mode = 'min' , verbose = 1, patience = 20, restore_best_weights = True)
history = model.fit(np.array(X_train), y_train,
                    validation_split = 0.3,
                   callbacks = [es], 
                   epochs = 1000, 
                   batch_size = 32)
                   #shuffle = True)

In [None]:
def plot_loss(history):
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Mean Square Error - Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='best')
    plt.show()
    
    plt.plot(history.history['mae'])
    plt.plot(history.history['val_mae'])
    plt.title('Model loss')
    plt.ylabel('Mean Absolute Error')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='best')
    plt.show()
    
plot_loss(history)

## TEST

In [None]:
df_test_n = df_test.copy()
df_test_n['price'] = wavelet_transform(df_test_n)[:len(df_test_n)]

df_test = df_test[length:]

prediction = []
for x in range(0,12):
    n = 30*x 
    i = 30 + 30*x
    encode = autoencoder.predict(df_test_n[n:i])
    encode.shape = (1,encode.shape[0], encode.shape[1])
    predict = model.predict(encode)
    prediction.append(predict)

In [None]:
prediction = np.array(prediction)
prediction.shape = (360)

In [None]:
plt.figure(figsize = (18, 7))
plt.plot(prediction, label = 'prediction')
plt.plot(df_test['price'], label = 'truth')
plt.legend()