# FNN - Global benchmark
### Load packages

In [None]:
import time as time
import pandas as pd
import numpy as np
import os

from sklearn.preprocessing import MinMaxScaler
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from numpy import array, hstack

### Load data

In [None]:
Directory = 'C:/.../TFT_for_Stock_Movement_Prediction/data'

# Target and return feature
CCR = pd.read_csv(os.path.join(Directory, 'CCR.csv'), index_col=[0])

### Features
# Each '<group>_features.csv' file lists feature names in its column '0'.
# Every listed feature has its own '<name>.csv' file, which is loaded into a
# module-level variable carrying the feature's name. The variables are bound
# through globals() because dataset() looks them up again through globals().

## Time features - Categorical
time_features = pd.read_csv(os.path.join(Directory, 'time_features.csv'), index_col=[0])['0'].tolist()
for feature_name in time_features:
    globals()[feature_name] = pd.read_csv(os.path.join(Directory, feature_name + '.csv'), index_col=[0])

## Basic historical features
bh_features = pd.read_csv(os.path.join(Directory, 'bh_features.csv'), index_col=[0])['0'].tolist()
for feature_name in bh_features:
    globals()[feature_name] = pd.read_csv(os.path.join(Directory, feature_name + '.csv'), index_col=[0])

# Categorical (names only; the frames themselves are loaded by the bh_features loop
# if the names are listed there -- TODO confirm the lists overlap as expected)
bh_categorical_features = pd.read_csv(os.path.join(Directory, 'bh_categorical_features.csv'), index_col=[0])['0'].tolist()

# Continuous (names only, same remark as above)
bh_continuous_features = pd.read_csv(os.path.join(Directory, 'bh_continuous_features.csv'), index_col=[0])['0'].tolist()

## Technical indicators - Continuous
indicator_features = pd.read_csv(os.path.join(Directory, 'indicator_features.csv'), index_col=[0])['0'].tolist()
for feature_name in indicator_features:
    globals()[feature_name] = pd.read_csv(os.path.join(Directory, feature_name + '.csv'), index_col=[0])

### Model preparation
#### Define variables

In [None]:
# Study periods: row offsets where each period begins / ends.
# Every period spans 1000 rows and starts 250 rows after the previous one.
period_b = tuple(range(0, 2500, 250))
period_e = tuple(range(1000, 3500, 250))

# Within a period: leading rows used for fitting, trailing rows predicted,
# and the fraction of the fitting samples passed to Keras as validation_split
training_size = 750
test_size = 250
validation_split = 0.2

# Target
Target_feature = ['CCR']

# Feature groups; Feature_type[0] is the dtype applied to categoricals,
# Feature_type[1] the dtype applied to reals
Feature_type = ['int', 'float']
time_varying_known_categoricals = time_features
time_varying_unknown_categoricals = bh_categorical_features
time_varying_unknown_reals = bh_continuous_features + indicator_features
time_varying_reals_to_scale = time_varying_unknown_reals + Target_feature

# Scaler (shared instance, refitted by dataset() on every call)
scaler = MinMaxScaler()

# Model parameters
max_encoder_length = 258  # number of consecutive rows per input window
batch_size = 128
max_epochs = 100
learning_rate = 0.001
hidden_size = 50  # units in each hidden Dense layer
dropout = 0.20
optimizer = Adam(learning_rate=learning_rate)
loss = 'mae'

# Early stopping
mode = 'min'
patience = 10
min_delta = 1e-4
cb = EarlyStopping(mode=mode, patience=patience, min_delta=min_delta)

# File path to save results
File_name_results = 'results/FNN/Results_FNN.csv'

#### Preparation of datasets / Functions

In [None]:
# Datasets for each study period and stock
def dataset(period, stock):
    """Build the scaled feature frame for one study period and one stock.

    Parameters
    ----------
    period : int
        Position in ``period_b`` / ``period_e`` selecting the row range.
    stock : int
        Column position of the stock in the target frame and in every
        per-stock feature frame.

    Returns
    -------
    pandas.DataFrame
        One row per day of the period (index reset to 0..n-1). Columns are,
        in order: the target feature, the known categoricals, the unknown
        categoricals and the unknown reals. The columns listed in
        ``time_varying_reals_to_scale`` are min-max scaled.

    Notes
    -----
    The module-level ``scaler`` is refitted on every call, using only the
    first ``training_size`` rows of the period, and then applied to the
    whole period.
    """
    global scaler

    frames = globals()  # feature frames live in module-level variables named after the feature
    target_name = Target_feature[0]
    target_frame = frames[target_name]
    rows = slice(period_b[period], period_e[period])

    # Empty frame carrying the period's index; the full-length feature series
    # assigned below are aligned on this index by pandas.
    data = pd.DataFrame(index=target_frame.index[rows])

    ## Add features
    # Target feature -- the column is named after Target_feature[0] so that it
    # always matches the entry appended to time_varying_reals_to_scale.
    data[target_name] = target_frame[[target_frame.columns[stock]]][rows]

    # Time varying known categoricals (shared by all stocks: the whole frame is assigned)
    for name in time_varying_known_categoricals:
        data[name] = frames[name].astype(Feature_type[0])

    # Time varying unknown categoricals (one column per stock)
    for name in time_varying_unknown_categoricals:
        frame = frames[name]
        data[name] = frame[frame.columns[stock]].astype(Feature_type[0])

    # Time varying unknown reals (one column per stock)
    for name in time_varying_unknown_reals:
        frame = frames[name]
        data[name] = frame[frame.columns[stock]].astype(Feature_type[1])

    ## Scaling
    # Positional index so that the first training_size rows are the training rows
    data = data.reset_index(drop=True)

    # Fit on the training rows only, then transform the full period
    scaler = scaler.fit(data[time_varying_reals_to_scale][0:training_size])
    data[time_varying_reals_to_scale] = scaler.transform(data[time_varying_reals_to_scale])

    return data

def dataset_target(period, stock):
    """Return the next-step target series for one study period and one stock.

    The stock's column of the target frame is cut to the period's rows and
    then shifted up by one row, so each row holds the following row's value.
    Because the shift happens after slicing, the last row of the period has
    no successor and is filled with 0.

    Returns a DataFrame indexed like the period with a single 'Target' column.
    """
    target_frame = globals()[Target_feature[0]]
    rows = slice(period_b[period], period_e[period])

    stock_values = target_frame[[target_frame.columns[stock]]][rows]
    next_step = stock_values.shift(-1).fillna(0)

    data = pd.DataFrame(index=target_frame.index[rows])
    data['Target'] = next_step
    return data

# Sequence split
def split_dataset(data, max_encoder_length):
    """Cut ``data`` into overlapping windows of ``max_encoder_length`` rows.

    Window k covers rows k .. k + max_encoder_length - 1; consecutive windows
    are shifted by one row and only complete windows are produced. The
    windows are returned stacked in a NumPy array (empty when ``data`` is
    shorter than one window).
    """
    total = len(data)
    # Number of start positions whose window still fits; never more than one
    # per row, and range() treats a negative count as empty.
    window_count = min(total, total - max_encoder_length + 1)

    return array([data[first:first + max_encoder_length] for first in range(window_count)])

def split_target(data, max_encoder_length):
    """Pick, for every complete window, the entry at the window's last row.

    For window k (rows k .. k + max_encoder_length - 1) the element
    ``data[k + max_encoder_length - 1]`` is taken, so the result lines up
    one-to-one with the windows produced by ``split_dataset``. The picked
    entries are returned stacked in a NumPy array.
    """
    total = len(data)
    # Same window count as in split_dataset: one per start position that fits.
    window_count = min(total, total - max_encoder_length + 1)

    return array([data[first + max_encoder_length - 1] for first in range(window_count)])

### Model


In [None]:
start = time.time()

# Frame with one column per stock, looked up by name so the target can be
# switched through Target_feature.
target_frame = globals()[Target_feature[0]]
n_stocks = len(target_frame.columns)

# One row per predicted day over all study periods, one column per stock.
Results = pd.DataFrame(index=range(period_b[0], period_e[-1] - training_size), columns=target_frame.columns)

for i in range(len(period_b)):
    start_period = time.time()

    # Build the per-stock feature and target matrices for this period and
    # place them side by side: the network sees all stocks at once.
    temp_dataset_list = []
    temp_target_list = []
    for j in range(n_stocks):
        temp_dataset = dataset(i, j)
        temp_target = dataset_target(i, j)
        temp_dataset_list.append(temp_dataset.to_numpy())
        temp_target_list.append(temp_target.to_numpy())
    dataset_stack = hstack(temp_dataset_list)
    target_stack = hstack(temp_target_list)

    # Training windows and their targets come from the first training_size rows.
    X = split_dataset(dataset_stack[:training_size], max_encoder_length)
    y = split_target(target_stack[:training_size], max_encoder_length)
    # Test windows start early enough that the first window ends on the last
    # training row; every later window ends on one of the remaining rows.
    X_test = split_dataset(dataset_stack[training_size - max_encoder_length + 1:], max_encoder_length)

    # Flatten each (rows, features) window into a single input vector.
    X_shape = X.shape[1] * X.shape[2]
    X = X.reshape((X.shape[0], X_shape))
    X_test = X_test.reshape((X_test.shape[0], X_shape))

    # Three hidden ReLU layers with dropout, one linear output per stock.
    model = Sequential()
    model.add(Dense(hidden_size, activation='relu', input_dim=X_shape))
    model.add(Dropout(dropout))
    model.add(Dense(hidden_size, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(hidden_size, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(n_stocks))

    # A new model is created every period, so it needs its own optimizer:
    # an optimizer instance is tied to the variables of the first model it
    # trains. Clone the configured optimizer instead of reusing it.
    period_optimizer = type(optimizer).from_config(optimizer.get_config())
    model.compile(optimizer=period_optimizer, loss=loss)
    model.fit(X, y, batch_size=batch_size, validation_split=validation_split, epochs=max_epochs, verbose=0, callbacks=[cb])

    results_temp = model.predict(X_test, verbose=0)
    # Rows are addressed by position; this relies on period_b starting at 0
    # and advancing by test_size, as configured above.
    Results.iloc[period_b[i]:period_b[i] + test_size] = results_temp
    print(f'Compilation time - Period {i + 1}: {round(time.time() - start_period)} seconds')

# Make sure the output folder exists so the results are not lost after training.
results_dir = os.path.dirname(File_name_results)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)
Results.to_csv(File_name_results)
print(f'Compilation time: {round(time.time() - start)} seconds')