##### Setup

In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import tensorflow as tf
tfk = tf.keras
tfkl = tf.keras.layers
import sklearn as skl
import scipy
import random
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics import tsaplots  

import warnings
import os

# Single seed shared by every random number generator used in the notebook.
seed = 33
random.seed(seed)                    # Python's built-in `random` module
np.random.seed(seed)                 # NumPy's legacy global RNG (was previously left unseeded)
tf.random.set_seed(seed)             # TensorFlow global seed
tf.compat.v1.set_random_seed(seed)   # TF1-compat entry point, kept from the original setup
# NOTE: hash randomization of the running interpreter is fixed at start-up, so this
# assignment only influences Python processes launched after this cell runs.
os.environ['PYTHONHASHSEED']=str(seed)

# Silence warnings for cleaner notebook output. FutureWarning is a subclass of
# Warning, so the second filter already covers the first.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Data Exploration and Preparation

## Data Exploration

## Preprocessing

In [62]:
# Load the 'EWS' sheet of the market-data workbook into the working dataframe.
df = pd.read_excel('FinancialMarketData.xlsx', sheet_name='EWS')

Retrieve the weekly closing values of the S&P 500 index from yfinance, build the feature <code>SP_GOLD</code> (S&P 500 close divided by the <code>XAU BGNL</code> gold column), and append it to the dataframe.

In [63]:
# Start (first row) and end (last row) dates for the yfinance query. Positional
# access via .iloc does not depend on the index labels of `df`.
# Only the first 10 characters of the stringified value are kept
# (presumably the YYYY-MM-DD part of a timestamp; confirm the dtype of 'Data').
start_date = str(df['Data'].iloc[0])[:10]
end_date = str(df['Data'].iloc[-1])[:10]

# Download weekly bars for the S&P 500 index and keep only the 'Close' data.
sp500 = yf.download('^GSPC', start=start_date, end=end_date, interval='1wk')
sp500 = sp500['Close']

# Flatten to 1-D before dividing: if 'Close' comes back as a single-column
# DataFrame (some yfinance versions use multi-level columns, TODO confirm for the
# installed version), a (n, 1) array divided by a (n,) array would silently
# broadcast to an (n, n) matrix.
sp500_close = np.ravel(sp500.to_numpy())
gold_price = df['XAU BGNL'].to_numpy()

# NOTE(review): the two series are aligned by row position, not by date, so they
# must at least have the same length. Fail loudly instead of broadcasting.
if sp500_close.shape[0] != gold_price.shape[0]:
    raise ValueError(
        "S&P 500 download has %d rows but the dataframe has %d; "
        "cannot align SP_GOLD by position" % (sp500_close.shape[0], gold_price.shape[0])
    )

# New feature SP500/GOLD as a one-column dataframe.
sp500_gold = pd.DataFrame(sp500_close / gold_price, columns=['SP_GOLD'])

# Append the new feature to the original dataframe. Any existing SP_GOLD column
# is dropped first so that re-running this cell does not create duplicates.
df = pd.concat([df.drop(columns='SP_GOLD', errors='ignore'), sp500_gold], axis=1)

[*********************100%***********************]  1 of 1 completed


In [64]:
# Subset of columns retained in `reduced_df` (order is preserved in the result).
reduced_features = [
    'SP_GOLD',
    'Cl1',
    'MXCN', 'MXEU', 'MXJP', 'MXRU', 'MXUS',
    'VIX', 'GT10', 'ECSURPUS',
    'Y',
]
reduced_df = df.loc[:, reduced_features]

In [65]:
# Show the reduced dataframe; the rendered output elides the middle rows.
reduced_df

Unnamed: 0,SP_GOLD,Cl1,MXCN,MXEU,MXJP,MXRU,MXUS,VIX,GT10,ECSURPUS,Y
0,5.172639,25.77,34.30,127.75,990.59,224.33,1416.12,22.50,6.657,0.077,0
1,5.010812,28.85,32.74,129.50,993.98,234.37,1428.79,21.50,6.748,0.043,0
2,4.736758,28.28,32.46,126.48,974.83,216.82,1385.93,23.02,6.692,0.135,0
3,5.037560,28.22,31.29,129.19,1007.12,201.89,1385.31,23.45,6.619,0.191,0
4,4.648525,28.02,31.32,134.67,1034.58,218.00,1411.95,21.25,6.613,0.312,1
...,...,...,...,...,...,...,...,...,...,...,...
1106,2.300134,57.76,110.83,140.32,1199.57,681.21,3800.77,20.30,1.622,0.339,0
1107,2.384887,60.55,108.80,142.76,1206.29,693.38,3835.96,19.61,1.706,0.303,0
1108,2.368409,59.33,111.92,144.13,1194.62,673.20,3955.16,18.12,1.658,0.440,0
1109,2.401730,60.18,108.40,144.23,1197.06,680.23,4027.05,16.65,1.616,0.467,0


Calculate the returns, split the dataset into training and validation sets, and then normalize.

The <code>train_val_split</code> function splits the original dataset into training and validation sets according to the specified <code>train_fraction</code> (0.8 by default).

In [14]:
def train_val_split(df, train_fraction = 0.8):
    """Split a dataframe by row position into a training and a validation part.

    The first ``int(n_rows * train_fraction)`` rows form the training set and
    the remaining rows form the validation set; row order is preserved and no
    shuffling is performed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split.
    train_fraction : float, default 0.8
        Fraction of rows assigned to the training set.

    Returns
    -------
    tuple
        ``(train_df, val_df)`` obtained with positional ``.iloc`` slicing.
    """
    n_train = int(len(df) * train_fraction)
    return df.iloc[:n_train], df.iloc[n_train:]

In [15]:
# Split with the default train_fraction (0.8).
# NOTE(review): this splits the full `df`, not the `reduced_df` built earlier;
# confirm which frame is intended.
train_df, val_df = train_val_split(df)

# Anomaly Detection

## Copulas

## Autoencoder (fully connected)

## Autoencoder (convolutional)

## LSTM

## Transformers

## Bagging

# Portfolio