In [None]:
import pandas as pd
import pandas_ta as ta
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

In [None]:
# Read the price history from EURCHF.csv, using the first CSV column as a
# date-parsed index, and discard the 'Adj Close' column in the same step.
df = pd.read_csv("EURCHF.csv", parse_dates=True, index_col=0).drop(columns=['Adj Close'])

In [None]:
# Draw the closing-price series on a 15x8 inch figure
plt.figure(figsize=(15, 8))
plt.plot(df.Close)

In [None]:
# FEATURE ENGINEERING
# Every indicator is computed with pandas_ta and appended to df as a new column.
# The assignment order is kept fixed because later cells slice columns by position.

high, low, close, volume = df['High'], df['Low'], df['Close'], df['Volume']


def relative_to_close(average):
    """Return `average / close - 1`, i.e. the average's fractional offset from the close."""
    return average / close - 1


# Momentum
df['AO'] = ta.ao(high, low)
df['CCI'] = ta.cci(high, low, close, 13)
df['RSI'] = ta.rsi(close, 13)

# Overlap: three 20-period moving averages, each expressed relative to the close
for column, moving_average in (('EMA', ta.ema), ('SMA', ta.sma), ('SINWMA', ta.sinwma)):
    df[column] = relative_to_close(moving_average(close, 20))

# Stats
df['Z'] = ta.zscore(close, 50)

# Trend (aroon returns several columns; only the last one is kept)
aroon_frame = ta.aroon(high, low, 50)
df['AROON'] = aroon_frame.iloc[:, -1]
df['CHOP'] = ta.chop(high, low, close, 50)

# Volatility (aberration returns several columns; only the last one is kept)
aberration_frame = ta.aberration(high, low, close, 50)
df['ABERRATION'] = aberration_frame.iloc[:, -1]
df['MASS'] = ta.massi(high, low, 50)

# Volume
df['ADOSC'] = ta.adosc(high, low, close, volume)

In [None]:
# Display the frame, which now also carries the indicator columns added above
df

In [None]:
# TARGET PREPARATION
# Percentage change of the close, shifted back one row so that each row holds
# the return of the following row; rows containing any NaN are then removed.
# The target is appended last, which the split cell relies on.
df['NEXT DAY RETURN'] = 100 * df['Close'].pct_change().shift(-1)
df = df.dropna()

In [None]:
# X, Y, Train/Test Split
# Chronological split: the first 80% of rows train the models, the rest test them.
split_ratio = 0.8
split_idx = int(split_ratio * len(df))

train_rows = df.iloc[:split_idx]
test_rows = df.iloc[split_idx:]

# Regression: the last column is the target, every other column is a feature
X_train, Y_train = train_rows.iloc[:, :-1], train_rows.iloc[:, -1]
X_test, Y_test = test_rows.iloc[:, :-1], test_rows.iloc[:, -1]

# Classification (direction of price movement): +1.0 when the target is >= 0, else -1.0
Y_train_class = 2.0 * Y_train.ge(0) - 1.0
Y_test_class = 2.0 * Y_test.ge(0) - 1.0

In [None]:
# Import a wide variety of non-NN ML models
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm          import SVC, SVR
from sklearn.neighbors    import KNeighborsClassifier, KNeighborsRegressor
from sklearn.tree         import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble     import AdaBoostClassifier, AdaBoostRegressor, RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor

# Load relevant performance metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, f1_score, roc_auc_score 

In [None]:
# Instantiate one regressor and one classifier per model family, all with
# scikit-learn's default hyper-parameters. The `_c` suffix marks the classifier.
LR = LinearRegression()
LR_c = LogisticRegression()

SVM = SVR()
SVM_c = SVC()

KNN = KNeighborsRegressor()
KNN_c = KNeighborsClassifier()

DT = DecisionTreeRegressor()
DT_c = DecisionTreeClassifier()

ADA = AdaBoostRegressor()
ADA_c = AdaBoostClassifier()

RF = RandomForestRegressor()
RF_c = RandomForestClassifier()

GB = GradientBoostingRegressor()
GB_c = GradientBoostingClassifier()

# Collections iterated by the training/evaluation cell.
regressors = [LR, SVM, KNN, DT, ADA, RF, GB]
# LR_c was instantiated but previously left out of this list, so logistic
# regression was never trained or scored alongside the other classifiers.
classifiers = [LR_c, SVM_c, KNN_c, DT_c, ADA_c, RF_c, GB_c]

In [None]:
# Result tables, keyed by model name; later cells add the neural-network rows.
regression_mae = {}
classification_acc = {}
classification_auc = {}


def positive_class_scores(model, features):
    """Return a continuous score for the positive (+1) class.

    Uses `predict_proba` when the fitted estimator exposes it (second column,
    i.e. the larger class label) and falls back to `decision_function`
    otherwise (e.g. SVC built without probability estimates).
    """
    if hasattr(model, "predict_proba"):
        return model.predict_proba(features)[:, 1]
    return model.decision_function(features)


# Train and evaluate regressors
for regressor in regressors:
    # Class name is stable even if constructor arguments are added later,
    # unlike slicing the estimator's repr.
    name = type(regressor).__name__
    regressor.fit(X_train, Y_train)
    regression_mae[name] = mean_absolute_error(Y_test, regressor.predict(X_test))

# Train and evaluate classifiers
for classifier in classifiers:
    name = type(classifier).__name__
    classifier.fit(X_train, Y_train_class)

    # Accuracy needs hard labels; predict once and reuse.
    predicted_labels = classifier.predict(X_test)
    classification_acc[name] = accuracy_score(Y_test_class, predicted_labels)

    # ROC-AUC is a ranking metric and must be fed non-thresholded scores;
    # passing hard labels collapses the curve to a single operating point.
    classification_auc[name] = roc_auc_score(
        Y_test_class, positive_class_scores(classifier, X_test)
    )

In [None]:
# Load all necessary Deep Learning libraries

import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

from sklearn.preprocessing import StandardScaler

In [None]:
# Neural networks need scaled data: the scaler is fitted on the training
# features only and the same transform is applied to the test features.
ss = StandardScaler()
ss.fit(X_train)
X_train_scaled = ss.transform(X_train)
X_test_scaled = ss.transform(X_test)

# Keras callbacks shared by the network fits that follow; all monitor val_loss.
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')

# NOTE(review): the test split is also passed as validation_data, so early
# stopping and LR scheduling see the test set; reported test metrics may be
# optimistic. Consider carving a validation slice out of the training rows.

# Simple ANN (MLP) - Regression
# Every layer is linear, so the stack is a composition of affine maps.
ANN = Sequential()
ANN.add(Dense(12, input_shape=(X_train_scaled.shape[1],), activation='linear'))
ANN.add(Dense(8, activation='linear'))
ANN.add(Dense(4, activation='linear'))
ANN.add(Dense(2, activation='linear'))
ANN.add(Dense(1, activation='linear'))
ANN.compile(loss="mean_absolute_error", optimizer=Adam(learning_rate=0.001))

ANN.fit(X_train_scaled, Y_train, epochs=100, validation_data=(X_test_scaled, Y_test), callbacks=[earlyStopping, mcp_save, reduce_lr_loss])

# predict() returns shape (n, 1); flatten so it lines up with the 1-D target.
ann_return_forecast = ANN.predict(X_test_scaled).ravel()
mae = mean_absolute_error(Y_test, ann_return_forecast)
regression_mae["ANN"] = mae


# Simple ANN (MLP) - Classification
# A sigmoid output with binary_crossentropy expects targets in {0, 1}, while
# Y_*_class is encoded as {-1, +1}. Re-encode for training only; metrics are
# still computed against the +/-1 labels used by the scikit-learn models.
Y_train_binary = (Y_train_class > 0).astype(float)
Y_test_binary = (Y_test_class > 0).astype(float)

ANN = Sequential()
ANN.add(Dense(12, input_shape=(X_train_scaled.shape[1],), activation='linear'))
ANN.add(Dense(8, activation='linear'))
ANN.add(Dense(4, activation='linear'))
ANN.add(Dense(2, activation='relu'))
ANN.add(Dense(1, activation='sigmoid'))
ANN.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001))

ANN.fit(X_train_scaled, Y_train_binary, epochs=100, validation_data=(X_test_scaled, Y_test_binary), callbacks=[earlyStopping, mcp_save, reduce_lr_loss])

# Predict once: the probability feeds ROC-AUC, its thresholded form feeds accuracy.
ann_up_probability = ANN.predict(X_test_scaled).ravel()
ann_direction = np.where(ann_up_probability > 0.5, 1.0, -1.0)

acc = accuracy_score(Y_test_class, ann_direction)
auc = roc_auc_score(Y_test_class, ann_up_probability)

classification_acc["ANN"] = acc
classification_auc["ANN"] = auc

In [None]:
# Reshape the scaled features to (samples, timesteps=1, features), the 3-D
# layout the LSTM layer is declared with below.
X_train_scaled_lstm = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_scaled_lstm = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))

# LSTM - Regression
lstm = Sequential()
lstm.add(LSTM(12, input_shape=(1, X_train_scaled_lstm.shape[2]), activation='linear'))
lstm.add(Dense(8, activation='linear'))
lstm.add(Dense(4, activation='linear'))
lstm.add(Dense(2, activation='linear'))
lstm.add(Dense(1, activation='linear'))
lstm.compile(loss="mean_absolute_error", optimizer=Adam(learning_rate=0.001))

lstm.fit(X_train_scaled_lstm, Y_train, epochs=100, validation_data=(X_test_scaled_lstm, Y_test), callbacks=[earlyStopping, mcp_save, reduce_lr_loss])

# predict() returns shape (n, 1); flatten so it lines up with the 1-D target.
lstm_return_forecast = lstm.predict(X_test_scaled_lstm).ravel()
mae = mean_absolute_error(Y_test, lstm_return_forecast)
regression_mae["lstm"] = mae


# LSTM - Classification
# A sigmoid output with binary_crossentropy expects targets in {0, 1}, while
# Y_*_class is encoded as {-1, +1}. Re-encode for training only; metrics are
# still computed against the +/-1 labels.
Y_train_binary = (Y_train_class > 0).astype(float)
Y_test_binary = (Y_test_class > 0).astype(float)

lstm = Sequential()
lstm.add(LSTM(12, input_shape=(1, X_train_scaled_lstm.shape[2]), activation='linear'))
lstm.add(Dense(8, activation='linear'))
lstm.add(Dense(4, activation='linear'))
lstm.add(Dense(2, activation='relu'))
lstm.add(Dense(1, activation='sigmoid'))
lstm.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001))

lstm.fit(X_train_scaled_lstm, Y_train_binary, epochs=100, validation_data=(X_test_scaled_lstm, Y_test_binary), callbacks=[earlyStopping, mcp_save, reduce_lr_loss])

# Predict once: the probability feeds ROC-AUC, its thresholded form feeds accuracy.
lstm_up_probability = lstm.predict(X_test_scaled_lstm).ravel()
lstm_direction = np.where(lstm_up_probability > 0.5, 1.0, -1.0)

acc = accuracy_score(Y_test_class, lstm_direction)
auc = roc_auc_score(Y_test_class, lstm_up_probability)

classification_acc["lstm"] = acc
classification_auc["lstm"] = auc

In [None]:
import autokeras as ak

# Forecast window, in steps after the last training row. The horizon spans the
# whole test split so that metrics can be computed over it (a one-step horizon
# yields a single prediction, which cannot be scored against the test set).
predict_from = 1
predict_until = len(X_test_scaled)
lookback = 3

# NOTE(review): per the AutoKeras docs, TimeseriesForecaster.predict expects
# the training rows followed by the rows to forecast - confirm against the
# installed AutoKeras version.
X_all_scaled = np.concatenate([X_train_scaled, X_test_scaled], axis=0)


def forecast_test_window(forecaster):
    """Forecast the test period and report which test rows the forecast covers.

    Returns `(rows, forecast)`, where `forecast` is a flat array of predictions
    and `rows` is a positional slice into the test targets of equal length.
    NOTE(review): the first prediction is assumed to be `predict_from` steps
    after the last training row - verify this alignment for the AutoKeras
    release in use.
    """
    first_row = predict_from - 1
    forecast = np.asarray(forecaster.predict(X_all_scaled)).ravel()
    # Never score more predictions than there are remaining test targets.
    forecast = forecast[: len(Y_test) - first_row]
    return slice(first_row, first_row + len(forecast)), forecast


# Regression

clf = ak.TimeseriesForecaster(
    lookback=lookback,
    predict_from=predict_from,
    predict_until=predict_until,
    max_trials=1,
    objective="val_loss",
)

# Train the TimeseriesForecaster with train data
clf.fit(
    x=X_train_scaled,
    y=Y_train,
    validation_data=(X_test_scaled, Y_test),
    batch_size=32,
    epochs=10,
)

# Score the AutoKeras forecaster itself (this used to evaluate the LSTM
# classifier from the previous cell by mistake).
rows, return_forecast = forecast_test_window(clf)
mae = mean_absolute_error(Y_test.iloc[rows], return_forecast)
regression_mae["autokeras"] = mae


# Classification
# The forecaster is a regressor, so direction is learned by regressing on a
# 0/1 indicator of an up move and thresholding the output at 0.5. It used to
# be fitted on the continuous return, which made that threshold meaningless.
Y_train_binary = (Y_train_class > 0).astype(float)
Y_test_binary = (Y_test_class > 0).astype(float)

clf_class = ak.TimeseriesForecaster(
    lookback=lookback,
    predict_from=predict_from,
    predict_until=predict_until,
    max_trials=1,
    objective="val_loss",
)

# Train the TimeseriesForecaster with train data
clf_class.fit(
    x=X_train_scaled,
    y=Y_train_binary,
    validation_data=(X_test_scaled, Y_test_binary),
    batch_size=32,
    epochs=10,
)

# Predict on the 2-D scaled features the forecaster was fitted on (not the 3-D
# LSTM tensor). The raw score feeds ROC-AUC; its thresholded form feeds accuracy.
rows, up_score = forecast_test_window(clf_class)
direction_forecast = np.where(up_score > 0.5, 1.0, -1.0)

acc = accuracy_score(Y_test_class.iloc[rows], direction_forecast)
auc = roc_auc_score(Y_test_class.iloc[rows], up_score)

classification_acc["autokeras"] = acc
classification_auc["autokeras"] = auc

In [None]:
# Regression leaderboard: mean absolute error per model, lowest (best) first
pd.Series(regression_mae).sort_values(ascending=True)

In [None]:
# Classification leaderboard: accuracy per model, highest first
pd.Series(classification_acc).sort_values(ascending=False)

In [None]:
# Classification leaderboard: ROC-AUC per model, highest first
pd.Series(classification_auc).sort_values(ascending=False)