In [None]:
# Tensorflow / Keras
from tensorflow import keras # for building Neural Networks
print('Tensorflow/Keras: %s' % keras.__version__) # print version
from keras.models import Sequential, model_from_json # for creating a linear stack of layers for our Neural Network
from keras import Input # for instantiating a keras tensor
from keras.layers import Dense, SimpleRNN, Dropout, LSTM, Bidirectional # for creating regular densely-connected NN layers and RNN layers
from keras.callbacks import EarlyStopping
import tensorflow as tf
from tensorflow.keras import activations
from keras import backend as K
from sklearn.preprocessing import StandardScaler

# Data manipulation
import pandas as pd # for data manipulation
print('pandas: %s' % pd.__version__) # print version
import numpy as np # for data manipulation
print('numpy: %s' % np.__version__) # print version
import math # to help with data reshaping of the data

# Sklearn
import sklearn # for model evaluation
print('sklearn: %s' % sklearn.__version__) # print version
from sklearn.model_selection import train_test_split # for splitting the data into train and test samples
from sklearn.metrics import mean_squared_error # for model evaluation metrics
from sklearn.preprocessing import MinMaxScaler # for feature scaling

# Visualization
import plotly 
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
print('plotly: %s' % plotly.__version__) # print version

#Data fra yahoo finance
import yfinance as yf
print('yfinance: %s' %yf.__version__)

## Anvendte funktioner

#### Modelevaluering

In [None]:
def showTrain(history):
    """Plot training and validation loss per epoch.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with the keys ``"loss"`` and
        ``"val_loss"``, each a sequence with one value per epoch.
    """
    fig, ax = plt.subplots(figsize=(9.2, 10.8), sharex=True)

    # Draw both curves on the single axes created above.
    ax.plot(history.history["loss"])
    ax.plot(history.history["val_loss"])

    ax.set_title("Model loss")
    ax.set_ylabel("Loss")
    ax.set_xlabel("Epoch")

    # Allow up to one major tick per recorded epoch.
    antal_epoker = len(history.history['loss'])
    ax.xaxis.set_major_locator(plt.MaxNLocator(antal_epoker))

    ax.legend(["Train", "Validering"], loc="upper left")
    ax.grid()
    plt.show()

#### Til trading

In [None]:
def visualiser(m_udv, kurs, køb, sælg, pred_priser_clos, priser_close_real,
               x_køb=None, x_sælg=None):
    """Plot a price series with buy/sell markers and the strategy's wealth curve.

    Parameters
    ----------
    m_udv : sequence
        Wealth of the strategy per step; drawn on the right-hand y-axis.
    kurs : sequence
        Price series drawn on the left-hand y-axis.
    køb, sælg : sequence
        Prices at which the strategy bought / sold (y-values of the markers).
    pred_priser_clos : sized
        Only its length is used, as the upper x-axis limit.
    priser_close_real : iterable
        Price series used for the reference marker: last value divided by
        first value, drawn at x = ``len(m_udv)``.
    x_køb, x_sælg : sequence, optional
        Step indices of the buys / sells (x-values of the markers). When
        omitted, the module-level variables of the same names are used, which
        is how the function obtained them before these parameters existed.
        Prefer passing them explicitly so the plot does not depend on which
        cell was run last.
    """
    # Backward-compatible fallback to the notebook globals.
    if x_køb is None:
        x_køb = globals()["x_køb"]
    if x_sælg is None:
        x_sælg = globals()["x_sælg"]

    plt.rcParams.update({'font.size': 12})

    fig, ax1 = plt.subplots(figsize=(16, 9))

    # Left axis: price series plus trade markers.
    kurs_farve = '#41719C'
    ax1.set_xlabel('Dage (antal)')
    ax1.set_ylabel('kursudvikling', color=kurs_farve)
    ax1.plot(kurs, color=kurs_farve, label='S&P500')
    ax1.set_xlim([0, len(pred_priser_clos)])
    ax1.scatter(x_køb, køb, c='#00ff00', label='køb')
    ax1.scatter(x_sælg, sælg, c='#ff0000', label='sælg')
    ax1.tick_params(axis='y', labelcolor=kurs_farve)
    ax1.spines["top"].set_visible(False)

    # Right axis: wealth curve, sharing the x-axis with the price plot.
    ax2 = ax1.twinx()
    ax2.spines["top"].set_visible(False)

    afkast_farve = '#000000'
    ax2.set_ylabel('model afkast', color=afkast_farve)  # x-label is already set on ax1
    ax2.plot(m_udv, color=afkast_farve, label='algoritme afkast')

    # Materialise once; positional [-1]/[0] access is done on a plain list.
    reelle_priser = list(priser_close_real)
    ax2.scatter(len(m_udv), reelle_priser[-1] / reelle_priser[0], marker="X", s=200,
                label="Procentuel afkast for indekset i sig selv")
    ax2.tick_params(axis='y', labelcolor=afkast_farve)

    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    fig.legend(loc='upper center', prop={'size': 12})

In [None]:
def algo_performance_præd_værdier(pred_priser_clos, priser_close, priser_close_faktisk):
    """Simulate a long-only strategy driven by the sign of the predicted change.

    For every step ``i >= 1`` the prediction for step ``i + 1`` is inspected:

    * positive, and the last trade was not a buy: buy at ``priser_close_faktisk[i]``
      and earn the realised change ``priser_close[i + 1]``;
    * negative, and the last trade was not a sale: sell at ``priser_close_faktisk[i]``
      (wealth unchanged for that step);
    * otherwise hold: wealth follows ``priser_close[i + 1]`` if the last trade was
      a buy, and stays flat if it was a sale.

    NOTE(review): before the first trade the buy and sell markers are both 0,
    so the strategy is treated as invested (it may "sell" or earn the hold
    return without having bought). That behaviour is preserved from the
    original; confirm it is intended.

    Parameters
    ----------
    pred_priser_clos : sequence
        Predicted changes; elements may be scalars or size-1 arrays.
    priser_close : sequence
        Realised relative changes, indexed like ``pred_priser_clos``.
    priser_close_faktisk : sequence
        Price levels, indexed like ``pred_priser_clos``.

    Returns
    -------
    tuple
        ``(m_udv, køb_pris, sælg_pris, x_køb, x_sælg)``: wealth per step
        (starting at 1), buy prices, sell prices, buy step indices and sell
        step indices.
    """

    def _skalar(værdi):
        # Size-1 numpy arrays/scalars are collapsed to plain Python numbers so
        # the wealth list stays homogeneous (it starts with the number 1).
        return værdi.item() if hasattr(værdi, "item") else værdi

    m = 1
    m_udv = []
    x_køb, x_sælg = [], []        # step index of each buy / sale
    køb_pris, sælg_pris = [], []  # price level of each buy / sale

    # Step index of the most recent buy / sale (0 = none yet).
    index_køb = 0
    index_salg = 0

    for i in range(len(pred_priser_clos) - 1):
        if i == 0:
            # Record the initial wealth. The original wrote `m_udv.append`
            # without calling it, so this entry was silently dropped.
            m_udv.append(m)
            continue

        signal = _skalar(pred_priser_clos[i + 1])

        if signal > 0 and index_salg >= index_køb:
            # Buy today and take part in tomorrow's change.
            m = m * (1 + _skalar(priser_close[i + 1]))
            index_køb = i
            x_køb.append(i)
            køb_pris.append(priser_close_faktisk[i])
        elif signal < 0 and index_køb >= index_salg:
            # Tomorrow is expected to be lower: sell today, wealth is unchanged.
            index_salg = i
            x_sælg.append(i)
            sælg_pris.append(priser_close_faktisk[i])
        elif index_køb >= index_salg:
            # Last trade was a buy: hold, wealth moves with the asset.
            m = m * (1 + _skalar(priser_close[i + 1]))
        # else: last trade was a sale, so wealth stays flat.

        m_udv.append(m)

    return m_udv, køb_pris, sælg_pris, x_køb, x_sælg

## Data indhentning

In [None]:
# Download S&P 500 data (ticker "SPY"), turn the date index into a column and
# normalise the column names that the rest of the notebook refers to.
df = (
    yf.download("SPY", "2009-01-01", "2019-12-31")
    .reset_index()
    .rename(columns={"Date": "date", "Open": "open", "Close": "close"})
    .assign(date=lambda ramme: pd.to_datetime(ramme["date"]))
)

# Chronological (unshuffled) 80/20 split of the raw closing prices; the test
# part is used later as the actual price level in the trading simulation.
lukke_priser_train, lukke_priser_test = train_test_split(df["close"], test_size=0.2, shuffle=False)

# The date column is not used by the models, so it is dropped again.
df = df.drop(columns="date")



In [None]:
# Plot the open and close series side by side (x-axis is the row number).
fg, ax = plt.subplots(1, 2, figsize=(20, 7))

for akse, (kolonne, etiket, farve) in zip(ax, [("open", "Open", "green"), ("close", "Close", "red")]):
    akse.plot(df[kolonne], label=etiket, color=farve)
    akse.set_xlabel('Date', size=15)
    akse.set_ylabel('Price', size=15)
    akse.legend()

fg.show()

## Data forberedelse

In [None]:
# NOTE: this cell reassigns `df` from itself, so it is meant to run exactly
# once after the download cell.
df = df.drop(columns="Adj Close")

# Features: yesterday's value of each series, expressed as a relative change.
# 'Close Shift' is derived from the raw close before 'close' itself is
# converted below.
df['Close Shift'] = df['close'].shift(1).pct_change()
for kolonne in ['High', 'Low', 'Volume', 'open']:
    df[kolonne] = df[kolonne].shift(1).pct_change()

# Target: today's relative change in the close.
df['close'] = df['close'].pct_change()

# Replace the NaNs introduced by shifting/differencing, then drop the first row.
df = df.fillna(0).iloc[1:, :]
df

## RNN Modellen

In [None]:
# Early stopping on validation loss: stop after 10 epochs without an
# improvement of at least 1e-8.
early_stop = EarlyStopping(
    monitor='val_loss',
    min_delta=1e-8,
    patience=10,
)

# Feature matrix for the RNN. Despite the name, X_scaled holds the values
# unchanged (no scaler is applied here).
X = df.loc[:, ['open', 'High', 'Low', 'Volume', 'Close Shift']]
X_scaled = np.array(X)

# Target column, likewise kept as-is.
Y = df.loc[:, ['close']]
Y_scaled = np.array(Y)

In [None]:
##### Step 2 - Split features and target chronologically into train and test
# Splitting both arrays in one call keeps their rows paired; with
# shuffle=False the last 20 % of the rows become the test set.
X_train_data, X_test_data, Y_train_data, y_test = train_test_split(
    X_scaled, Y_scaled, test_size=0.2, shuffle=False
)

# Validation data: the last 20 % of the training rows.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_data, Y_train_data, test_size=0.2, shuffle=False
)

In [None]:
# Reshape every feature matrix to a 3-D tensor of shape (rows, 1, 5).
row_train, row_test, row_val = len(y_train), len(y_test), len(y_val)

X_train = X_train.reshape(row_train, 1, 5)
X_test = X_test_data.reshape(row_test, 1, 5)
X_val = X_val.reshape(row_val, 1, 5)

In [None]:
# Sanity check: shapes of the feature tensors followed by the target arrays.
print(*[arr.shape for arr in (X_train, X_test, X_val, y_train, y_test, y_val)])

In [None]:
##### Step 4 - Define the structure of the RNN model
# Input -> one SimpleRNN layer (64 units, tanh, L2-regularised kernel)
# -> a single linear output unit.
model_RNN = Sequential(
    [
        Input((X_train.shape[1], X_train.shape[2]), name='Input-Layer'),
        SimpleRNN(
            units=64,
            return_sequences=False,
            activation='tanh',
            name='Hidden-Recurrent-Layer2',
            kernel_regularizer=keras.regularizers.l2(0.01),
        ),
        Dense(units=1, activation='linear', name='Output-Layer'),  # Linear(x) = x
    ],
    name="RNN-Model",
)

In [None]:
##### Step 5 - Compile the Keras model
# Adam with a learning rate of 0.01, minimising the mean squared error.
# The remaining arguments are passed explicitly as None.
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model_RNN.compile(
    optimizer=optimizer,
    loss='mean_squared_error',
    loss_weights=None,
    weighted_metrics=None,
    run_eagerly=None,
    steps_per_execution=None,
)

In [None]:
##### Step 6 - Fit the Keras model on the training data
history_RNN = model_RNN.fit(
    X_train,                         # input data
    y_train,                         # target data
    batch_size=64,                   # samples per gradient update (Keras default is 32)
    epochs=40,                       # maximum number of passes over the training data
    verbose=1,                       # 1 = progress bar
    # `callbacks` is documented as a list of callbacks; the bare object was
    # passed before, unlike the LSTM cell which already wraps it in a list.
    callbacks=[early_stop],
    validation_data=(X_val, y_val),  # evaluated at the end of every epoch
)

In [None]:
##### Step 7 - Use the fitted model to predict
# In-sample predictions (training data) ...
pred_train_RNN = model_RNN.predict(x=X_train)
# ... and out-of-sample predictions (test data).
pred_test_RNN = model_RNN.predict(x=X_test)

In [None]:
# Loss curves (training vs. validation) for the RNN fit.
showTrain(history=history_RNN)

In [None]:
# Visualise observed vs. predicted values on the test set.
# Both series are daily percentage changes of the close (the target is
# `close.pct_change()`), so the legend, axis and title texts say so instead
# of calling them closing prices.
fig = go.Figure()

for serie, navn, farve in (
    (y_test, 'Observerede ændringer i lukkekurs (Test)', 'black'),
    (pred_test_RNN, 'Prædikterede ændringer i lukkekurs (Test)', 'red'),
):
    fig.add_trace(go.Scatter(x=np.arange(len(serie)),
                             y=np.asarray(serie).flatten(),
                             mode='lines',
                             name=navn,
                             opacity=0.8,
                             line=dict(color=farve, width=1)
                            ))

# Background colour
fig.update_layout(dict(plot_bgcolor='white'))

# Grid, zero line and axis line: identical styling for both axes.
akse_stil = dict(showgrid=True, gridwidth=1, gridcolor='lightgrey',
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
                 showline=True, linewidth=1, linecolor='black')
fig.update_xaxes(title='Antal dage', **akse_stil)  # days into the test data
fig.update_yaxes(title='Procentuel ændring i lukkekurs', **akse_stil)

# Figure title and horizontal legend above the plot
fig.update_layout(title=dict(text="Daglige ændringer i lukkekurs for S&P 500",
                             font=dict(color='black')),
                  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
                 )

fig.show()

## LSTM Modellen

In [None]:
# For the LSTM: standardise features and target.
# NOTE(review): the same scaler object is fitted twice, so after this cell
# `std_scaler` holds the statistics of Y only; the fit on X is overwritten.
# NOTE(review): X_scaled / Y_scaled are reassigned here, but the sequence cell
# below builds its windows from X_train_data / Y_train_data / X_test_data /
# y_test, so these scaled arrays do not appear to reach the LSTM - confirm
# whether that is intended.
std_scaler = StandardScaler()
X_scaled=std_scaler.fit_transform(X)
Y_scaled = std_scaler.fit_transform(Y)

In [None]:
def _byg_sekvenser(features, labels, sequence_length):
    """Build sliding windows of ``sequence_length`` rows and their labels.

    Window ``k`` consists of rows ``k .. k + sequence_length - 1`` of
    ``features``; its label is ``labels[k + sequence_length, 0]``, i.e. the
    row right after the window. The end positions run over
    ``range(sequence_length, len(features) - 1)``.

    NOTE(review): the ``- 1`` in the range leaves the final row without a
    window; it is kept from the original loops - confirm it is intended.

    Returns ``(X, y)`` as numpy arrays.
    """
    features = np.asarray(features)
    labels = np.asarray(labels)
    labels = labels.reshape(len(labels), -1)  # accept 1-D label vectors as well

    vinduer, mål = [], []
    for slut in range(sequence_length, len(features) - 1):
        vinduer.append(features[slut - sequence_length:slut, :])
        mål.append(labels[slut, 0])
    return np.array(vinduer), np.array(mål)


# Number of consecutive rows in every input window.
sequence_length = 40

# Training windows (from the unshuffled training part of the RNN split).
datasæt_sequences = pd.DataFrame(X_train_data)
datasæt_labels = pd.DataFrame(Y_train_data)
X_train_seq, y_train_seq = _byg_sekvenser(datasæt_sequences, datasæt_labels, sequence_length)

# Test windows, built the same way.
datasæt_sequences_test = pd.DataFrame(X_test_data)
datasæt_labels_test = pd.DataFrame(y_test)
X_test_seq, y_test_seq = _byg_sekvenser(datasæt_sequences_test, datasæt_labels_test, sequence_length)

# Kept from the original cell: re-wrap the RNN test arrays as numpy arrays,
# with y_test as a column vector.
X_test, y_test = np.array(X_test), np.array(y_test).reshape(len(y_test), 1)

# Chronological 80/20 split of the training windows into train and validation.
X_train_seq, X_vali_seq, y_train_seq, y_vali_seq = train_test_split(
    X_train_seq, y_train_seq, test_size=0.2, shuffle=False
)

print(X_train_seq.shape, X_vali_seq.shape, X_test_seq.shape, y_train_seq.shape, y_vali_seq.shape, y_test_seq.shape)

In [None]:
# Two stacked LSTM layers (128 units each) followed by a linear output unit.
optimizer = keras.optimizers.Adam(learning_rate=0.001)

model_LSTM_Final = Sequential()
# The first layer defines the input shape: (window length, number of features).
model_LSTM_Final.add(LSTM(units=128, return_sequences=True,
                          input_shape=(X_train_seq.shape[1], X_train_seq.shape[2])))
# The second layer receives the sequence output of the first one. It no longer
# carries an `input_shape` built from the RNN section's `X_train`, which did
# not describe this layer's input and tied the cell to unrelated state.
model_LSTM_Final.add(LSTM(units=128, return_sequences=False))

model_LSTM_Final.add(Dense(1, activation="linear"))

model_LSTM_Final.compile(loss='MSE', optimizer=optimizer)

history_LSTM_Final = model_LSTM_Final.fit(
    X_train_seq, y_train_seq,
    batch_size=16,
    epochs=50,
    validation_data=(X_vali_seq, y_vali_seq),  # (x_val, y_val) tuple, as documented
    callbacks=[early_stop],
    verbose=0,
)

In [None]:
# Out-of-sample LSTM predictions, one per test window.
predictions = model_LSTM_Final.predict(x=X_test_seq)

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))

# The 'seaborn' style sheet is called 'seaborn-v0_8' in newer matplotlib
# releases; use whichever name this installation provides.
seaborn_stil = next(
    (stil for stil in ("seaborn-v0_8", "seaborn") if stil in plt.style.available),
    None,
)
if seaborn_stil is not None:
    plt.style.use(seaborn_stil)

# `predictions` holds one value per test window, and y_test_seq holds the
# label of that same window. Plotting the full y_test instead would shift
# the observed curve by `sequence_length` days relative to the predictions.
ax.plot(np.asarray(y_test_seq).flatten(), color='black')
ax.plot(predictions.flatten(), color='blue')

ax.set_xlim(0, 400)

ax.set_title("Faktiske vs. prædikterede ændringer")
ax.set_ylabel("Procentuel ændring i lukkekurs fra dag til dag")
ax.set_xlabel("Tid")
ax.legend(["Faktiske værdier", "Prædikterede værdier - Endelig model"], loc="upper left")

## Algorithmic trading RNN

In [None]:
# Realised changes of the test period as a plain list (one entry per row of
# y_test), plus an alias for the RNN test predictions.
kurs = list(y_test)
pred_close_RNN = pred_test_RNN

In [None]:
# Simulate the strategy on the RNN predictions ...
m_udv, køb_pris, sælg_pris, x_køb, x_sælg = algo_performance_præd_værdier(
    pred_priser_clos=pred_test_RNN,
    priser_close=kurs,
    priser_close_faktisk=list(lukke_priser_test),
)
# ... and plot the trades and the wealth curve against the test prices.
visualiser(
    m_udv=m_udv,
    kurs=list(lukke_priser_test),
    køb=køb_pris,
    sælg=sælg_pris,
    pred_priser_clos=pred_close_RNN,
    priser_close_real=lukke_priser_test,
)

## Algorithmic trading LSTM

In [None]:
# Realised changes for the LSTM simulation.
# `predictions` has one entry per test window, and the label of window k is
# y_test_seq[k]. Using the full y_test here (as before) paired every
# prediction with the change observed `sequence_length` days earlier.
priser_close = y_test_seq
pred_close_LSTM = predictions
kurs = list(priser_close)

In [None]:
# Price levels for exactly the days the LSTM predicts.
# lukke_priser_test and y_test are both unshuffled tails of the same series,
# so they are lined up from the end. The first `sequence_length` days are
# only used as input history, and there is one prediction per remaining window.
priser_test_hale = list(lukke_priser_test)[-len(y_test):]
priser_test_LSTM = priser_test_hale[sequence_length:sequence_length + len(pred_close_LSTM)]

m_udv, køb_pris, sælg_pris, x_køb, x_sælg = algo_performance_præd_værdier(pred_close_LSTM, kurs, priser_test_LSTM)
visualiser(m_udv, priser_test_LSTM, køb_pris, sælg_pris, pred_close_LSTM, priser_test_LSTM)