In [164]:
import yfinance as yf
import pandas as pd
import datetime
import tensorflow as tf
import keras
import seaborn
import numpy as np

In [165]:
# Create fixed-window sequences for training and validation data
def create_sequences(X, window_size):
    seq_X = []
    seq_y = []
    for i in range(len(X) - window_size):
        seq_X.append(X[i:i+window_size])
        seq_y.append(X[i+window_size])
    return seq_X, seq_y

In [166]:
def preprocess(data):

    PF = np.polyfit(np.linspace(0,len(data),num=len(data)), np.log(data), 1)

    preprocessed = data / (np.exp(PF[0] * np.linspace(0,len(data),num=len(data)) + PF[1]))
    m = np.mean(preprocessed)
    s = np.std(preprocessed)
    preprocessed = (preprocessed - m)/s

    details = [m, s, PF]

    return preprocessed, details

In [167]:
df = pd.read_excel("/Users/lars/Documents/GitHub/NeuralNetworks_Assignment/M3C.xls")
df = df.iloc[:146,6:26]

df_train = df.iloc[:,:14]
df_test = df.iloc[:,14:]

window_size = 3

observations = []
PF = []

for index, row in df_train.iterrows():
    preprocessed, p = preprocess(np.array(row))
    PF.append(p)
   
    for i in range(len(preprocessed) - window_size):
        observations.append([preprocessed[i:i+window_size],preprocessed[i+window_size], p, i+window_size])
        # observations[index][2].append(i + window_size)

observations

[[array([-1.17520016, -0.88043795, -0.66838325]),
  -0.2499150709619427,
  [1.0014579814355693, 0.05462080775959927, array([0.11662169, 6.9113682 ])],
  3],
 [array([-0.88043795, -0.66838325, -0.24991507]),
  0.24434243245428092,
  [1.0014579814355693, 0.05462080775959927, array([0.11662169, 6.9113682 ])],
  4],
 [array([-0.66838325, -0.24991507,  0.24434243]),
  1.507387994058426,
  [1.0014579814355693, 0.05462080775959927, array([0.11662169, 6.9113682 ])],
  5],
 [array([-0.24991507,  0.24434243,  1.50738799]),
  1.7789263631285912,
  [1.0014579814355693, 0.05462080775959927, array([0.11662169, 6.9113682 ])],
  6],
 [array([0.24434243, 1.50738799, 1.77892636]),
  1.3734238443897704,
  [1.0014579814355693, 0.05462080775959927, array([0.11662169, 6.9113682 ])],
  7],
 [array([1.50738799, 1.77892636, 1.37342384]),
  1.2208711633114158,
  [1.0014579814355693, 0.05462080775959927, array([0.11662169, 6.9113682 ])],
  8],
 [array([1.77892636, 1.37342384, 1.22087116]),
  -0.04988328269230845

In [168]:
np.random.shuffle(observations)
train = observations[:int(np.floor(len(observations)*0.8))]
validation = observations[int(np.floor(len(observations)*0.8)):]


In [169]:
def build_model(x_train, y_train, x_validation, y_validation, window_size, options):
    # Build the FFNN model
    model = keras.Sequential()
    model.add(keras.layers.Flatten(input_shape=(window_size, 1))) 
    model.add(keras.layers.Dense(options[0][0], activation='relu'))

    if len(options[0]) > 1:
        if len(options[0]) > 2:
            for i in range(1,len(options[0])-1):
                model.add(keras.layers.Dense(options[0][i], activation=options[1]))
                
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])

    # Train the model
    model.fit(x_train, y_train, epochs=options[3], batch_size=options[2], validation_data=(x_validation, y_validation), verbose = 0)

    # Make predictions
    predictions = model.predict(x_validation)

    # Evaluate the model
    loss, accuracy = model.evaluate(x_validation, y_validation)

    return model

In [170]:
x_train = []
y_train = []

for i in range(len(train)):
    x_train.append(train[i][0])
    y_train.append(train[i][1])

x_validation = []
y_validation = []

for i in range(len(validation)):
    x_validation.append(validation[i][0])
    y_validation.append(validation[i][1])

x_train = np.array(x_train).reshape(len(x_train),window_size)
y_train = np.array(y_train).reshape(len(y_train))
x_validation = np.array(x_validation).reshape(len(x_validation),window_size)
y_validation = np.array(y_validation).reshape(len(x_validation))

In [None]:
def reprocess(y, details):
    mean = details[2][0]
    std = details[2][1]
    PF = details[2]
    time = details[3]
    
    return ((y * std) + mean) * np.exp(PF[0] * time + PF[1])


In [171]:
def smape(model, validation):
    smape = 0
    prediction = model.predict(x_validation)
    for i in range(len(validation)):
        observation = validation[i]
        pred = prediction[i]

        x_hat = reprocess(pred, observation)
        x = reprocess(observation[1], observation)

        smape += 2*np.abs(x-x_hat)/(x+x_hat)*100

    smape /= len(validation)

    return smape


In [181]:
lays = [[120,120],[120,120,120],[24,24]]
epochs = [15]
batchSizes = [16]
activationFunctions = ['sigmoid','relu']

options = []

for layer in lays:
    for activation in activationFunctions:
        for batchSize in batchSizes:
            for epoch in epochs:
                options.append([layer, activation, batchSize, epoch, 0, 0])

for i in range(len(options)):
    smape_avg=[]
    for j in range(10):
        model = build_model(x_train, y_train, x_validation, y_validation, window_size, options[i])
        
        smape_avg.append(smape(model, validation))



    options[i][4] = np.mean(smape_avg)
    options[i][5] = np.std(smape_avg)


op = pd.DataFrame(options)
res = op.sort_values(4, ascending=False)
print(res)



In [180]:
op = pd.DataFrame(options)
res = op.sort_values(4)
print(res)

          0        1   2   3          4         5
5  [24, 24]     relu  16  15  11.287649  0.032371
4  [24, 24]  sigmoid  16  15  11.306464  0.017964
1  [12, 12]     relu  16  15  11.362216  0.049244
3  [12, 24]     relu  16  15  11.374914  0.057329
2  [12, 24]  sigmoid  16  15  11.391853  0.062915
0  [12, 12]  sigmoid  16  15  11.393992  0.045638
