In [708]:
import pandas as pd
import datetime
import tensorflow as tf
import keras
import seaborn
import numpy as np

In [709]:
# Create fixed-window sequences for training and validation data
def create_sequences(X, window_size):
    seq_X = []
    seq_y = []
    for i in range(len(X) - window_size):
        seq_X.append(X[i:i+window_size])
        seq_y.append(X[i+window_size])
    return seq_X, seq_y

In [710]:
def preprocess(data):

    PF = np.polyfit(np.linspace(0,len(data),num=len(data)), np.log(data), 1)

    preprocessed = data / (np.exp(PF[0] * np.linspace(0,len(data),num=len(data)) + PF[1]))
    m = np.mean(preprocessed)
    s = np.std(preprocessed)
    preprocessed = (preprocessed - m)/s

    details = [m, s, PF]

    return preprocessed, details

In [711]:
def reprocess(y, details):
    mean = details[2][0]
    std = details[2][1]
    PF = details[2][2]
    time = details[3]
    
    return ((y * std) + mean) * np.exp(PF[0] * time + PF[1])

In [712]:
def smape_loss(y_true, y_pred):
    smape = 100 * tf.reduce_mean(2*tf.abs(y_pred - y_true) / (y_true + y_pred))
    return smape


In [713]:
def smape(model, validation):
    smape = 0
    prediction = model.predict(x_validation)
    for i in range(len(validation)):
        observation = validation[i]
        pred = prediction[i]

        x_hat = reprocess(pred, observation)
        x = reprocess(observation[1], observation)

        smape += 2*np.abs(x_hat-x)/(x+x_hat)

    smape /= len(validation)
    smape *=100

    return smape

In [714]:
def build_model(x_train, y_train, x_validation, y_validation, window_size, options):
    # Build the FFNN model
    model = keras.Sequential()
    model.add(keras.layers.Flatten(input_shape=(window_size, 1))) 
    model.add(keras.layers.Dense(options[0][0], activation='relu'))

    if len(options[0]) > 1:
        if len(options[0]) > 2:
            for i in range(1,len(options[0])-1):
                model.add(keras.layers.Dense(options[0][i], activation=options[1]))
                
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])

    # Train the model
    # model.fit(x_train, y_train, epochs=options[3], batch_size=options[2], validation_data=(x_validation, y_validation), verbose = 0)
    model.fit(x_train, y_train, epochs=options[3], batch_size=options[2], verbose = 0)

    # Make predictions
    predictions = model.predict(x_validation)

    # Evaluate the model
    loss, accuracy = model.evaluate(x_validation, y_validation)

    return model

In [715]:
df = pd.read_excel("/Users/lars/Documents/GitHub/NeuralNetworks_Assignment/M3C.xls")
df = df.iloc[:146,6:26]

df_train = df.iloc[:,:14]
df_test = df.iloc[:,14:]

window_size = 3

observations = []
PF = []

for index, row in df_train.iterrows():
    preprocessed, p = preprocess(np.array(row))
    PF.append(p)
   
    for i in range(len(preprocessed) - window_size):
        observations.append([preprocessed[i:i+window_size],preprocessed[i+window_size], p, i+window_size])     


In [716]:
# Shuffling: dont use for now
# np.random.shuffle(observations)

train = observations[:int(np.floor(len(observations)*0.8))]
validation = observations[int(np.floor(len(observations)*0.8)):]



In [717]:
x_train = []
y_train = []

x_validation = []
y_validation = []

folds = [6,10,13]


for i in range(len(train)):
    x_train.append(train[i][0])
    y_train.append(train[i][1])

for i in range(len(validation)):
    x_validation.append(validation[i][0])
    y_validation.append(validation[i][1])

x_train = np.array(x_train).reshape(len(x_train),window_size)
y_train = np.array(y_train).reshape(len(y_train))
x_validation = np.array(x_validation).reshape(len(x_validation),window_size)
y_validation = np.array(y_validation).reshape(len(x_validation))

In [718]:
lays = [[50,50]]
epochs = [50]
batchSizes = [16]
activationFunctions = ['sigmoid']

options = []

for layer in lays:
    for activation in activationFunctions:
        for batchSize in batchSizes:
            for epoch in epochs:
                options.append([layer, activation, batchSize, epoch, 0, 0])


for i in range(len(options)):
    smape_avg=[]
    for j in range(1):
        model = build_model(x_train, y_train, x_validation, y_validation, window_size, options[i])

        print(options[i])
        
        smape_avg.append(smape(model, validation))

    options[i][4] = np.mean(smape_avg)
    options[i][5] = np.std(smape_avg)


op = pd.DataFrame(options)
res = op.sort_values(4, ascending=False)
print(res)

[[50, 50], 'sigmoid', 16, 50, 0, 0]
          0        1   2   3          4    5
0  [50, 50]  sigmoid  16  50  14.711936  0.0


In [719]:

# for the final model build we should do something better but i focussed on the autoregression for now
lays = [[50,50]]
epochs = [50]
batchSizes = [16]
activationFunctions = ['sigmoid']

options = [layer, activation, batchSize, epoch, 0, 0]

model = build_model(x_train, y_train, x_validation, y_validation, window_size, options)



In [720]:
def smape_clean(y_true, y_pred):
    smape = 100 * np.mean(2*np.abs(y_pred - y_true) / (y_true + y_pred))
    return smape

In [721]:
##TESTING

predictions = pd.DataFrame()
observations = []
window_size = 3

df_full = pd.DataFrame()
df_full = df_train
df_full = df_full.drop(df_full.columns[14:], axis=1)

num_predictions = 6

# Make predictions using autoregressive approach
for pred in range(num_predictions):

    PF = []
    for index, row in df_full.iterrows():
        preprocessed, p = preprocess(np.array(row))
        PF.append(p)
        observations.append([preprocessed[11+pred:14+pred],0, p, 14+pred]) #y is unknown and first time point to predict is 15(or 14?)`

    # Reshape the input for prediction
    x = []
    for i in (range(len(observations))):
        x.append(observations[i][0])
    x = np.array(x).reshape(len(x),window_size)
    
    # Make the prediction
    prediction = model.predict(x)

    y_u = []
    for i in range(len(prediction)):
        y_u.append(reprocess(prediction[i], observations[i]))

    # print(pd.DataFrame(y_u).shape)
    predictions[15+pred] = pd.DataFrame(pd.DataFrame(y_u))
    df_full[15+pred] = pd.DataFrame(y_u)

smapes = pd.DataFrame(columns=[i for i in range(num_predictions)])

for i in range(predictions.shape[0]):
    smape_row = []
    for j in range(num_predictions):
        smape_row.append(smape_clean(predictions.iloc[i, j], df_test.iloc[i, j]))
    smapes.loc[i] = smape_row

print(smapes)

smape_avgs = []
for i in range(num_predictions):
    smape_avgs.append(np.mean(smapes.iloc[:,i]))
print(smape_avgs)



             0          1          2          3          4          5
0     4.457461  17.931971  28.804478  41.651366  48.146654  56.090897
1    20.387434   4.864991  14.915162  15.092009  14.871977  12.113167
2    51.642259  36.462417  41.792754  39.204962  40.942536  53.578291
3    25.127843  13.642950   5.790956  16.566503  10.364864  17.895001
4    10.462952  17.361462  29.811762  37.052851  29.687283  18.990138
..         ...        ...        ...        ...        ...        ...
141   7.437902   0.188281   4.260280   6.702316   1.424700   0.129322
142   9.995316   4.414887  15.593822   7.235593   9.131973   3.482213
143   1.744891  13.655668  16.060816  15.794610  32.966773  49.663972
144  30.191374  39.676759  47.967080  59.600854  73.862974  95.156653
145  35.521796  35.648353  18.157387  10.900166  33.936527  49.413984

[146 rows x 6 columns]
[16.40121573649749, 19.930942326354174, 28.61359902194544, 28.015883396845062, 32.99716355842322, 36.421480662787474]
