In [2]:
import pandas as pd
import datetime
import tensorflow as tf
import keras
import seaborn
import numpy as np

In [3]:
# Create fixed-window sequences for training and validation data
def create_sequences(X, window_size):
    seq_X = []
    seq_y = []
    for i in range(len(X) - window_size):
        seq_X.append(X[i:i+window_size])
        seq_y.append(X[i+window_size])
    return seq_X, seq_y

In [4]:
def preprocess(data):

    PF = np.polyfit(np.linspace(0,len(data),num=len(data)), np.log(data), 1)

    preprocessed = data / (np.exp(PF[0] * np.linspace(0,len(data),num=len(data)) + PF[1]))
    m = np.mean(preprocessed)
    s = np.std(preprocessed)
    preprocessed = (preprocessed - m)/s

    details = [m, s, PF]

    return preprocessed, details

In [5]:
def reprocess(y, details):
    mean = details[2][0]
    std = details[2][1]
    PF = details[2]
    time = details[3]
    
    return ((y * std) + mean) * np.exp(PF[0] * time + PF[1])

In [6]:
def smape_loss(y_true, y_pred):
    smape = 100 * tf.reduce_mean(2*tf.abs(y_pred - y_true) / (y_true + y_pred))
    return smape


In [7]:
def smape(model, validation):
    smape = 0
    prediction = model.predict(x_validation)
    for i in range(len(validation)):
        observation = validation[i]
        pred = prediction[i]

        x_hat = reprocess(pred, observation)
        x = reprocess(observation[1], observation)

        smape += 2*np.abs(x_hat-x)/(x+x_hat)

    smape /= len(validation)
    smape *=100

    return smape

In [8]:
def build_model(x_train, y_train, x_validation, y_validation, window_size, options):
    # Build the FFNN model
    model = keras.Sequential()
    model.add(keras.layers.Flatten(input_shape=(window_size, 1))) 
    model.add(keras.layers.Dense(options[0][0], activation='relu'))

    if len(options[0]) > 1:
        if len(options[0]) > 2:
            for i in range(1,len(options[0])-1):
                model.add(keras.layers.Dense(options[0][i], activation=options[1]))
                
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])

    # Train the model
    # model.fit(x_train, y_train, epochs=options[3], batch_size=options[2], validation_data=(x_validation, y_validation), verbose = 0)
    model.fit(x_train, y_train, epochs=options[3], batch_size=options[2], verbose = 0)

    # Make predictions
    predictions = model.predict(x_validation)

    # Evaluate the model
    loss, accuracy = model.evaluate(x_validation, y_validation)

    return model

In [22]:
df = pd.read_excel("/Users/lars/Documents/GitHub/NeuralNetworks_Assignment/M3C.xls")
df = df.iloc[:146,6:26]

df_train = df.iloc[:,:14]
df_test = df.iloc[:,14:]

window_size = 3

observations = []
PF = []

for index, row in df_train.iterrows():
    preprocessed, p = preprocess(np.array(row))
    PF.append(p)
   
    for i in range(len(preprocessed) - window_size):
        observations.append([preprocessed[i:i+window_size],preprocessed[i+window_size], p, i+window_size])     

1      940.66
2     1084.86
3     1244.98
4     1445.02
5     1683.17
6     2038.15
7     2342.52
8     2602.45
9     2927.87
10    3103.96
11    3360.27
12    3807.63
13    4387.88
14    4936.99
Name: 0, dtype: float64
1     1991.05
2     2306.40
3     2604.00
4     2992.30
5     3722.08
6     5226.62
7     5989.46
8     5614.62
9     5527.00
10    5389.80
11    5384.40
12    3656.20
13    4034.80
14    4230.00
Name: 1, dtype: float64
1     1461.57
2     1692.50
3     2193.82
4     2459.68
5     3246.80
6     4748.86
7     5559.46
8     5292.42
9     5029.40
10    4753.60
11    4344.60
12    2897.40
13    3256.40
14    3525.20
Name: 2, dtype: float64
1      744.54
2     1105.16
3     1417.40
4     1838.04
5     2337.62
6     3094.88
7     4280.04
8     5070.20
9     3675.18
10    3667.38
11    3808.64
12    3114.42
13    3847.20
14    4632.30
Name: 3, dtype: float64
1     4977.18
2     5248.00
3     5370.00
4     6184.89
5     7137.19
6     6743.00
7     7298.00
8     5260.29
9     48

In [10]:
# Shuffling: dont use for now
# np.random.shuffle(observations)

train = observations[:int(np.floor(len(observations)*0.8))]
validation = observations[int(np.floor(len(observations)*0.8)):]



In [11]:
x_train = []
y_train = []

x_validation = []
y_validation = []

folds = [6,10,13]


for i in range(len(train)):
    x_train.append(train[i][0])
    y_train.append(train[i][1])

for i in range(len(validation)):
    x_validation.append(validation[i][0])
    y_validation.append(validation[i][1])

x_train = np.array(x_train).reshape(len(x_train),window_size)
y_train = np.array(y_train).reshape(len(y_train))
x_validation = np.array(x_validation).reshape(len(x_validation),window_size)
y_validation = np.array(y_validation).reshape(len(x_validation))

In [13]:
lays = [[50,50]]
epochs = [50]
batchSizes = [16]
activationFunctions = ['sigmoid']

options = []

for layer in lays:
    for activation in activationFunctions:
        for batchSize in batchSizes:
            for epoch in epochs:
                options.append([layer, activation, batchSize, epoch, 0, 0])


for i in range(len(options)):
    smape_avg=[]
    for j in range(1):
        model = build_model(x_train, y_train, x_validation, y_validation, window_size, options[i])

        print(options[i])
        
        smape_avg.append(smape(model, validation))

    options[i][4] = np.mean(smape_avg)
    options[i][5] = np.std(smape_avg)


op = pd.DataFrame(options)
res = op.sort_values(4, ascending=False)
print(res)

[[50, 50], 'sigmoid', 16, 50, 0, 0]
          0        1   2   3          4    5
0  [50, 50]  sigmoid  16  50  14.770291  0.0


In [None]:

# for the final model build we should do something better but i focussed on the autoregression for now
lays = [[50,50]]
epochs = [50]
batchSizes = [16]
activationFunctions = ['sigmoid']

options = [layer, activation, batchSize, epoch, 0, 0]

model = build_model(x_train, y_train, x_validation, y_validation, window_size, options)

In [34]:
##TESTING

predictions = []
observations = []
window_size = 3

x = df_train.iloc[:,-4:-1]

for index, row in x.iterrows():
    preprocessed, p = preprocess(np.array(row))
    PF.append(p)
   
    observations.append([preprocessed,0, p, 15]) #y is unknown and first time point to predict is 15(or 14?)`

num_predictions = 6

# Make predictions using autoregressive approach
for pred in range(num_predictions):
    # Reshape the input for prediction
    x = []
    for i in (range(len(observations))):
        x.append(observations[i][0])
    x = np.array(x).reshape(len(x),window_size)
    
    # Make the prediction
    prediction = model.predict(x)

    predictions.append(prediction)

    for i in range(len(prediction)):
        observation[]
    
#     # Store the prediction
#     predictions.append(prediction)
    
#     # Update the initial input for the next prediction
#     initial_input.append(prediction)

# # Calculate SMAPE score for predictions
# smape_score = smape(np.array(predictions), y_validation[-num_predictions:])
# print("SMAPE score: %.2f%%" % smape_score)


[[0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00027046]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00027046]
 [0.00027046]
 [0.00027046]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00027046]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00027046]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00531618]
 [0.00027046]
 [0.00531618]
 [0.00