# Initial testing of the model.

1. Referred [Time-Series Prediction by "Hvass Laboratories"](https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/23_Time-Series-Prediction.ipynb)
1. Referred [LSTM Time Series Forecasting Tutorial in Python "Greg Hogg"](https://colab.research.google.com/drive/1b3CUJuDOmPmNdZFH3LQDmt5F0K3FZhqD?usp=sharing)

## Basic Imports

In [63]:
import tensorflow as tf
import numpy as np
tf.random.set_seed(42)
np.random.seed(42)
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
from pprint import pprint
from sklearn.model_selection import train_test_split

## Experimenting with TensorFlow

In [17]:
matrix = tf.constant(np.array([[1, 2], [3, 4]]), dtype=tf.float32)
# pprint(matrix)
# pprint(matrix[0, 1])
# pprint(matrix[1, :2])
# pprint(matrix[tf.newaxis, 1, :2])
v = tf.Variable(matrix)
pprint(v)
v.assign(tf.square(v))
pprint(v)

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[1., 2.],
       [3., 4.]], dtype=float32)>
<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[ 1.,  4.],
       [ 9., 16.]], dtype=float32)>


### Automatic Differentiation

In [61]:
a = tf.Variable([4], dtype=tf.float32)
b = tf.Variable([5], dtype=tf.float32)

tf.function(autograph=False) # tf.function is also available as a decorator
def f(a, b, power=2, d=3):
    return tf.pow(a, power) + d * b

with tf.GradientTape(watch_accessed_variables=True) as tape:
    c = f(a, b)

pprint(tape.gradient(target=c, sources=[a, b]))
print([a, b])

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([8.], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([3.], dtype=float32)>]
[<tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([5.], dtype=float32)>]


### Linear Regression


In [29]:
# ground truth
true_weights = tf.constant(list(range(5)), dtype=tf.float32)[:, tf.newaxis]
# some random training data
x = tf.constant(tf.random.uniform((32, 5)), dtype=tf.float32)
y = tf.constant(x @ true_weights, dtype=tf.float32)
# model parameters
weights = tf.Variable(tf.random.uniform((5, 1)), dtype=tf.float32)
for iteration in range(1001):
    with tf.GradientTape() as tape:
        y_hat = tf.linalg.matmul(x, weights)
        loss = tf.reduce_mean(tf.square(y - y_hat))
    if not (iteration % 100):
        print('mean squared loss at iteration {:4d} is {:5.4f}'.format(iteration, loss))
    gradients = tape.gradient(loss, weights)
    weights.assign_add(-0.05 * gradients)
pprint(weights)

mean squared loss at iteration    0 is 16.0754
mean squared loss at iteration  100 is 0.1749
mean squared loss at iteration  200 is 0.0418
mean squared loss at iteration  300 is 0.0100
mean squared loss at iteration  400 is 0.0024
mean squared loss at iteration  500 is 0.0006
mean squared loss at iteration  600 is 0.0001
mean squared loss at iteration  700 is 0.0000
mean squared loss at iteration  800 is 0.0000
mean squared loss at iteration  900 is 0.0000
mean squared loss at iteration 1000 is 0.0000
<tf.Variable 'Variable:0' shape=(5, 1) dtype=float32, numpy=
array([[1.5865926e-03],
       [9.9995196e-01],
       [2.0007532e+00],
       [3.0000165e+00],
       [3.9979944e+00]], dtype=float32)>


### Auto-graph

In [60]:
import inspect
import time
converted_f = tf.autograph.to_graph(f)
print(inspect.getsource(converted_f))

        def tf__f(a, b, power=None, d=None):
            with ag__.FunctionScope('f', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
                do_return = False
                retval_ = ag__.UndefinedReturnValue()
                try:
                    do_return = True
                    retval_ = (ag__.converted_call(ag__.ld(tf).pow, (ag__.ld(a), ag__.ld(power)), None, fscope) + (ag__.ld(d) * ag__.ld(b)))
                except:
                    do_return = False
                    raise
                return fscope.ret(retval_, do_return)



## Data Preparation

In [68]:
df = pd.read_csv('../Data/cleanned/zusammen.csv', parse_dates=['date'])
df.index = pd.to_datetime(df['date'], format='%d.%m.%Y %H:%M:%S')
df['days'] = df['date'].dt.dayofyear
df = df.drop(['rohwert'], axis=1)
df = df.drop(['date'], axis=1)
df.reset_index(inplace=True)

## Data sorting and splitting

In [69]:
def df_to_X_y(df, window_size=5):
    df_as_np = df.to_numpy()
    X = []
    y = []
    for i in range(len(df_as_np)-window_size):
        row = [[a] for a in df_as_np[i:i+window_size]]
        X.append(row)
        label = df_as_np[i+window_size]
        y.append(label)
    return np.array(X), np.array(y)

In [70]:
WINDOW_SIZE = 5
X1, y1 = df_to_X_y(df, WINDOW_SIZE)
X1.shape, y1.shape

((23905, 5, 1, 7), (23905, 7))

In [67]:
df.head(10)

Unnamed: 0,date,wert,temp,dwpt,rhum,prcp,days
0,2020-01-02 00:00:00,36,1.0,-0.6,89.0,0.0,2
1,2020-01-02 07:00:00,31,0.1,-1.2,91.0,0.0,2
2,2020-01-02 06:00:00,32,-0.3,-1.3,93.0,0.0,2
3,2020-01-02 05:00:00,27,-0.5,-1.2,95.0,0.0,2
4,2020-01-02 04:00:00,38,-0.4,-0.8,97.0,0.0,2
5,2020-01-02 03:00:00,49,0.4,-0.2,96.0,0.0,2
6,2020-01-02 02:00:00,46,0.9,-0.3,92.0,0.0,2
7,2020-01-02 01:00:00,38,0.9,-0.6,90.0,0.0,2
8,2020-01-02 08:00:00,33,0.1,-1.3,90.0,0.0,2
9,2020-01-02 09:00:00,34,0.2,-1.4,89.0,0.0,2


In [71]:
X1[0], y1[0]

(array([[[Timestamp('2020-01-02 00:00:00'), 36, 1.0, -0.6, 89.0, 0.0, 2]],
 
        [[Timestamp('2020-01-02 07:00:00'), 31, 0.1, -1.2, 91.0, 0.0, 2]],
 
        [[Timestamp('2020-01-02 06:00:00'), 32, -0.3, -1.3, 93.0, 0.0, 2]],
 
        [[Timestamp('2020-01-02 05:00:00'), 27, -0.5, -1.2, 95.0, 0.0, 2]],
 
        [[Timestamp('2020-01-02 04:00:00'), 38, -0.4, -0.8, 97.0, 0.0, 2]]],
       dtype=object),
 array([Timestamp('2020-01-02 03:00:00'), 49, 0.4, -0.2, 96.0, 0.0, 2],
       dtype=object))

In [36]:
X1[0].shape, y1[0].shape

((5, 1, 6), (6,))

In [73]:
xTrain, xTest, yTrain, yTest = train_test_split(X1,
                                y1, 
                                test_size=0.3, 
                                random_state=42)
xVali, xTest, yVali, yTest = train_test_split(xTest,
                                yTest, 
                                test_size=0.5, 
                                random_state=42)

In [75]:
xTrain.shape,  xVali.shape, xTest.shape, yTrain.shape, yVali.shape, yTest.shape   

((16733, 5, 1, 7),
 (3586, 5, 1, 7),
 (3586, 5, 1, 7),
 (16733, 7),
 (3586, 7),
 (3586, 7))

In [82]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

model1 = Sequential()
model1.add(InputLayer((16733, 5, 1, 7)))
# model1.add(LSTM(64))
model1.add(Dense(8, 'relu'))
model1.add(Dense(1, 'linear'))

model1.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 16733, 5, 1, 8)    64        
_________________________________________________________________
dense_4 (Dense)              (None, 16733, 5, 1, 1)    9         
Total params: 73
Trainable params: 73
Non-trainable params: 0
_________________________________________________________________


In [83]:
cp1 = ModelCheckpoint('model1/', save_best_only=True)
model1.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])

In [84]:
model1.fit(xTrain, yTrain, epochs=10, callbacks=[cp1])

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type Timestamp).

In [31]:
X1

array([[[[ 3.60e+01,  1.00e+00, -6.00e-01,  8.90e+01,  0.00e+00,
           2.00e+00]],

        [[ 3.10e+01,  1.00e-01, -1.20e+00,  9.10e+01,  0.00e+00,
           2.00e+00]],

        [[ 3.20e+01, -3.00e-01, -1.30e+00,  9.30e+01,  0.00e+00,
           2.00e+00]],

        [[ 2.70e+01, -5.00e-01, -1.20e+00,  9.50e+01,  0.00e+00,
           2.00e+00]],

        [[ 3.80e+01, -4.00e-01, -8.00e-01,  9.70e+01,  0.00e+00,
           2.00e+00]]],


       [[[ 3.10e+01,  1.00e-01, -1.20e+00,  9.10e+01,  0.00e+00,
           2.00e+00]],

        [[ 3.20e+01, -3.00e-01, -1.30e+00,  9.30e+01,  0.00e+00,
           2.00e+00]],

        [[ 2.70e+01, -5.00e-01, -1.20e+00,  9.50e+01,  0.00e+00,
           2.00e+00]],

        [[ 3.80e+01, -4.00e-01, -8.00e-01,  9.70e+01,  0.00e+00,
           2.00e+00]],

        [[ 4.90e+01,  4.00e-01, -2.00e-01,  9.60e+01,  0.00e+00,
           2.00e+00]]],


       [[[ 3.20e+01, -3.00e-01, -1.30e+00,  9.30e+01,  0.00e+00,
           2.00e+00]],

        [[ 2.70e+