# Multivariate time series forecasting using LSTM using cycle-feature.

In [174]:
# %reset -f
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense, Dropout
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import rcParams
import seaborn as sns
sns.set()
rcParams['figure.figsize'] = 10,10
from sklearn.preprocessing import StandardScaler 
from functionsAll import lstmMultiSplit, originalToRecurring, recurringToOrignal
import warnings
warnings.filterwarnings('ignore')

## Data cleaning.

In [178]:
df = pd.read_csv("../Data/cleanned/zusammen.csv", parse_dates=["date"])
df['dayofyear'] = df['date'].dt.dayofyear
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year
df['weekday'] = df['date'].dt.weekday
df['hour'] = df['date'].dt.hour
df['day'] = df['date'].dt.day
df['week'] = df['date'].dt.week
df.drop(columns=['date','rohwert'], inplace= True)
# Conclude with 11 features, 1 label.
df = df[[ 'wert', 'day', 'month', 'dayofyear', 'year', 'weekday', 'week', 'hour','temp', 'dwpt', 'rhum', 'prcp']]
df.dropna(inplace=True)
df.reset_index(inplace=True, drop=True)

In [179]:
cyDf = df[['day','month','dayofyear','weekday', 'week', 'hour']] # [32,13,367,7,54,52]
scDf = df[['wert','year' , 'temp', 'dwpt', 'rhum', 'prcp']]

## Data normalizing

In [180]:
cyDf['day']=cyDf['day'].apply(lambda col: originalToRecurring(ogUnits=col,period=32))
cyDf['month']=cyDf['month'].apply(lambda row: originalToRecurring(ogUnits=row,period=13))
cyDf['dayofyear']=cyDf['dayofyear'].apply(lambda row: originalToRecurring(ogUnits=row,period=367))
cyDf['weekday']=cyDf['weekday'].apply(lambda row: originalToRecurring(ogUnits=row,period=7))
cyDf['week']=cyDf['week'].apply(lambda row: originalToRecurring(ogUnits=row,period=54))
cyDf['hour']=cyDf['hour'].apply(lambda row: originalToRecurring(ogUnits=row,period=25))

In [181]:
scaler = StandardScaler()
scaler = scaler.fit(scDf)
scaledDf = scaler.transform(scDf)

### Combine in one dataframe. [cycle + normalize]

In [182]:
cyDf['wert'] = scaledDf[:,0]
cyDf['year'] = scaledDf[:,1]
cyDf['temp'] = scaledDf[:,2]
cyDf['dwpt'] = scaledDf[:,3]
cyDf['rhum'] = scaledDf[:,4]
cyDf['prcp'] = scaledDf[:,5]
mainDf = cyDf[['wert','day', 'month', 'dayofyear', 'weekday', 'week', 'hour', 'year', 'temp', 'dwpt', 'rhum', 'prcp']]

In [206]:
mainDf.head(5)

Unnamed: 0,wert,day,month,dayofyear,weekday,week,hour,year,temp,dwpt,rhum,prcp
0,1.680347,"[[0.3826834323650898, 0.9238795325112867]]","[[0.4647231720437685, 0.8854560256532099]]","[[0.03423410142459154, 0.9994138413588491]]","[[0.43388373911755823, -0.900968867902419]]","[[0.11609291412523023, 0.993238357741943]]","[[0.0, 1.0]]",-1.159369,-1.566281,-1.38563,0.792786,-0.154285
1,1.188967,"[[0.3826834323650898, 0.9238795325112867]]","[[0.4647231720437685, 0.8854560256532099]]","[[0.03423410142459154, 0.9994138413588491]]","[[0.43388373911755823, -0.900968867902419]]","[[0.11609291412523023, 0.993238357741943]]","[[0.9822872507286887, -0.1873813145857246]]",-1.159369,-1.691633,-1.490882,0.903869,-0.154285
2,1.287243,"[[0.3826834323650898, 0.9238795325112867]]","[[0.4647231720437685, 0.8854560256532099]]","[[0.03423410142459154, 0.9994138413588491]]","[[0.43388373911755823, -0.900968867902419]]","[[0.11609291412523023, 0.993238357741943]]","[[0.9980267284282716, 0.06279051952931353]]",-1.159369,-1.747346,-1.508424,1.014952,-0.154285
3,0.795863,"[[0.3826834323650898, 0.9238795325112867]]","[[0.4647231720437685, 0.8854560256532099]]","[[0.03423410142459154, 0.9994138413588491]]","[[0.43388373911755823, -0.900968867902419]]","[[0.11609291412523023, 0.993238357741943]]","[[0.9510565162951535, 0.30901699437494745]]",-1.159369,-1.775202,-1.490882,1.126034,-0.154285
4,1.876899,"[[0.3826834323650898, 0.9238795325112867]]","[[0.4647231720437685, 0.8854560256532099]]","[[0.03423410142459154, 0.9994138413588491]]","[[0.43388373911755823, -0.900968867902419]]","[[0.11609291412523023, 0.993238357741943]]","[[0.8443279255020151, 0.5358267949789965]]",-1.159369,-1.761274,-1.420714,1.237117,-0.154285


## Data windowing and splitting

In [183]:
mianNp = mainDf.to_numpy()

In [186]:
train = mianNp[:23000]
test = mianNp[23000:] 

In [187]:
trainX, trainY = lstmMultiSplit(df=train, n_past=5, n_future=1)
testX, testY = lstmMultiSplit(df=test, n_past=5, n_future=1)

In [202]:
trainX.shape

(22995, 5, 12)

In [199]:
model = Sequential()
model.add(LSTM(128, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(trainY.shape[1]))

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 5, 128)            72192     
_________________________________________________________________
lstm_7 (LSTM)                (None, 5, 64)             49408     
_________________________________________________________________
lstm_8 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 134,049
Trainable params: 134,049
Non-trainable params: 0
_________________________________________________________________


In [200]:
history = model.fit(trainX, trainY, epochs=30, batch_size=12, validation_split=0.1, verbose=1)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).