# WS 04, 05

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense 

In [None]:
# Load the monthly car-sales dataset straight from GitHub.
# Only the 'Sales' column is kept, so df is a single-column frame.
path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/monthly-car-sales.csv'
df = pd.read_csv(path, usecols=['Sales']) 
# Last expression of the cell: shows the first rows.
df.head()
# df.describe()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Number of leading rows reserved for training: 80% of the rows,
# truncated to an integer by int().
n_train = int(df.shape[0] * 0.80) 
n_train

In [None]:
# Plot the raw series; the magenta vertical line sits at x = n_train,
# the first row that the later split assigns to the test portion.
df.plot()
plt.axvline(n_train, c='m', lw=1.0)
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only (the first n_train rows), then
# apply that same mapping to the whole series. Fitting on the full series
# would let the test rows' min/max leak into the training data.
# Consequence: scaled test values may fall slightly outside [0, 1].
sc = MinMaxScaler()
sc.fit(df.values[:n_train])
data_sc = sc.transform(df.values)
data_sc[:5]

In [None]:
# Plot the scaled series; the vertical line sits at x = n_train - 1,
# the last row that the later split assigns to the training portion.
# NOTE(review): the raw-data plot above draws its marker at n_train
# instead - confirm which offset is intended.
plt.figure(figsize=(9,3.5))
plt.plot(data_sc)
plt.axvline(n_train-1, c='m', lw=1.0)
plt.show()

In [None]:
# convert into dataset matrix
def convertToMatrix(data, step=1):
    """Build sliding-window samples for one-step-ahead forecasting.

    Sample ``i`` is the window ``data[i:i + step]`` and its target is the
    observation that immediately follows it, ``data[i + step]``.

    Args:
        data: Array-like whose first axis is time (e.g. shape ``(n, 1)``).
        step: Window length, i.e. how many past observations form one input.

    Returns:
        Tuple ``(X, Y)`` of new arrays with ``X.shape == (n - step, step, ...)``
        and ``Y.shape == (n - step, ...)``. When ``n <= step`` both arrays are
        empty but keep the trailing dimensions of ``data``.

    Raises:
        ValueError: If ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    # Accept plain lists as well as arrays; tuple indexing fails on lists.
    data = np.asarray(data)
    n_samples = len(data) - step
    tail_shape = data.shape[1:]

    if n_samples <= 0:
        # Not enough observations for a single window: return correctly
        # shaped empties instead of shapeless (0,) arrays.
        return (np.empty((0, step) + tail_shape, dtype=data.dtype),
                np.empty((0,) + tail_shape, dtype=data.dtype))

    X = np.stack([data[i:i + step] for i in range(n_samples)])
    # Copy so the targets never alias the caller's array.
    Y = data[step:].copy()
    return X, Y

In [None]:
# Chronological split of the scaled series (no shuffling):
# the first n_train rows are train, the remainder is test.
train, test = data_sc[0:n_train], data_sc[n_train:]
train.shape, test.shape


In [None]:
step = 1  # window length: number of past observations per input sample

# Windows are built separately for each split, so each split loses `step`
# samples (the first `step` rows of a split have no full window before them).
X_train, y_train = convertToMatrix(train, step)  
X_test, y_test = convertToMatrix(test, step)

print('Train Test (after conversion):', X_train.shape, X_test.shape)

In [None]:
# Peek at the first five input windows.
print(X_train[:5])

In [None]:
# Peek at the first seven targets; target i is the value right after window i.
print(y_train[:7])

In [None]:

# Shapes of the target arrays for both splits.
y_train.shape, y_test.shape

## Model

In [None]:
from tensorflow.keras.layers import GRU

# Single recurrent layer (GRU, 32 units, ReLU activation) reading windows of
# shape (step, 1), followed by a one-unit dense layer for the forecast.
model = Sequential([
    GRU(units=32, input_shape=(step, 1), activation="relu"),
    Dense(1),
])

# Mean squared error loss, Adam optimizer; no extra metrics are tracked.
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

In [None]:
from tensorflow.keras.utils import plot_model
# Write a diagram of the model to 'model-car-sales.png', with tensor shapes
# shown and layer names hidden.
plot_model(model, 'model-car-sales.png', show_shapes=True, show_layer_names=False)# optional: show_dtype=True

In [None]:
# Train for 20 epochs with one sample per batch; verbose=0 silences the
# progress output. `history` keeps the per-epoch loss for plotting.
history = model.fit(X_train, y_train, epochs=20, batch_size=1, verbose=0)

In [None]:
# Training-loss curve, one point per epoch.
plt.plot(history.history['loss'], label='Train loss')
plt.legend()
plt.show()

In [None]:
# Loss on the test windows. The model was compiled with MSE and no metrics,
# and the inputs are scaled, so this is MSE in scaled units.
score = model.evaluate(X_test, y_test, verbose=0)
print(score)

In [None]:
# One-step-ahead predictions (still in scaled units) for every window
# of both splits.
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)
trainPredict.shape, testPredict.shape

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Map predictions and targets back to the original scale so the metrics
# below are expressed in the units of the 'Sales' column.
trainPredict_inv = sc.inverse_transform(trainPredict)
testPredict_inv = sc.inverse_transform(testPredict)
y_train_inv = sc.inverse_transform(y_train)
y_test_inv = sc.inverse_transform(y_test)

# Coefficient of determination per split.
r2_train = r2_score(y_train_inv, trainPredict_inv)
r2_test = r2_score(y_test_inv, testPredict_inv)

# Root mean squared error per split (square root of sklearn's MSE).
rmse_train = np.sqrt(mean_squared_error(y_train_inv, trainPredict_inv))
rmse_test = np.sqrt(mean_squared_error(y_test_inv, testPredict_inv))

print('R2 Train: {:.3f}'. format(r2_train))
print('R2 Test: {:.3f}'. format(r2_test))

print('RMSE Train: {:.3f}'. format(rmse_train))
print('RMSE Test: {:.3f}'. format(rmse_test))

In [None]:

# Column of `step` NaNs: each split has no prediction for its first `step`
# rows, so padding realigns the predictions with the original row positions.
prefix = np.full((step, 1), np.nan)

trainPredict_inv = np.concatenate((prefix, trainPredict_inv))
testPredict_inv = np.concatenate((prefix, testPredict_inv))

# Train and test predictions stitched into one series for plotting.
predict_inv = np.concatenate((trainPredict_inv, testPredict_inv))

In [None]:
# Overlay the stitched predictions (red) on the raw series.
plt.figure(figsize=(8,3.1))
plt.plot(df.values, lw=1.0, label='Dataset')

plt.plot(predict_inv,'r', lw=2.4, label='Predict')
# NOTE(review): marker drawn at n_train - step - 1, while the earlier plots
# use n_train and n_train - 1 - confirm which offset is intended.
plt.axvline(n_train-step-1, c='m', lw=1.0)
plt.legend()
plt.show()

### Predict

In [None]:
# Take the last `step` training windows as model input.
X_input = X_train[-step:]

print(X_input)
X_input.shape

In [None]:
# Predict on the selected windows (one prediction per window), then map
# the scaled outputs back to the original units.
pred = model.predict(X_input)
print(pred.shape)
print(pred)
pred_inv = sc.inverse_transform(pred)

# Last row / last column: the prediction for the final window.
print('Prediction:', pred_inv[-1][-1])

In [None]:
# Number of training windows and their shape.
X_train.shape

In [None]:
# Naive baseline: mean of the n_back raw observations in rows
# [end - n_back - 1, end - 1) of df.
end = len(X_train) + 1
n_back = 6
window = df[end - n_back - 1:end - 1]
avg_cal = (window.sum() / window.count()).values[0]
print('Estimate the Average: ', avg_cal.round(2))

In [None]:
# Last training target, mapped back to the original scale and reduced
# to a 0-d array for printing and plotting.
y_true = np.squeeze(sc.inverse_transform(y_train[-1].reshape(-1, 1)))
print('Actual:', y_true)

In [None]:
# x position at which the three point estimates are drawn.
h_axis = X_train.shape[0] + 1  # alternative considered: + step - 1
print(h_axis)

# NOTE(review): train_inv is assigned but not used anywhere in the
# visible code.
train_inv = sc.inverse_transform(train)

plt.figure(figsize=(8-1,3.3))
plt.plot(df[:n_train-step], lw=1.0, label='Dataset (Train)')

# Actual value, model prediction and moving-average baseline, all drawn
# at the same x position.
plt.scatter(h_axis, y_true, s=75, label='True')
plt.scatter(h_axis, pred_inv[-1], c='r' , marker='s', s=40, label='Prediction')
plt.scatter(h_axis, avg_cal, c='m' , marker='x', s=100, label='Average')

plt.legend()
plt.show()

In [None]:
# X_new is otherwise first assigned in a later cell, so running the notebook
# top to bottom raised NameError here. Define it in this cell, using the same
# expression as that later assignment (the last `step` test windows).
X_new = X_test[-step:]

# Show those input values on the original scale.
X_new_inv = sc.inverse_transform(X_new.reshape(-1,1))
print('X new input:', np.squeeze(X_new_inv))

In [None]:
# Raw rows around position `end`, i.e. around the train/test boundary.
df[end-5:end+5]

In [None]:

# x position used later to draw the prediction on the full-series plot.
h_axis = df.shape[0] + step -1
print(h_axis)

# Last `step` test windows.
# NOTE(review): convertToMatrix pairs the final test window with the final
# test row as its target, so this prediction corresponds to the last
# *observed* row, yet h_axis places it past the end of the data. If a true
# next-step forecast is intended, the input should presumably be the last
# `step` rows of `test` - confirm.
X_new = X_test[-step:]
X_new.shape

# Overwrites the `pred` / `pred_inv` computed earlier for the train end.
pred = model.predict(X_new)
pred_inv = sc.inverse_transform(pred)
print('Prediction:', pred_inv[-1][-1])

In [None]:
# Naive baseline at the end of the data: sum of the last n_back raw rows
# divided by n_back. Overwrites the earlier train-end `avg_cal`.
n_back = 6
avg_cal = df[- n_back:].sum() / n_back
avg_cal = avg_cal.values[0]
print('Estimate the Average: ', avg_cal.round(2))

In [None]:
# Full series, stitched model predictions, and the two end-of-data point
# estimates (model prediction and moving average) drawn at h_axis.
plt.figure(figsize=(9,3.5))

plt.plot(df, lw=1.0, label='Dataset')
# plt.plot(predict_inv,'g--', label='Pred')
plt.plot(predict_inv,'r-',lw=2.0, label='Predict')
plt.scatter(h_axis, pred_inv[-1], c='r' , marker='s', s=40, label='Prediction')
plt.scatter(h_axis, avg_cal, c='m' , marker='x', s=100, label='Average')
plt.axvline(n_train-step-1, c='m', lw=.5)
plt.legend()
plt.show()

# plt.plot(pred_inv)
# NOTE(review): nothing is drawn between the previous plt.show() and this
# one, so this second call appears redundant.
plt.show()


### Polynomial regression baseline

In [None]:
# Regressor: the row index as a single-column matrix.
# Target: the raw (unscaled) values of df.
X = df.index.values.reshape(-1,1)
y = df.values     

In [None]:
# Only the last expression of a cell is displayed, so X[:5] shows nothing.
X[:5]
X.shape

In [None]:
# Only the last expression of a cell is displayed, so y[:5] shows nothing.
y[:5]
y.shape

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the row index into degree-2 polynomial features.
poly_features = PolynomialFeatures(degree=2)
x_poly = poly_features.fit_transform(X)

# Ordinary least squares on the polynomial features. Fitted on every row
# of df, so there is no train/test split for this baseline.
model_poly = LinearRegression()
model_poly.fit(x_poly, y)

In [None]:
# x position for the point estimates (same formula as the train-end plot).
h_axis = X_train.shape[0] + 1 
print(h_axis)

# Polynomial fit evaluated on every row index.
y_poly_pred = model_poly.predict(x_poly)

# NOTE(review): train_inv is assigned but not used anywhere in the
# visible code.
train_inv = sc.inverse_transform(train)

plt.figure(figsize=(8-1,3.3))
plt.plot(df[:n_train-step], lw=1.0, label='Dataset')

# NOTE(review): when the cells run top to bottom, pred_inv and avg_cal were
# last overwritten by the end-of-dataset cells, while y_true and h_axis
# refer to the end of the training rows - confirm these markers are meant
# to be compared at the same position.
plt.scatter(h_axis, y_true, s=75, label='True')
plt.scatter(h_axis, pred_inv[-1], c='r' , marker='s', s=40, label='Predict')
plt.scatter(h_axis, avg_cal, c='m' , marker='x', s=100, label='Average')

plt.plot(X, y_poly_pred, lw=1.5, color='r', label='Poly Reg')  

plt.legend(loc=4)

plt.show()

In [None]:

# Evaluate the fitted quadratic at a single time index.
# (A first assignment, x_input = [[141]], was immediately overwritten and
# has been removed as dead code.)
x_input = [[109]]

# Use transform(), not fit_transform(): the feature mapping was already
# fitted together with model_poly and must not be re-fitted on the query.
y_poly_pred2 = model_poly.predict(poly_features.transform(x_input))
y_poly_pred2 = np.squeeze(y_poly_pred2)
print('Prediction', y_poly_pred2.round(3))



In [None]:
# Side-by-side summary of the actual value and the three estimates.
# The recurrent model built in this notebook is a GRU, so it is labelled
# as such; the 'Average' label spelling is also corrected.
# NOTE(review): when the cells run top to bottom, y_true is the last
# training target, while pred_inv and avg_cal come from the end-of-dataset
# cells - confirm the values are meant to be compared directly.
print('Actual data:', y_true)
print('GRU Prediction:', pred_inv[-1][-1].round(2))
print('Poly Prediction:', y_poly_pred2.round(2))
print('Average Prediction:', avg_cal.round(2))