In [None]:

#* Dane: 30 tys. iteracji
#* Wystąpienie awarii po 20 tys. iteracji
    
#* Wen, Qingsong, et al. "Transformers in Time Series: A Survey." arXiv preprint arXiv:2202.07125 (2022).
#* https://arxiv.org/pdf/2202.07125.pdf

#* Online Machine Learning
#* https://analyticsindiamag.com/how-to-learn-from-streaming-data-with-creme-in-python/

#* Timeseries Forecasting
#* https://www.tensorflow.org/tutorials/structured_data/time_series

In [None]:

#* Czy każdy model powinien być wyuczony dla jednego połączenia

#* Czy w ramach predykcji powinniśmy analizować działanie tak, że model zaczyna predykcję od N ostatnich próbek?

#* Następnie w każdym kolejnym kroku wykorzystuje poprzednie N-1 próbek oraz ostatnio przewidzianą próbkę
#* W ten sposób po N krokach każda kolejna predykcja będzie wykonywana na podstawie jedynie przewidzianych próbek
#* Bez udziału próbek realnych

In [None]:
# Utility imports
import os
import tqdm
from collections import defaultdict

# Math and matrix manipulation imports
import numpy as np
import pandas as pd

# Graphing imports
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras import layers
from tensorflow.python.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_percentage_error

In [None]:
# TensorFlow GPU setup: let GPU memory grow on demand instead of being
# reserved in full when the first GPU operation runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth needs to be the same across GPUs and has to be set
        # before any GPU is initialized, so configure every device first.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Listing logical devices initializes the GPUs, so it must happen only
        # once, after the loop: doing it inside the loop makes
        # set_memory_growth fail for the second and later GPUs.
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), 'Physical GPUs,', len(logical_gpus), 'Logical GPUs')
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

In [None]:
DATA_PATH = "./datasets/traffic/"
# Sorted integer identifiers of the data files (file "<N>.txt" -> N); the list
# lets us inspect which components of the generated signal are available.
# Directory entries without a ".txt" extension (e.g. .ipynb_checkpoints) are
# skipped so they cannot break the int() conversion.
list_of_datafiles = sorted(
    int(stem)
    for stem, ext in map(os.path.splitext, os.listdir(DATA_PATH))
    if ext == '.txt'
)

In [None]:
# Sanity check: the file identifiers should have been converted to int.
type(list_of_datafiles[0])

In [None]:
def load_traffic_series(data_path, file_ids, links):
    """Collect one value per data file for each requested matrix cell.

    Args:
        data_path: Directory prefix (including the trailing separator) that
            holds the "<id>.txt" files.
        file_ids: Iterable of file identifiers, read in the given order.
        links: Mapping of output column name -> (row, column) index into the
            matrix loaded from each file.

    Returns:
        dict mapping every column name to the list of values read from the
        files, in ``file_ids`` order. All columns are present (as empty
        lists) even when ``file_ids`` is empty.
    """
    series = {name: [] for name in links}
    for file_id in tqdm.tqdm(file_ids):
        matrix = np.loadtxt(f"{data_path}{file_id}.txt")
        for name, (row, col) in links.items():
            series[name].append(matrix[row][col])
    return series


# TODO: add multiprocessing
# Column name -> (row, column) cell of the per-file matrix to extract.
df = load_traffic_series(
    DATA_PATH,
    list_of_datafiles,
    {'5->8': (5, 8), '8->5': (8, 5), '5->12': (5, 12), '8->12': (8, 12)},
)

In [None]:
# Turn the collected series into a DataFrame and split it by position:
# the first 20000 rows are the training set, the remaining rows are the
# test set (re-indexed so that it starts from 0 again).
df = pd.DataFrame.from_dict(df)
train_df, test_df = df[:20000], df[20000:].reset_index(drop=True)

In [None]:
# Preview the first rows of the training split.
train_df.head()

In [None]:
# Total number of rows in the full (train + test) frame.
df.shape[0]

In [None]:
# Preview the first rows of the test split.
test_df.head()

In [None]:
# Side-by-side box plots of every column: training split on the left,
# test split on the right.
fig, axs = plt.subplots(ncols=2, figsize=(15, 7))
train_ax, test_ax = axs
sns.boxplot(data=train_df, ax=train_ax)
train_ax.set_title('Train dataset')
sns.boxplot(data=test_df, ax=test_ax)
test_ax.set_title('Test dataset')

In [None]:
def plot_data(target: str):
    """Draw line plots of one column for the train, test and full datasets.

    Creates one figure with three stacked axes and plots column ``target`` of
    the module-level ``train_df``, ``test_df`` and ``df`` frames against the
    sample position (0 .. number of rows - 1).

    Args:
        target: Name of the column to plot, e.g. '5->8'.
    """
    panels = (
        (train_df, f"Wizualizacja zbioru treningowego {target}"),
        (test_df, f"Wizualizacja zbioru testowego {target}"),
        (df, f"Wizualizacja całego zbioru {target}"),
    )
    fig, axs = plt.subplots(ncols=1, nrows=3, figsize=(30, 7))
    for ax, (frame, title) in zip(axs, panels):
        positions = np.arange(0, frame.shape[0], 1)
        drawn_ax = sns.lineplot(data=frame, x=positions, y=target, ax=ax)
        drawn_ax.set_title(title)

In [None]:
# Train / test / full-series line plots for the '5->8' column.
plot_data('5->8')

In [None]:
# Train / test / full-series line plots for the '8->5' column.
plot_data('8->5')

In [None]:
# Train / test / full-series line plots for the '5->12' column.
plot_data('5->12')

In [None]:
# Train / test / full-series line plots for the '8->12' column.
plot_data('8->12')

In [None]:
# Summary statistics of every column in the training split.
train_df.describe()

In [None]:
# Summary statistics of every column in the test split.
test_df.describe()

In [None]:
def split_sequence(*, sequence, n_input_steps=5, n_output_steps=1):
    """Cut a sequence into sliding (input window, output window) pairs.

    Window ``i`` uses ``sequence[i : i + n_input_steps]`` as input and the
    ``n_output_steps`` samples that directly follow it as the target; the
    window start advances by one sample at a time.

    Args:
        sequence: Array-like ordered along its first axis (pandas Series,
            NumPy array, list, ...). Extra trailing axes are kept as-is.
        n_input_steps: Number of consecutive samples in every input window.
        n_output_steps: Number of consecutive samples in every target window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with shapes
        ``(n_windows, n_input_steps, ...)`` and
        ``(n_windows, n_output_steps, ...)``, where
        ``n_windows = len(sequence) - n_input_steps - n_output_steps + 1``.
        When the sequence is too short for a single window, ``n_windows`` is 0
        but the arrays keep their window dimensions, so callers can still read
        ``X.shape[1]``.

    Raises:
        ValueError: If ``n_input_steps`` or ``n_output_steps`` is smaller than 1.
    """
    if n_input_steps < 1 or n_output_steps < 1:
        raise ValueError("n_input_steps and n_output_steps must both be >= 1")

    # Work on a plain array so that slicing is always positional, regardless
    # of the container type (or pandas index) that was passed in.
    values = np.asarray(sequence)
    window = n_input_steps + n_output_steps
    n_windows = max(values.shape[0] - window + 1, 0)

    X = np.empty((n_windows, n_input_steps) + values.shape[1:], dtype=values.dtype)
    y = np.empty((n_windows, n_output_steps) + values.shape[1:], dtype=values.dtype)
    for start in range(n_windows):
        split = start + n_input_steps
        X[start] = values[start:split]
        y[start] = values[split:start + window]
    return X, y

In [None]:
# Windowing configuration: number of past samples fed to the model and
# number of following samples it has to predict.
n_input_steps, n_output_steps = 1, 1
# Column to model; the alternatives are '5->8', '8->5' and '8->12'.
target = '5->12'
window_kwargs = dict(n_input_steps=n_input_steps, n_output_steps=n_output_steps)
X_train, y_train = split_sequence(sequence=train_df[target], **window_kwargs)
X_test, y_test = split_sequence(sequence=test_df[target], **window_kwargs)

In [None]:
# Shape of the windowed training inputs produced by split_sequence.
X_train.shape

In [None]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], n_features))

In [None]:

# TODO: Change dataset creation so that train_model picks the right target ('5->8', '8->5', etc.) by itself.
def build_model(n_input_steps: int, n_output_steps: int, n_features: int, *, model_type: str) -> tf.keras.Model:
    """Build and compile a forecasting model of the requested type.

    Args:
        n_input_steps: Number of time steps in every input window.
        n_output_steps: Number of values the model predicts per window
            (size of the final Dense layer).
        n_features: Number of features per time step.
        model_type: Architecture name; only 'LSTM' is supported.

    Returns:
        A compiled Keras model (Adam optimizer, MSE loss).

    Raises:
        ValueError: If ``model_type`` is not a supported architecture. The
            function used to print a message and return None, which only
            postponed the failure to the caller's first use of the model.
    """
    supported_types = ('LSTM',)
    if model_type not in supported_types:
        raise ValueError(f'Model niedostępny: {model_type!r}')

    # Layers are taken from the public `keras` namespace, the same one that
    # provides Sequential. Mixing in layer classes from the private
    # tensorflow.python.keras package is unsupported and can fail when the
    # model is assembled.
    model = keras.Sequential()
    model.add(keras.layers.LSTM(50, activation='relu', input_shape=(n_input_steps, n_features)))
    model.add(keras.layers.Dense(n_output_steps))
    model.compile(optimizer='adam', loss='mse')
    return model

# TODO: Add a split into validation and test sets?
def train_model(input_data: np.array, input_target: str, output_data: np.array, output_target: str,
                model_type: str, epochs: int = 20, verbose: int = 1, early_stopping: bool = True) -> str:
    """Train a model and save it, or reuse a previously saved one.

    The model is stored under
    ``./saved_models/<model_type>_<input_target>to<output_target>_in<N>_out<M>``.
    If that path already exists nothing is trained and the existing path is
    returned; delete the saved files manually to force retraining.

    Args:
        input_data: Training inputs; indexed as (samples, time steps, features).
        input_target: Source identifier, used only in the saved-model name.
        output_data: Training targets; indexed as (samples, output steps).
        output_target: Destination identifier, used only in the saved-model name.
        model_type: Architecture name forwarded to ``build_model``.
        epochs: Maximum number of training epochs.
        verbose: Keras verbosity level passed to ``fit``.
        early_stopping: When True, stop once the training loss has not
            improved for 3 consecutive epochs.

    Returns:
        Path of the saved model.

    Raises:
        ValueError: If ``build_model`` does not provide a model for ``model_type``.
    """
    n_input_steps = input_data.shape[1]
    n_output_steps = output_data.shape[1]
    n_features = input_data.shape[2]

    models_path = './saved_models/'
    os.makedirs(models_path, exist_ok=True)

    model_name = f'{model_type}_{input_target}to{output_target}_in{n_input_steps}_out{n_output_steps}'
    model_path = f'{models_path}{model_name}'

    if os.path.exists(model_path):
        print(f'Model {model_name} już istnieje')
        print(f'Wczytano ścieżkę modelu: {model_path}')
        print('W celu wytrenowania nowego modelu należy ręcznie usunąć zapisane pliki\n')
        return model_path

    model = build_model(n_input_steps=n_input_steps, n_output_steps=n_output_steps, n_features=n_features,
                        model_type=model_type)
    if model is None:
        # Guard against a builder that signals an unsupported type by returning None.
        raise ValueError(f'Model niedostępny: {model_type!r}')

    # Honour the caller's settings: epochs / verbose / early_stopping used to
    # be ignored in favour of hard-coded values.
    callbacks = []
    if early_stopping:
        # No validation data is passed to fit(), so the training loss is monitored.
        callbacks.append(keras.callbacks.EarlyStopping(monitor='loss', mode='min',
                                                       verbose=int(verbose > 0), patience=3))
    model.fit(input_data, output_data, epochs=epochs, verbose=verbose, callbacks=callbacks)
    model.save(model_path)

    print('Zakończono trening i zapis modelu ')
    print(f'Wczytano ścieżkę modelu: {model_path}\n')

    return model_path

def test_model(input_data: np.array, output_data: np.array, model_path: str = None, model_type: str = None):
    if model_path is None and model_type is None:
        print('Nie wybrano żadnego modelu')
        print('Podaj ścieżkę do modelu lub wybierz typ modelu')
        return
    elif model_path is not None and model_type is not None:
        print('Wybrano zbyt wiele modeli')
        print('Podaj ścieżkę do modelu lub wybierz typ modelu')
        return

    elif model_path is not None and model_type is None:
        model = tf.keras.models.load_model(model_path)
        y_pred = model.predict(input_data, verbose=0)

    elif model_path is None and model_type is not None:
        return
    

    print(f'MAPE: {smape(output_data, y_pred)}')
    for i in range(len(output_data)):
        print(f"real: {output_data[i]} | pred: {y_pred[i]}")

    figure = plt.figure(figsize = (20, 15))
    ax = plt.subplot(111)
    line = ax.plot(np.arange(0, output_data.shape[0], 1), output_data, 'r', linewidth=5)
    line2 = ax.plot(np.arange(0, y_pred.shape[0], 1), y_pred, 'b', linewidth=1)

    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.1,
                     box.width, box.height * 0.9])

    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
              fancybox=True, shadow=True, ncol=2)

    plt.show()

def smape(A, F):
    """Symmetric mean absolute percentage error, in percent.

    Computes ``100 * mean(2 * |F - A| / (|A| + |F|))`` over all elements.
    Elements for which the term is undefined (NaN — e.g. both values are 0,
    or an input is NaN) are left out of the mean.

    Args:
        A: Actual values (array-like).
        F: Forecast values (array-like, broadcastable against ``A``).

    Returns:
        The sMAPE as a float. When no element yields a defined term
        (including empty input) 100.0 is returned.
        NOTE(review): 100 for the "nothing to compare" case is kept from the
        original implementation — confirm it is the intended convention.
    """
    # Convert to float arrays: this accepts plain lists and prevents unsigned
    # integer inputs from wrapping around in the subtraction below.
    actual = np.asarray(A, dtype=float)
    forecast = np.asarray(F, dtype=float)

    # 0/0 terms are expected and handled via NaN; silence the warnings only.
    with np.errstate(divide='ignore', invalid='ignore'):
        terms = 2 * np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast))

    n_valid = np.count_nonzero(~np.isnan(terms))
    if n_valid == 0:
        return 100.0
    return 100 / n_valid * np.nansum(terms)

In [None]:
# Derive the identifiers used in the saved-model name from `target`
# (e.g. '5->12' -> '5', '12') so the cached model always corresponds to the
# column the training windows were built from.
input_target, output_target = target.split('->')
model_path = train_model(input_data=X_train, input_target=input_target, output_data=y_train,
                         output_target=output_target, model_type='LSTM')
test_model(input_data=X_test, output_data=y_test, model_path=model_path)

In [None]:
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least squares on the same windows, with the inputs
# reshaped back to 2-D (samples, timesteps).
X_train_2d = X_train.reshape(X_train.shape[:2])
reg = LinearRegression()
reg.fit(X_train_2d, y_train)

In [None]:
# Predict with the fitted linear model on the test windows reshaped to 2-D.
X_test_2d = X_test.reshape(X_test.shape[:2])
y_pred = reg.predict(X_test_2d)

In [None]:
# The value is computed by smape(), i.e. the symmetric MAPE, so label it as such.
print(f'sMAPE: {smape(y_test, y_pred):.6f}')

In [None]:
# List every reference value next to the linear model's prediction.
for real, pred in zip(y_test, y_pred):
    print(f"real: {real} | pred: {pred}")

In [None]:
#from collections import defaultdict
#from functools import partial

#new_dict = defaultdict(lambda: numpy.zeros(array_size))
#defaultdict(partial(numpy.ndarray, 0))

In [None]:
from typing import List, Union

# Sample lists for experimenting with runtime type checks:
# one holds ints, the other holds strings.
list_of_int = [1, 2, 3]
list_of_str = ['1', '2', '3']

In [None]:
from typeguard import check_type

# Runtime check that list_of_int really is a List[int]; the printed messages
# name the object and type that are actually being checked.
# NOTE(review): the (argname, value, expected_type) call form and the TypeError
# it raises look like the typeguard 2.x API — confirm against the installed version.
try:
    check_type('list_of_int', list_of_int, List[int])
    print("list_of_int conforms to List[int]")
except TypeError:
    print("list_of_int does not conform to List[int]")

In [None]:
# Same check without the try/except, so a failed check surfaces as an exception.
check_type('list_of_int', list_of_int, List[int])
