In [1]:
from keras.layers import (
    Dense,
   # Dropout,
   # Flatten,
   # Input,
   # concatenate,
)
# from keras.layers import Embedding
from keras.models import Model, Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import sys
import pandas as pd
import numpy as np
import matplotlib as plt
import tensorflow as tf
import random
from itertools import product
from sklearn.preprocessing import MinMaxScaler

2024-12-03 21:54:48.097231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# add framework_tsa to path
sys.path.append('../framework_tsa')

In [3]:
import ffnn_explainer

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# functions to predict new values of kpi 

"""### FFNN model"""

def forecast_with_ffnn(
    time_series, n_lags=14, hidden_units_options=[32, 64, 128], epochs=50, batch_size=16, seed=0, plot_results=True
):
    # Set seed for reproducibility
    np.random.seed(seed)
    tf.random.set_seed(seed)
    random.seed(seed)

    # Make sure the time series is ordered and has no missing values
    time_series = time_series.dropna()
    time_series = time_series.sort_index()

    # Split the time series into training, validation, and test sets
    train_size = int(len(time_series) * 0.7)
    val_size = int(len(time_series) * 0.2)
    test_size = len(time_series) - train_size - val_size

    train = time_series.iloc[:train_size]
    validation = time_series.iloc[train_size:train_size + val_size]
    test = time_series.iloc[train_size + val_size:]

    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(train)
    validation_scaled = scaler.transform(validation)
    test_scaled = scaler.transform(test)
    train_val_scaled = np.vstack([train_scaled, validation_scaled])

    # Prepare the data
    def prepare_data(series, n_lags):
        series = np.array(series)
        X = np.lib.stride_tricks.sliding_window_view(series, window_shape=n_lags)[:-1]
        y = series[n_lags:]
        return X, y

    X_train_val, y_train_val = prepare_data(train_val_scaled.flatten(), n_lags)
    X_test, y_test = prepare_data(test_scaled.flatten(), n_lags)

    # Split train and validation
    split_idx = len(train)
    X_train, y_train = X_train_val[:split_idx-n_lags], y_train_val[:split_idx-n_lags]
    X_val, y_val = X_train_val[split_idx-n_lags:], y_train_val[split_idx-n_lags:]

    # Reshape data
    X_train = X_train.reshape(X_train.shape[0], n_lags)
    X_val = X_val.reshape(X_val.shape[0], n_lags)
    X_test = X_test.reshape(X_test.shape[0], n_lags)

    # Hyperparameter tuning
    best_rmse = float('inf')
    best_units = None
    for units in hidden_units_options:
        model = Sequential([
            Dense(units, activation='relu', input_dim=n_lags),
            Dense(units // 2, activation='relu'),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mean_squared_error')
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

        y_val_pred = model.predict(X_val).ravel()
        y_val_rescaled = scaler.inverse_transform(y_val.reshape(-1, 1)).ravel()
        y_val_pred_rescaled = scaler.inverse_transform(y_val_pred.reshape(-1, 1)).ravel()
        rmse = np.sqrt(mean_squared_error(y_val_rescaled, y_val_pred_rescaled))

        if rmse < best_rmse:
            best_rmse = rmse
            best_units = units

    # Final model training
    model = Sequential([
        Dense(best_units, activation='relu', input_dim=n_lags),
        Dense(best_units // 2, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train_val, y_train_val, epochs=epochs, batch_size=batch_size, verbose=0)

    # Predictions
    y_test_pred = model.predict(X_test).ravel()
    y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()
    y_test_pred_rescaled = scaler.inverse_transform(y_test_pred.reshape(-1, 1)).ravel()

    # Sliding window predictions
    predictions = []
    input_seq = X_test[0]
    for _ in range(len(test)):
        pred = model.predict(input_seq.reshape(1, -1), verbose=0)[0, 0]
        predictions.append(pred)
        input_seq = np.append(input_seq[1:], pred)

    predictions_rescaled = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()

    # Residuals and confidence interval
    residuals = y_test_rescaled.flatten() - y_test_pred_rescaled
    std_residuals = np.std(residuals)
    confidence_interval = (predictions_rescaled - 2 * std_residuals, predictions_rescaled + 2 * std_residuals)

    # Plot results
    if plot_results:
        plt.figure(figsize=(8, 4))
        plt.plot(pd.to_datetime(time_series.index), time_series['value'], label='Full Series', color='gray', linestyle='--', alpha=0.5)
        plt.plot(pd.to_datetime(train.index), train['value'], label='Train', color='blue')
        plt.plot(pd.to_datetime(validation.index), validation['value'], label='Validation', color='green')
        plt.plot(pd.to_datetime(test.index), test['value'], label='Test', color='red')
        plt.plot(pd.to_datetime(test.index), predictions_rescaled, label='Forecast', color='orange')
        plt.fill_between(
            test.index,
            predictions_rescaled - 2 * std_residuals,
            predictions_rescaled + 2 * std_residuals,
            color='orange', alpha=0.2, label='Confidence Interval'
        )
        plt.title('FFNN Forecast with Train, Validation, Test, and Confidence Interval')
        plt.xlabel('Time')
        plt.ylabel('Consumption')
        plt.legend()
        plt.grid()
        plt.show()

    return model, scaler

In [5]:
import sys
sys.path.append('../Dataset')

In [6]:
data = pd.read_pickle('../smart_app_data.pkl')

In [7]:
# creation of offline feature store, called FEAT_DATA

# Valori unici per ogni colonna
names = data['name'].unique()
kpis = data['kpi'].unique()
cols = ['sum', 'avg', 'min', 'max']

# Genera tutte le combinazioni possibili
combinations = list(product(names, kpis, cols))

# Crea un DataFrame a partire dalle combinazioni
feat_data = pd.DataFrame(combinations, columns=['name', 'kpi', 'col'])

# Aggiungi le altre colonne con valori casuali
feat_data['selected_f'] = np.random.choice([7, 30], len(feat_data))
feat_data['unusable_data_bool'] = np.random.choice([True, False], len(feat_data))
feat_data['inconsistencies_flag'] = np.random.choice([True, False], len(feat_data))
feat_data['h_trend_drift'] = np.random.normal(0, 1, len(feat_data))

decompositions = []
for _ in range(len(feat_data)):
    trend = np.random.rand(233)  # Trend fittizio
    seasonal = np.random.rand(233)  # Seasonal fittizio
    residuals = np.random.rand(233)  # Residuals fittizio
    decompositions.append({
        'trend': trend,
        'seasonal': seasonal,
        'residuals': residuals
    })
feat_data['best_decomposition'] = decompositions

# Aggiungi la colonna count_miss_values con vettori di 10 zeri
feat_data['count_miss_values'] = [np.zeros(10).tolist() for _ in range(len(feat_data))]
feat_data['time_series'] = [np.random.rand(233) for _ in range(len(feat_data))]

# Mostra le prime righe del DataFrame
feat_data.head()

Unnamed: 0,name,kpi,col,selected_f,unusable_data_bool,inconsistencies_flag,h_trend_drift,best_decomposition,count_miss_values,time_series
0,Large Capacity Cutting Machine 1,working_time,sum,7,False,False,0.362321,"{'trend': [0.14130069905930454, 0.829459955311...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6933152988336704, 0.9371253794295603, 0.133..."
1,Large Capacity Cutting Machine 1,working_time,avg,7,True,False,-1.110392,"{'trend': [0.5609923583193656, 0.7911506155336...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6268468606234581, 0.7732760976448547, 0.012..."
2,Large Capacity Cutting Machine 1,working_time,min,30,False,True,-0.775245,"{'trend': [0.27097451371627557, 0.826777486751...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.7130808414504867, 0.41865744438545405, 0.05..."
3,Large Capacity Cutting Machine 1,working_time,max,30,True,False,0.841182,"{'trend': [0.9349481961370171, 0.1643700942223...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.15051081303471137, 0.2611618488050198, 0.21..."
4,Large Capacity Cutting Machine 1,idle_time,sum,7,True,True,-0.803067,"{'trend': [0.21443884861264562, 0.325835368012...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.9629539288049326, 0.48448134042859514, 0.73..."


In [8]:
# Extracting the time_series for each combination
for name in feat_data['name'].unique():
    for k in feat_data['kpi'].unique():
        for col in feat_data['col'].unique():
            # Filter the dataframe to get the corresponding row
            time_series = feat_data[(feat_data['name'] == name) &
                                   (feat_data['kpi'] == k) &
                                   (feat_data['col'] == col)]['time_series']
            time_series = pd.DataFrame({'value': time_series.tolist()[0]}, index = data['time'].unique())

            # Pass the time_series to the detect anomalies function
            trained_ffnn, _ = forecast_with_ffnn(time_series, plot_results=False)
            break
        break
    break

2024-12-03 21:54:55.944821: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.




In [9]:
explainer = ffnn_explainer.get_regression_explainer(trained_ffnn, time_series)

ValueError: Since axis is `None`, must provide window_shape for all dimensions of `x`; got 1 window_shape elements and `x.ndim` is 2.

In [None]:
"""### Prediction of future values

Here we use the trained model to predict the future values \
The function is called once a day when we receive the new data, and the new predictions will substitute the previous ones
"""

def forecast_future(time_series, trained_ffnn, scaler, n_future = 21, n_lags = 14, plot_results=True):
    """
    Forecast future values of the time series using the trained FFNN model.

    Parameters:
    - time_series (pd.Series): The input time series.
    - trained_ffnn (tensorflow.keras.models.Sequential): The trained FFNN model.
    - scaler (sklearn.preprocessing.MinMaxScaler): The scaler used to scale the data.
    - n_future (int): The number of future values to predict.
    - n_lags (int): The number of lags (time window) for the prediction. N.B. must be the same of training function
    - plot_results (bool): Whether to plot the results.

    Returns:
    - predictions_rescaled (numpy.array): An array containing the future predictions.
    """

    if len(time_series) >= n_lags:

      # Extract the last sequence of n_lags from the time series
      last_sequence = time_series[-n_lags:].values.reshape(1, -1)  # Last n_lags values

      predictions = []
      input_seq = last_sequence.flatten()

      # Standard deviation of residuals for uncertainty
      residuals = time_series[-n_lags:].values - trained_ffnn.predict(input_seq.reshape(1, -1), verbose=0).flatten()
      std_residuals = np.std(residuals)

      # Predict the future values
      for _ in range(n_future):
          # Predict the next value
          pred = trained_ffnn.predict(input_seq.reshape(1, -1), verbose=0)[0, 0]
          predictions.append(pred)

          # Add the prediction to the input sequence for the next step
          input_seq = np.append(input_seq[1:], pred)

      # Rescale the predictions back to the original scale
      predictions_rescaled = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()

      # Confidence interval (± 2 standard deviations)
      confidence_interval_lower = predictions_rescaled - 2 * std_residuals
      confidence_interval_upper = predictions_rescaled + 2 * std_residuals

      # Plot the results
      if plot_results:
          plt.figure(figsize=(8, 4))
          plt.plot(pd.to_datetime(time_series.index), time_series.values, label='Original Series', color='gray', linestyle='--', alpha=0.5)
          future_dates = pd.date_range(pd.to_datetime(time_series.index[-1]), periods=n_future + 1, freq='D')[1:]
          plt.plot(future_dates, predictions_rescaled, label='Forecast', color='orange')
          plt.fill_between(future_dates, confidence_interval_lower, confidence_interval_upper,
                          color='orange', alpha=0.2, label='Confidence Interval')
          plt.title('FFNN Forecast with Confidence Interval')
          plt.xlabel('Time')
          plt.ylabel('Value')
          plt.legend()
          plt.grid()
          plt.show()
    else:
      print('Time series length not sufficient to perform prediction')

    return predictions_rescaled

In [None]:
time_series.to_list()

AttributeError: 'DataFrame' object has no attribute 'to_list'

In [None]:
time_series.iloc[15,:]

value    0.478933
Name: 2024-03-16T00:00:00Z, dtype: float64

In [None]:
sv = ffnn_explainer.get_explanation(explainer, time_series[15,:])

AttributeError: module 'ffnn_explainer' has no attribute 'get_explanation'