In [1]:
import keras
import pandas as pd
from keras import layers
import tensorflow as tf

import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
import numpy as np
import os

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error as mse
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.layers import Dense, InputLayer, LSTM, Dropout, Bidirectional, SimpleRNN, GRU

# from keras import ops
import numpy as np

In [3]:
DC_POWER_INDEX = 0
LEARNING_RATE = 0.001
RANDOM_STATE = 44
START_TIME = '2020-06-07 00:30:00'
END_TIME = '2020-06-07 04:00:00'
BATCH_SIZE = 1024

def fix_randomness():
    tf.random.set_seed(RANDOM_STATE)

    from numpy.random import seed
    seed(RANDOM_STATE)
    keras.utils.set_random_seed(RANDOM_STATE)

fix_randomness()

MULTIVARIATE

In [4]:
EPOCHS = 500
MULTIV_TRAINING_WINDOW = 40
lead_time = 20

In [5]:
# Creating the model input for multivariate forecasting
def df_to_model_input2(df_np, col_index, window_size):
    df_np = df_np.to_numpy()
    x = []
    y = []

    for i in range(len(df_np) - window_size - lead_time):
        row = [r for r in df_np[i:i+window_size]]
        x.append(row)

        label = df_np[i+lead_time+window_size][col_index]
        y.append(label)

    return np.array(x), np.array(y)

In [6]:
multiv_teacher_path = 'models/girasol/multiv_teacher_model.keras'
multiv_student_path = 'models/girasol/multiv_student_model.keras'

In [7]:
train = pd.read_csv('/.../girasol_met/train.csv')
test = pd.read_csv('/.../girasol_met/val.csv')

In [8]:
NUMBER_OF_FEATURES = len(train.columns)

In [9]:
scaler = MinMaxScaler()
train = pd.DataFrame(scaler.fit_transform(train), columns=train.columns)
test = pd.DataFrame(scaler.transform(test), columns=test.columns)

In [10]:
def get_train_test_val(x, y):
    return x[:train_len], y[:train_len], x[train_len:], y[train_len:]

x, y = df_to_model_input2(train, 7, MULTIV_TRAINING_WINDOW)

ds_len = len(y)
train_len = int(0.8*ds_len)

x_train, y_train, x_val, y_val = get_train_test_val(x, y)
x_test, y_test = df_to_model_input2(test, 7, MULTIV_TRAINING_WINDOW)

In [11]:
x_test.shape,  y_test.shape

((25163, 40, 8), (25163,))

In [12]:
teacher_hidden_layer_units, teacher_dense_params = 64, 8
student_hidden_layer_units, student_dense_params = 16, 8

In [13]:
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

def calculate_mape(scaler, y_pred_scaled, target_column_index=-1):

    # Create dummy arrays to match the scaler's expected input shape
    dummy_data_pred = np.zeros((y_pred_scaled.shape[0], NUMBER_OF_FEATURES))
    dummy_data_test = np.zeros((y_test.shape[0], NUMBER_OF_FEATURES))

    # Insert the predicted values and y_test into the correct column
    dummy_data_pred[:, target_column_index] = y_pred_scaled[:, 0]
    dummy_data_test[:, target_column_index] = y_test[:] # it kept that way to be able to modify if needed

    # Perform inverse transform to get the unscaled predictions and y_test
    unscaled_predictions = scaler.inverse_transform(dummy_data_pred)[:, target_column_index]
    y_test_unscaled = scaler.inverse_transform(dummy_data_test)[:, target_column_index]



    mape = mean_absolute_percentage_error(y_test_unscaled, unscaled_predictions)*100
    return mape

def calculate_metrics(y_true, y_pred):
    """
    Calculate NMSE, NRMSE, and NMAE normalized by the average of true values.
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
        
    Returns:
        dict: A dictionary containing NMSE, NRMSE, and NMAE.
    """
    # Convert inputs to numpy arrays for consistency
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    # Mean Squared Error (MSE) and Mean Absolute Error (MAE) using sklearn
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)

    mape = calculate_mape(scaler=scaler, y_pred_scaled=y_pred)
    # Variance and mean of true values
    avg_true = np.mean(y_true)
    
    # Calculate metrics
    nrmse = np.sqrt(mse) / avg_true
    nmae = mae / avg_true

    print(f"NRMSE (Normalized Root Mean Squared Error): {nrmse:.4f}")
    print(f"NMAE (Normalized Mean Absolute Error): {nmae:.4f}")
    print(f"MAPE (Mean Absolute Percentage Error): {mape:.4f}") 
    
    # Return metrics as a dictionary
    return nrmse, nmae, mape

In [14]:
from sklearn.preprocessing import MinMaxScaler

RANDOM_STATE = 44
tf.random.set_seed(RANDOM_STATE)
from numpy.random import seed
seed(RANDOM_STATE)
keras.utils.set_random_seed(RANDOM_STATE)

def run_teacher_multiv():

    # Create the model
    model = Sequential()
    model.add(InputLayer((MULTIV_TRAINING_WINDOW, NUMBER_OF_FEATURES)))
    model.add(SimpleRNN(teacher_hidden_layer_units))
    model.add(Dropout(0.5))
    model.add(Dense(teacher_dense_params, 'relu'))
    model.add(Dense(1, 'linear'))

    # Model checkpoint
    model_file = multiv_teacher_path

    cp = ModelCheckpoint(model_file, save_best_only=True)

    # Early stopping to prevent overfitting
    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=5,  # Number of epochs with no improvement after which training will be stopped
        restore_best_weights=True  # Restore the weights of the best epoch after stopping
    )

    # Compile the model
    model.compile(
        loss=MeanSquaredError(),
        optimizer=Adam(learning_rate=LEARNING_RATE),
        metrics=[MeanSquaredError()]
    )
   
    # Train the model
    model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        callbacks=[cp, early_stopping]
    )

run_teacher_multiv()

teacher = load_model(multiv_teacher_path)
print("Teacher Validation RMSE") 
teacher.evaluate(x_val, y_val)

Epoch 1/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - loss: 0.1196 - mean_squared_error: 0.1195 - val_loss: 0.0189 - val_mean_squared_error: 0.0177
Epoch 2/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.0240 - mean_squared_error: 0.0240 - val_loss: 0.0115 - val_mean_squared_error: 0.0111
Epoch 3/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 0.0162 - mean_squared_error: 0.0162 - val_loss: 0.0075 - val_mean_squared_error: 0.0077
Epoch 4/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - loss: 0.0136 - mean_squared_error: 0.0136 - val_loss: 0.0080 - val_mean_squared_error: 0.0081
Epoch 5/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0119 - mean_squared_error: 0.0119 - val_loss: 0.0069 - val_mean_squared_error: 0.0070
Epoch 6/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - lo

[0.004664697218686342, 0.004676278214901686]

In [15]:
teacher = load_model(multiv_teacher_path)

# Make predictions on the scaled test set
y_pred = teacher.predict(x_test)

nrmse, nmae, mape = calculate_metrics(y_test, y_pred)
# Store the results in a dictionary for the teacher
teacher_results = {
    "NRMSE": nrmse,
    "NMAE": nmae,
    "MAPE": mape
}

# Print the results in a single row
print(f"Teacher Results: NRMSE = {teacher_results['NRMSE']:.4f}, NMAE = {teacher_results['NMAE']:.4f}, MAPE = {teacher_results['MAPE']:.4f}")


[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
NRMSE (Normalized Root Mean Squared Error): 0.1828
NMAE (Normalized Mean Absolute Error): 0.1223
MAPE (Mean Absolute Percentage Error): 25.1528
Teacher Results: NRMSE = 0.1828, NMAE = 0.1223, MAPE = 25.1528


In [16]:
from sklearn.preprocessing import MinMaxScaler

RANDOM_STATE = 44
tf.random.set_seed(RANDOM_STATE)
from numpy.random import seed
seed(RANDOM_STATE)
keras.utils.set_random_seed(RANDOM_STATE)

def run_student_multiv():

    # Create the model
    model = Sequential()
    model.add(InputLayer((MULTIV_TRAINING_WINDOW, NUMBER_OF_FEATURES)))
    model.add(SimpleRNN(student_hidden_layer_units))
    model.add(Dropout(0.5))
    model.add(Dense(student_dense_params, 'relu'))
    model.add(Dense(1, 'linear'))

    # Model checkpoint
    model_file = multiv_student_path

    cp = ModelCheckpoint(model_file, save_best_only=True)

    # Early stopping to prevent overfitting
    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=5,  # Number of epochs with no improvement after which training will be stopped
        restore_best_weights=True  # Restore the weights of the best epoch after stopping
    )

    # Compile the model
    model.compile(
        loss=MeanSquaredError(),
        optimizer=Adam(learning_rate=LEARNING_RATE),
        metrics=[MeanSquaredError()]
    )
   
    # Train the model
    model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        batch_size=BATCH_SIZE,
        epochs=500,
        callbacks=[cp, early_stopping]
    )

run_student_multiv()

student = load_model(multiv_student_path)
print("Student Validation RMSE") 
student.evaluate(x_val, y_val)

Epoch 1/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.2547 - mean_squared_error: 0.2546 - val_loss: 0.1441 - val_mean_squared_error: 0.1335
Epoch 2/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0831 - mean_squared_error: 0.0831 - val_loss: 0.1005 - val_mean_squared_error: 0.0924
Epoch 3/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0600 - mean_squared_error: 0.0600 - val_loss: 0.0745 - val_mean_squared_error: 0.0696
Epoch 4/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0446 - mean_squared_error: 0.0446 - val_loss: 0.0517 - val_mean_squared_error: 0.0480
Epoch 5/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0333 - mean_squared_error: 0.0333 - val_loss: 0.0397 - val_mean_squared_error: 0.0367
Epoch 6/500
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 

[0.014528005383908749, 0.014525691978633404]

In [17]:
student = load_model(multiv_student_path)

# Make predictions on the scaled test set
y_pred = student.predict(x_test)

nrmse, nmae, mape = calculate_metrics(y_test, y_pred)

# Store the results in a dictionary
student_results = {
    "NRMSE": nrmse,
    "NMAE": nmae,
    "MAPE": mape
}
# Print the results in a single row
print(f"Student Results: NRMSE = {student_results['NRMSE']:.4f}, NMAE = {student_results['NMAE']:.4f}, MAPE = {student_results['MAPE']:.4f}")


[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 879us/step
NRMSE (Normalized Root Mean Squared Error): 0.2913
NMAE (Normalized Mean Absolute Error): 0.2352
MAPE (Mean Absolute Percentage Error): 40.6118
Student Results: NRMSE = 0.2913, NMAE = 0.2352, MAPE = 40.6118


In [18]:
from tensorflow.keras.saving import register_keras_serializable

@register_keras_serializable()
class Distiller(keras.Model):
    def __init__(self, student, teacher):
        super().__init__()
        self.teacher = teacher
        self.student = student
        self._loss_tracker = keras.metrics.Mean(name="loss")

    def compile(
        self,
        optimizer,
        loss,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        loss_threshold,
        alpha,
        temperature

    ):
        """Configure the distiller.

        Args:
            optimizer: Keras optimizer for the student weights
            metrics: Keras metrics for evaluation
            student_loss_fn: Loss function of difference between student
                predictions and ground-truth
            distillation_loss_fn: Loss function of difference between soft
                student predictions and soft teacher predictions
            alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
            temperature: Temperature for softening probability distributions.
                Larger temperature gives softer distributions.
        """
        super().compile(loss=loss, optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature
        self.loss_threshold = loss_threshold

    def get_config(self):
        config = {
            'student': self.student.to_json(),
            'teacher': self.teacher.to_json()
        }

        return config

    @classmethod
    def from_config(cls, config):
        student = tf.keras.models.model_from_json(config.pop('student'))
        teacher = tf.keras.models.model_from_json(config.pop('teacher'))
        return cls(student=student, teacher=teacher, **config)

    def train_step(self, data):
        # Unpack the data
        x, y = data

        with tf.GradientTape() as tape:
            # Forward pass
            y_pred = self.student(x, training=True)
            # Compute the loss value
            loss = self.compute_loss(x, y, y_pred)

        # Compute gradients
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`
        self.compiled_metrics.update_state(y, y_pred)

        # Return a dict mapping metric names to current value
        results =  {m.name: m.result() for m in self.metrics}

        results['total loss (train)'] = loss if isinstance(loss, float) else tf.reduce_mean(loss)

        return results


    def test_step(self, data):
        # Unpack the data
        x, y = data

        # Forward pass to get student's predictions
        y_pred = self.student(x, training=False)

        # Compute the validation loss
        val_loss = self.compute_loss(x, y, y_pred)

        # Manually update the metrics for validation
        self.compiled_metrics.update_state(y, y_pred)

        # Collect results for all metrics
        results = {m.name: m.result() for m in self.metrics}

        # Ensure 'val_loss' is properly reduced to a scalar and reported
        results['total loss (val)'] = val_loss if isinstance(val_loss, float) else tf.reduce_mean(val_loss)

        return results



    def compute_loss(
        # self, x=None, y=None, y_pred=None, sample_weight=None, allow_empty=False
        self, x=None, y=None, y_pred=None, sample_weight=None, allow_empty=False

    ):

        mse = MeanSquaredError()

        # Compute predictions by the teacher model
        teacher_pred = self.teacher(x, training=False)

        # Compute the MSE loss between true labels and student predictions
        student_loss = mse(y, y_pred)

        # Teacher loss is the
        temp = self.temperature
        # loss = self.alpha * student_loss + (1 - self.alpha) * teacher_loss
        loss = self.alpha * student_loss + (1 - self.alpha) * (mse(y_pred/temp, teacher_pred/temp)* (temp ** 2))


        return loss

    def call(self, x):
        return self.student(x)

In [19]:
def run_multiv_distillation(teacher, alpha, threshold, temperature, file_name, window_size):

    # Create the model
    model = Sequential()
    model.add(InputLayer((MULTIV_TRAINING_WINDOW, NUMBER_OF_FEATURES)))
    model.add(SimpleRNN(student_hidden_layer_units))
    model.add(Dropout(0.5))
    model.add(Dense(student_dense_params, 'relu'))
    model.add(Dense(1, 'linear'))

    # Create the distiller class
    distiller = Distiller(student=model, teacher=teacher)

    # Compile the distiller class
    distiller.compile(
    optimizer = Adam(learning_rate=LEARNING_RATE),
    loss = MeanSquaredError(),
    metrics = [RootMeanSquaredError()],
    student_loss_fn = MeanSquaredError(),
    distillation_loss_fn = distiller.compute_loss, #MeanSquaredError(),
    loss_threshold = threshold,
    alpha=alpha,
    temperature=temperature
    )

    dummy_x = tf.random.normal([1, *((window_size, NUMBER_OF_FEATURES))])  # Replace `input_shape` with the actual shape of your input
    _ = distiller(dummy_x) 

    d_check = ModelCheckpoint(file_name, monitor='root_mean_squared_error',save_best_only=True)

    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=5,  # Number of epochs with no improvement after which training will be stopped
        restore_best_weights=True  # Restore the weights of the best epoch after stopping
    )

    distiller.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=EPOCHS, callbacks=[d_check, early_stopping])

    distilled_student = load_model(file_name)
    # Make predictions using the distilled student model
    y_pred = distilled_student.predict(x_val)

    # Calculate the root mean squared error (RMSE)
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    print("Distilled Student RMSE:", rmse)

In [20]:
alphas = [0.3, 0.5, 0.7]
temps = [5]
threshold = 0

# Initialize a 2D list to store RMSE values
rmse_matrix = []

# Iterate over alphas and temps
for alpha in alphas:
    row_rmse = []
    for temp in temps:

        RANDOM_STATE = 44
        tf.random.set_seed(RANDOM_STATE)
        from numpy.random import seed
        seed(RANDOM_STATE)
        keras.utils.set_random_seed(RANDOM_STATE)

        multiv_distill_file_name = 'models/girasol/distils/multiv_distil_'+str(alpha)+'_'+str(temp)+'/multiv_distil.keras'
        
        run_multiv_distillation(teacher, alpha, threshold, temp,  multiv_distill_file_name, MULTIV_TRAINING_WINDOW)
        
        # Load the model
        multiv_distill_file_name = f'models/girasol/distils/multiv_distil_{alpha}_{temp}/multiv_distil.keras'
        student_kd = load_model(multiv_distill_file_name)

        # Make predictions on the scaled test set
        y_pred = student_kd.predict(x_test)

        nrmse, nmae, mape = calculate_metrics(y_test, y_pred)
        
        row_rmse.append(nrmse)

    # Append the row of RMSE values to the matrix
    rmse_matrix.append(row_rmse)

# Convert the list to a NumPy array for plotting
rmse_matrix = np.array(rmse_matrix)

Epoch 1/500


```
for metric in self.metrics:
    metric.update_state(y, y_pred)
```

  return self._compiled_metrics_update_state(


[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - root_mean_squared_error: 0.2488 - loss: 0.4731 - total loss (train): 0.0276 - val_loss: 0.5075 - val_total loss (val): 0.0026
Epoch 2/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.1001 - loss: 0.5275 - total loss (train): 0.0054 - val_loss: 0.5321 - val_total loss (val): 0.0049
Epoch 3/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.0902 - loss: 0.5289 - total loss (train): 0.0039 - val_loss: 0.5109 - val_total loss (val): 0.0045
Epoch 4/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.0852 - loss: 0.5286 - total loss (train): 0.0033 - val_loss: 0.4998 - val_total loss (val): 0.0031
Epoch 5/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - root_mean_squared_error: 0.0829 - loss: 0.5287 - tota

  instance.compile_from_config(compile_config)


[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Distilled Student RMSE: 0.08920917190644247
[1m126/787[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 801us/step

  instance.compile_from_config(compile_config)


[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 741us/step
NRMSE (Normalized Root Mean Squared Error): 0.1957
NMAE (Normalized Mean Absolute Error): 0.1490
MAPE (Mean Absolute Percentage Error): 21.0848
Epoch 1/500


```
for metric in self.metrics:
    metric.update_state(y, y_pred)
```

  return self._compiled_metrics_update_state(


[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - root_mean_squared_error: 0.2485 - loss: 0.4724 - total loss (train): 0.0288 - val_loss: 0.5064 - val_total loss (val): 0.0034
Epoch 2/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.1000 - loss: 0.5266 - total loss (train): 0.0066 - val_loss: 0.5319 - val_total loss (val): 0.0041
Epoch 3/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.0902 - loss: 0.5281 - total loss (train): 0.0051 - val_loss: 0.5116 - val_total loss (val): 0.0091
Epoch 4/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.0851 - loss: 0.5278 - total loss (train): 0.0044 - val_loss: 0.4997 - val_total loss (val): 0.0060
Epoch 5/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - root_mean_squared_error: 0.0831 - loss: 0.5278 - tota

  instance.compile_from_config(compile_config)


[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 988us/step
Distilled Student RMSE: 0.09060447726050369
[1m123/787[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 833us/step

  instance.compile_from_config(compile_config)


[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
NRMSE (Normalized Root Mean Squared Error): 0.1980
NMAE (Normalized Mean Absolute Error): 0.1510
MAPE (Mean Absolute Percentage Error): 21.1167
Epoch 1/500


```
for metric in self.metrics:
    metric.update_state(y, y_pred)
```

  return self._compiled_metrics_update_state(


[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - root_mean_squared_error: 0.2478 - loss: 0.4719 - total loss (train): 0.0298 - val_loss: 0.5040 - val_total loss (val): 0.0056
Epoch 2/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.1004 - loss: 0.5258 - total loss (train): 0.0077 - val_loss: 0.5380 - val_total loss (val): 0.0106
Epoch 3/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.0903 - loss: 0.5273 - total loss (train): 0.0062 - val_loss: 0.5089 - val_total loss (val): 0.0145
Epoch 4/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.0856 - loss: 0.5269 - total loss (train): 0.0056 - val_loss: 0.5029 - val_total loss (val): 0.0098
Epoch 5/500
[1m1162/1162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - root_mean_squared_error: 0.0836 - loss: 0.5269 - tota

  instance.compile_from_config(compile_config)


[1m291/291[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 796us/step
Distilled Student RMSE: 0.0892657663739947
[1m170/787[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 596us/step

  instance.compile_from_config(compile_config)


[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 623us/step
NRMSE (Normalized Root Mean Squared Error): 0.2102
NMAE (Normalized Mean Absolute Error): 0.1633
MAPE (Mean Absolute Percentage Error): 27.5346


In [21]:
import numpy as np
from tensorflow.keras.models import load_model

# Example inputs
alphas = [0.3]
temps = [5]
threshold = 0

# Initialize a dictionary to store results
student_kd_results = []

# Iterate over alphas and temps
for alpha in alphas:
    for temp in temps:
        multiv_distill_file_name = f'models/girasol/distils/multiv_distil_{alpha}_{temp}/multiv_distil.keras'
        
        # Load the model
        student_kd = load_model(multiv_distill_file_name)

        # Make predictions on the scaled test set
        y_pred = student_kd.predict(x_test)

        # Calculate metrics
        nrmse, nmae, mape = calculate_metrics(y_test, y_pred)
        
        # Add the results to the dictionary
        student_kd_results.append({
            "alpha": alpha,
            "temp": temp,
            "NRMSE": nrmse,
            "NMAE": nmae,
            "MAPE": mape
        })

# Print the results
for result in student_kd_results:
    print(f"Alpha: {result['alpha']}, Temp: {result['temp']}, NRMSE: {result['NRMSE']:.4f}, "
          f"NMAE: {result['NMAE']:.4f}, MAPE: {result['MAPE']:.4f}")

[1m167/787[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 605us/step

  instance.compile_from_config(compile_config)


[1m787/787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 625us/step
NRMSE (Normalized Root Mean Squared Error): 0.1957
NMAE (Normalized Mean Absolute Error): 0.1490
MAPE (Mean Absolute Percentage Error): 21.0848
Alpha: 0.3, Temp: 5, NRMSE: 0.1957, NMAE: 0.1490, MAPE: 21.0848


RESULTS

Efficiency Metrics

In [22]:
teacher = load_model(multiv_teacher_path)

# 1. Print model summary (Layer details and parameter count)
print("Model Summary:")
teacher.summary()

# 2. Get the total number of parameters
total_params = teacher.count_params()
print(f'\nTotal number of parameters: {total_params}')

# 3. Get the size of the model file on disk
model_size = os.path.getsize(multiv_teacher_path) / 1024  #a Convert from bytes to MB
print(f'Model size on disk: {model_size:.2f} KB')

Model Summary:



Total number of parameters: 5201
Model size on disk: 91.61 KB


In [23]:
student = load_model(multiv_student_path)

# 1. Print model summary (Layer details and parameter count)
print("Model Summary:")
student.summary()

# 2. Get the total number of parameters
total_params = student.count_params()
print(f'\nTotal number of parameters: {total_params}')

# 3. Get the size of the model file on disk
model_size = os.path.getsize(multiv_student_path) / 1024  #a Convert from bytes to MB
print(f'Model size on disk: {model_size:.2f} KB')

Model Summary:



Total number of parameters: 545
Model size on disk: 37.05 KB


Error Metrics

In [24]:
# Print the results in a single row
print(f"Teacher Results: NRMSE = {teacher_results['NRMSE']:.4f}, NMAE = {teacher_results['NMAE']:.4f}, MAPE = {teacher_results['MAPE']:.4f}")

# Print the results in a single row
print(f"Student Results: NRMSE = {student_results['NRMSE']:.4f}, NMAE = {student_results['NMAE']:.4f}, MAPE = {student_results['MAPE']:.4f}")

print('Student KD Results')
# Print the results
for result in student_kd_results:
    print(f"Alpha: {result['alpha']}, Temp: {result['temp']}, NRMSE: {result['NRMSE']:.4f}, "
          f"NMAE: {result['NMAE']:.4f}, MAPE: {result['MAPE']:.4f}")

Teacher Results: NRMSE = 0.1828, NMAE = 0.1223, MAPE = 25.1528
Student Results: NRMSE = 0.2913, NMAE = 0.2352, MAPE = 40.6118
Student KD Results
Alpha: 0.3, Temp: 5, NRMSE: 0.1957, NMAE: 0.1490, MAPE: 21.0848


In [25]:
import seaborn as sns

alphas = [0.3, 0.5, 0.7]
temps = [5]

# Initialize a 2D list to store RMSE values
rmse_matrix = []

# Iterate over alphas and temps
for alpha in alphas:
    row_rmse = []
    for temp in temps:
        # Load the model
        multiv_distill_file_name = f'models/girasol/distils/multiv_distil_{alpha}_{temp}/multiv_distil.keras'
        student_kd = load_model(multiv_distill_file_name)

        # Make predictions on the scaled test set
        y_pred = student_kd.predict(x_test)

        nrmse, nmae, mape = calculate_metrics(y_test, y_pred)
        
        row_rmse.append(nrmse)

    # Append the row of RMSE values to the matrix
    rmse_matrix.append(row_rmse)

# Convert the list to a NumPy array for plotting
rmse_matrix = np.array(rmse_matrix)

# Plot the heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(rmse_matrix, annot=True, fmt='.2f', cmap='viridis', xticklabels=temps, yticklabels=alphas)
plt.xlabel('Temperature')
plt.ylabel('Alpha')
plt.title('Distilled Models on Multivariate Data')
plt.show()

ModuleNotFoundError: No module named 'seaborn'