In [1]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
import pydot
import graphviz

tfk = tf.keras
tfkl = tf.keras.layers
print(tf.__version__)

from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.model_selection import StratifiedShuffleSplit



2.13.0


In [2]:
# Random seed for reproducibility: the same value is fed to Python's `random`,
# NumPy's global RNG and TensorFlow's global RNG.
seed = 42

random.seed(seed)
# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes — confirm this is intended.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
# TF1-compat spelling of the seed call, issued in addition to the TF2 call above.
tf.compat.v1.set_random_seed(seed)

In [None]:
# Load the data
# X holds one series per row; extract_sequences later slices each row as
# array[idx, start:end] using the two columns of valid_periods.
X = np.load('/kaggle/input/datidati/training_data.npy')
categories = np.load("/kaggle/input/datidati/categories.npy")
valid_periods = np.load('/kaggle/input/datidati/valid_periods.npy')
start_times = valid_periods[:,0]  # column 0: slice start of each row
end_times = valid_periods[:,1]  # column 1: slice end of each row (used as an exclusive bound)

In [4]:
def encode_categories(categories):
    """Map every category label to an integer code.

    Codes follow the sorted order of the distinct labels (the order in which
    ``np.unique`` returns them), starting at 0.

    Args:
        categories: iterable of hashable labels, e.g. a 1-D array of strings.

    Returns:
        np.ndarray holding one integer code per input label, in input order.
    """
    distinct_labels = np.unique(categories)
    code_of = dict(zip(distinct_labels, range(len(distinct_labels))))
    codes = [code_of[label] for label in categories]
    return np.array(codes)

In [10]:
'''

X = np.load('/kaggle/input/datidati/training_data.npy')

# Create an empty list to store scaled rows
X_scaled=X.copy()

# Convert scales and centers lists to NumPy arrays
scales = np.zeros(X.shape[0])
centers = np.zeros(X.shape[0])

strange_s=[]
strange_c=[]

# Scale each row of X independently
for i in range(X.shape[0]):  # Loop over each row

    # Create a mask for non-zero values in the current row
    non_zero_mask = (X[i, :] != 0.0)

    # Extract non-zero values for fitting the scaler
    non_zero_data = X[i, non_zero_mask].reshape(-1, 1)

    # Initialize the scaler and fit on non-zero data
    scaler = RobustScaler().fit(non_zero_data)

    # Save the scale and center for the current row
    scales[i]=scaler.scale_[0]
    centers[i]=scaler.center_[0]
    
    if (scales[i]>0.9 or scales[i]<0.05):
        strange_s.append(i)
        
    if (centers[i]>0.9 or centers[i]<0.05):
        strange_c.append(i)

    # Transform the entire row using the fitted scaler
    X_scaled_row = scaler.transform(X[i, non_zero_mask].reshape(-1, 1)).flatten()

    # Fill the non-zero elements with the scaled values
    X_scaled[i, non_zero_mask] = X_scaled_row

print(X_scaled.shape, scales.shape,centers.shape, len(strange_s), len(strange_c))

'''

(48000, 2776) (48000,) (48000,) 365 361


In [23]:
# Stratified 90/10 hold-out split of the series, keyed on the category label.
# NOTE(review): the recorded output below totals 47,436 rows while an earlier
# recorded output reports 48,000 — X was presumably filtered in a cell that is
# no longer part of the notebook; confirm before relying on a fresh run.
seed = 42  # any integer works; fixed so the split is repeatable

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=seed)

# n_splits=1, so the splitter yields exactly one (train, validation) index pair.
train_index, val_index = next(sss.split(X, categories))

X_train_noseq = X[train_index]
X_val_noseq = X[val_index]
cat_train_noseq = categories[train_index]
cat_val_noseq = categories[val_index]

# Stored as one-element lists, the layout the downstream cells index into.
train_indexes = [train_index]
val_indexes = [val_index]

# Check the shapes of the resulting sets
print("X_train shape:", X_train_noseq.shape)
print("X_val shape:", X_val_noseq.shape)
print("cat_train shape:", cat_train_noseq.shape)
print("cat_val shape:", cat_val_noseq.shape)

X_train shape: (42692, 2776)
X_val shape: (4744, 2776)
cat_train shape: (42692,)
cat_val shape: (4744,)


In [24]:
def extract_sequences(array, window, stride, start_times, end_times, categories, shuffle=True, seed=42, telescope=9):
    """Cut every series into (input window, forecast target) pairs.

    For each row ``idx`` only ``array[idx, start_times[idx]:end_times[idx]]``
    is used. If that slice's length is not a multiple of ``window`` it is
    left-padded with zeros up to the next multiple. A window of ``window``
    samples is then slid over it in steps of ``stride``; the ``telescope``
    samples that immediately follow each window form its target.

    Args:
        array: 2-D array with one series per row.
        window: number of samples in each input sequence.
        stride: step between the starts of consecutive windows.
        start_times: per-row start index of the valid span (inclusive).
        end_times: per-row end index of the valid span (exclusive).
        categories: per-row category label.
        shuffle: if True, shuffle the pairs with a permutation seeded by ``seed``.
        seed: seed for the shuffle. Note that this re-seeds NumPy's global RNG.
        telescope: number of future samples in each target.

    Returns:
        Tuple ``(X, y, cat)``: the input windows (one row of ``window`` samples
        per pair), the targets (one row of ``telescope`` samples per pair) and
        the integer-encoded category of the series each pair came from.
    """
    sequences = []
    targets = []
    labels = []

    for idx in range(len(array)):
        # Keep only the valid part of the series.
        series = array[idx, start_times[idx]:end_times[idx]]

        remainder = len(series) % window
        if remainder != 0:
            # Left-pad with zeros so the length becomes a multiple of the window.
            padding = np.zeros(window - remainder, dtype='float32')
            series = np.concatenate((padding, series))

        # Generate sequences with the requested window and stride. The last
        # admissible start is `len - window - telescope` itself (its target ends
        # exactly at the end of the series), hence the +1 on the range bound;
        # without it the most recent window of a series could be dropped.
        last_start = len(series) - window - telescope
        for start in range(0, last_start + 1, stride):
            stop = start + window
            sequences.append(series[start:stop])
            targets.append(series[stop:stop + telescope])
            labels.append(categories[idx])

    X = np.array(sequences)
    y = np.array(targets)
    cat = np.array(labels)

    if shuffle:
        np.random.seed(seed)
        indices = np.arange(len(X))
        np.random.shuffle(indices)
        X = X[indices]
        y = y[indices]
        cat = cat[indices]

    # Encoding is element-wise, so doing it after the shuffle keeps rows aligned.
    cat = encode_categories(cat)

    return X, y, cat


In [25]:

# Stratified 90/10 hold-out split of the series, keyed on the category label.
seed = 42

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=seed)

# n_splits=1, so the splitter yields exactly one (train, validation) index pair.
train_index, val_index = next(sss.split(X, categories))

X_train_noseq = X[train_index]
X_val_noseq = X[val_index]
cat_train_noseq = categories[train_index]
cat_val_noseq = categories[val_index]

# Stored as one-element lists, the layout the downstream cells index into.
train_indexes = [train_index]
val_indexes = [val_index]

# Check the shapes of the resulting sets
print("X_train_noseq shape:", X_train_noseq.shape)
print("X_val_noseq shape:", X_val_noseq.shape)
print("cat_train_noseq shape:", cat_train_noseq.shape)
print("cat_val_noseq shape:", cat_val_noseq.shape)

X_train_noseq shape: (42692, 2776)
X_val_noseq shape: (4744, 2776)
cat_train_noseq shape: (42692,)
cat_val_noseq shape: (4744,)


In [26]:
sequence_length = 100  # samples per input window
stride = 12            # step between consecutive windows

# NOTE(review): `categories` is re-read from disk here; only the already split
# cat_*_noseq arrays are used below — confirm the reload is still needed.
categories = np.load("/kaggle/input/datidati/categories.npy")

# train_indexes / val_indexes are one-element lists holding the index array.
# Pull the array out *before* indexing: `start_times[train_indexes][0]` indexes
# with a list that contains an array, which NumPy < 1.23 interprets as a tuple
# index (so the trailing [0] would return a single scalar) and NumPy >= 1.23 as
# a 2-D index array. Indexing with the array itself is unambiguous.
train_rows = train_indexes[0]
val_rows = val_indexes[0]

start_times_train = start_times[train_rows]
end_times_train = end_times[train_rows]

start_times_val = start_times[val_rows]
end_times_val = end_times[val_rows]

X_train, y_train, cat_train = extract_sequences(X_train_noseq, sequence_length, stride, start_times_train, end_times_train, cat_train_noseq)
X_val, y_val, cat_val = extract_sequences(X_val_noseq, sequence_length, stride, start_times_val, end_times_val, cat_val_noseq)

In [27]:
# Shapes of the windowed train / validation inputs, targets and category codes.
X_train.shape, y_train.shape, cat_train.shape, X_val.shape, y_val.shape, cat_val.shape

((490855, 100), (490855, 9), (490855,), (53770, 100), (53770, 9), (53770,))

In [29]:

class TimeSeriesAugmentation(tf.keras.layers.Layer):
    """Random training-time augmentation for batches of time series.

    Inputs are expected to be shaped ``(batch, time)`` or
    ``(batch, time, features...)``. When ``training`` is truthy, four
    perturbations are applied in this order: additive Gaussian jitter, random
    amplitude scaling, random time warping (linear resampling along the time
    axis) and a second random amplitude scaling ("magnitude warping").
    Otherwise the inputs pass through unchanged.

    Everything is expressed with TensorFlow ops, so the layer works on symbolic
    tensors and draws fresh random numbers on every call. Random factors are
    drawn once per sample of the batch.

    Args:
        jitter: standard deviation of the additive Gaussian noise.
        scale: half-width of the uniform range ``[1 - scale, 1 + scale]`` the
            scaling factor is drawn from.
        time_warping: half-width of the uniform range the time-warp factor is
            drawn from; must stay below 1 so the factor remains positive.
        magnitude_warping: half-width of the uniform range of the final
            amplitude factor.
    """

    def __init__(self, jitter=0.01, scale=0.1, time_warping=0.2, magnitude_warping=0.2, **kwargs):
        super().__init__(**kwargs)
        self.jitter = jitter
        self.scale = scale
        self.time_warping = time_warping
        self.magnitude_warping = magnitude_warping

    def call(self, inputs, training=None):
        """Return augmented inputs while training, the inputs themselves otherwise."""
        if training:
            return self.augment(inputs)
        return inputs

    def augment(self, series):
        """Apply jitter, scaling, time warping and magnitude warping to a batch.

        Args:
            series: tensor (or array-like) of rank >= 2 with the batch on axis 0
                and time on axis 1.

        Returns:
            Tensor with the same shape as ``series``.

        Raises:
            ValueError: if the rank of ``series`` is unknown or below 2.
        """
        series = tf.convert_to_tensor(series)
        if not series.dtype.is_floating:
            series = tf.cast(series, tf.float32)

        rank = series.shape.rank
        if rank is None or rank < 2:
            raise ValueError(
                "TimeSeriesAugmentation expects inputs of rank >= 2 (batch, time, ...)."
            )

        static_shape = series.shape
        dtype = series.dtype
        dyn_shape = tf.shape(series)
        num_points = dyn_shape[1]

        # Shape (batch, 1, 1, ...): one random factor per sample, broadcast
        # over time and any feature axes.
        per_sample_shape = tf.concat(
            [dyn_shape[:1], tf.ones([rank - 1], dtype=tf.int32)], axis=0
        )

        # Apply jitter: independent noise for every element.
        series = series + tf.random.normal(dyn_shape, mean=0.0, stddev=self.jitter, dtype=dtype)

        # Apply scaling
        series = series * tf.random.uniform(
            per_sample_shape, 1.0 - self.scale, 1.0 + self.scale, dtype=dtype
        )

        # Apply time warping: the original samples are thought of as sitting at
        # positions k * factor; output sample j is read back at source position
        # j / factor by linear interpolation, clamped to the last sample.
        # Index arithmetic is done in float32 regardless of the data dtype.
        warp_factor = tf.random.uniform(
            tf.concat([dyn_shape[:1], [1]], axis=0),
            1.0 - self.time_warping,
            1.0 + self.time_warping,
            dtype=tf.float32,
        )
        grid = tf.cast(tf.range(num_points), tf.float32)[tf.newaxis, :]
        last_position = tf.cast(num_points - 1, tf.float32)
        source = tf.clip_by_value(grid / warp_factor, 0.0, last_position)
        lower = tf.floor(source)
        lower_idx = tf.cast(lower, tf.int32)
        upper_idx = tf.minimum(lower_idx + 1, num_points - 1)
        # Interpolation weight, reshaped to (batch, time, 1, ...) for broadcasting.
        weight = tf.cast(source - lower, dtype)
        weight = tf.reshape(
            weight,
            tf.concat([tf.shape(weight), tf.ones([rank - 2], dtype=tf.int32)], axis=0),
        )
        left = tf.gather(series, lower_idx, axis=1, batch_dims=1)
        right = tf.gather(series, upper_idx, axis=1, batch_dims=1)
        series = left * (1 - weight) + right * weight

        # Apply magnitude warping
        series = series * tf.random.uniform(
            per_sample_shape,
            1.0 - self.magnitude_warping,
            1.0 + self.magnitude_warping,
            dtype=dtype,
        )

        # The gather above loses the static time dimension; restore it so that
        # downstream layers still see the input's static shape.
        return tf.ensure_shape(series, static_shape)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            'jitter': self.jitter,
            'scale': self.scale,
            'time_warping': self.time_warping,
            'magnitude_warping': self.magnitude_warping,
        })
        return config
    
    
    
def create_input_mask(inputs):
    """Return a float32 mask that is 1.0 where ``inputs`` is non-zero, else 0.0.

    The comparison is done directly in NumPy; no TensorFlow op (and no copy to
    and from a TensorFlow tensor) is needed to produce a NumPy array.

    Args:
        inputs: array-like of numbers (anything ``np.asarray`` accepts).

    Returns:
        np.ndarray of dtype float32 with the same shape as ``inputs``.
    """
    return np.not_equal(np.asarray(inputs), 0.0).astype(np.float32)


# Mask of the training windows; it is only printed here and is not referenced
# again in the visible cells.
input_mask = create_input_mask(X_train)
print(input_mask.shape)

In [None]:
# Define a custom layer for applying weights to the dot product
class WeightedDotProduct(tfkl.Layer):
    """Project the last axis of the input onto a single trainable vector.

    The layer owns one weight, ``kernel`` of shape ``(features, 1)``, and
    returns ``tf.matmul(x, kernel)``, i.e. the last axis of the input is
    reduced to size 1.

    The base class is referenced through the notebook's ``tfkl`` alias: the
    ``layers`` name used previously is only imported further down, which made
    this class definition fail with a NameError on a fresh kernel.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer. It is sized from
        # the last axis because that is the axis tf.matmul contracts in call().
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[-1], 1),
                                      initializer='uniform',
                                      trainable=True)
        super().build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        return tf.matmul(x, self.kernel)

    
    
def scaled_dot_product_attention(q, k, v, mask=None, dropout=None):
    """
    Computes scaled dot-product attention.

    Args:
        q (tensor): Query tensor of shape (..., seq_len_q, depth_k).
        k (tensor): Key tensor of shape (..., seq_len_k, depth_k).
        v (tensor): Value tensor of shape (..., seq_len_v, depth_v).
        mask (tensor, optional): Optional mask for the attention weights,
                                 non-zero at the positions to suppress.
                                 Shape broadcastable to (..., seq_len_q, seq_len_k).
                                 It is cast to the dtype of the logits, so boolean
                                 or integer masks are accepted as well.
        dropout (function, optional): Optional dropout function to be applied to attention weights.

    Returns:
        tuple: A tuple containing the attention output and the attention weights.
               Output shape is (..., seq_len_q, depth_v) and attention weights shape is (..., seq_len_q, seq_len_k).
    """
    # Compute the dot product of Q and K, transposing the last two dimensions of K
    attention_logits = tf.matmul(q, k, transpose_b=True)  # shape: (..., seq_len_q, seq_len_k)

    # Scale by the square root of the depth of K. The depth is cast to the dtype
    # of the logits (not a fixed float32) so non-float32 inputs do not hit a
    # dtype mismatch in the division.
    depth_k = tf.cast(tf.shape(k)[-1], attention_logits.dtype)
    attention_logits = attention_logits / tf.math.sqrt(depth_k)

    # Apply the mask if provided: masked positions get a large negative logit
    # and therefore a weight of (almost) zero after the softmax.
    if mask is not None:
        attention_logits += tf.cast(mask, attention_logits.dtype) * -1e9

    # Apply softmax to compute attention weights
    attention_weights = tf.nn.softmax(attention_logits, axis=-1)

    # Apply dropout to the attention weights if provided
    if dropout is not None:
        attention_weights = dropout(attention_weights)

    # Multiply attention weights with V to get the output
    output = tf.matmul(attention_weights, v)

    return output, attention_weights
    
import tensorflow as tf
from tensorflow.keras import layers

# Define the MultiHeadAttention layer
heads = 4  # Choose the number of heads
d_model = 128  # Choose the model dimension

# key_dim is the per-head size: d_model split evenly across the heads (128 // 4 = 32).
# This creates a single module-level layer instance.
multi_head_attention = tf.keras.layers.MultiHeadAttention(num_heads=heads, key_dim=d_model//heads)


def build_lstm_seq2seq_multihead_attention(input_shape, n_units, N_values_to_predict, num_heads=4, key_dim=32):
    """Build an LSTM encoder/decoder forecaster with multi-head attention.

    Architecture:
      * an LSTM encoder that returns its full output sequence and final states;
      * a one-step decoder (LSTM followed by a bidirectional LSTM) initialised
        with the encoder states;
      * multi-head attention in which the decoder output queries the encoder
        output sequence;
      * the attention context concatenated with the decoder output, flattened
        and mapped by a Dense layer to the forecast values.

    The previous body referenced two undefined names (``attention`` and
    ``category_input``), used the rank-2 final state ``encoder_h`` as attention
    value, and passed key and value in swapped positions (the Keras layer's call
    signature is ``(query, value, key=None)``). The model has a single input.

    Args:
        input_shape: shape of one input sample, excluding the batch axis.
        n_units: number of LSTM units of the encoder and the first decoder LSTM.
        N_values_to_predict: number of output values (the forecast horizon).
        num_heads: number of attention heads.
        key_dim: size of each attention head.

    Returns:
        An uncompiled ``tf.keras.Model`` named ``'model'``.
    """
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Encoder: full output sequence plus final hidden / cell states.
    encoder_x, encoder_h, encoder_c = tfkl.LSTM(units=n_units, return_sequences=True, return_state=True)(input_layer)

    # Decoder: a single step that starts from the encoder's final states.
    decoder_in = tfkl.RepeatVector(1)(encoder_h)
    x = tfkl.LSTM(units=n_units, return_sequences=True, return_state=False)(decoder_in, initial_state=[encoder_h, encoder_c])
    decoder_x = tfkl.Bidirectional(tfkl.LSTM(units=int(n_units/2), return_sequences=True, return_state=False))(x)

    # The layer is created per model so that separately built models do not
    # share attention weights. Arguments are (query, value); the key defaults
    # to the value, i.e. the encoder output sequence serves as both.
    attention_layer = tfkl.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
    context = attention_layer(decoder_x, encoder_x)

    concatenated_c = tfkl.Concatenate()([context, decoder_x])
    concatenated_c = tfkl.Flatten()(concatenated_c)

    output_layer = tfkl.Dense(N_values_to_predict)(concatenated_c)  # number of telescope values

    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    return model

In [None]:
# (timesteps, features). This assignment used to be commented out, which left
# `input_shape` undefined when the notebook is run top to bottom.
input_shape = (None, 1)

epochs = 20

batch_size = 128

lstm_units = 64

N_values_to_predict = 9

#with tpu_strategy.scope():

# Pass the horizon by name instead of repeating the literal, so editing
# N_values_to_predict above actually changes the model.
model = build_lstm_seq2seq_multihead_attention(input_shape, lstm_units, N_values_to_predict)

model.compile(loss=tfk.losses.MeanSquaredError(), optimizer=tfk.optimizers.Adam(1e-3), metrics=['mae'], steps_per_execution=32)

model.summary()
tfk.utils.plot_model(model, expand_nested=True)

In [None]:
#with tpu_strategy.scope():

# Fit on the windowed training set and validate on the held-out windows;
# `.history` keeps only the per-epoch metrics of the returned History object.
history = model.fit(
    
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs = epochs,
    batch_size = batch_size,
    callbacks = [
        # Stop after 10 epochs without a val_loss improvement and restore the best weights.
        tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=10, restore_best_weights=True),
        # NOTE(review): same patience as EarlyStopping, so the learning-rate halving
        # will usually coincide with the stop — a shorter patience is presumably intended; confirm.
        tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, factor=0.5, min_lr=1e-5)
    ]
).history

In [None]:
import shutil

# Save the current `model` under the given name, then zip that directory into
# 'SubmissionModel_convnuovo.zip'.
# NOTE(review): assumes model.save() without a file extension writes a directory
# (SavedModel format), which the archive step needs — confirm for the TF version in use.
model.save('SubmissionModel_convnuovo')
shutil.make_archive('SubmissionModel_convnuovo', 'zip', 'SubmissionModel_convnuovo')

In [None]:

def residual_block(x, filters, kernel_size=3, activation='relu', padding='same'):
    """Two stacked Conv1D layers with a projected skip connection.

    The skip path is a 1x1 linear convolution that brings the block input to
    ``filters`` channels so it can be added to the output of the main path.

    Layers are referenced through the notebook's ``tfkl`` alias: the bare
    ``Conv1D`` name used previously is only imported in a later cell, so the
    function raised a NameError when the notebook was run top to bottom.

    Args:
        x: input tensor; presumably (batch, time, channels) — Conv1D requires rank 3.
        filters: number of output channels of every convolution in the block.
        kernel_size: kernel size of the two main-path convolutions.
        activation: activation of the two main-path convolutions.
        padding: padding mode of the two main-path convolutions.

    Returns:
        The element-wise sum of the main path and the projected shortcut.
    """
    # Shortcut
    shortcut = x

    # First convolution
    x = tfkl.Conv1D(filters=filters, kernel_size=kernel_size, activation=activation, padding=padding)(x)

    # Second convolution
    x = tfkl.Conv1D(filters=filters, kernel_size=kernel_size, activation=activation, padding=padding)(x)

    # Adjust dimensions of the shortcut
    shortcut = tfkl.Conv1D(filters=filters, kernel_size=1, activation=None, padding='same')(shortcut)

    # Add shortcut to the output
    x = tfkl.Add()([x, shortcut])

    return x


def build_conv1d_lstm_resnet(input_shape, n_units, N_values_to_predict=9, filters=64,kernel_size=3):
    """Build a Conv1D-residual + LSTM encoder/decoder forecaster with dot-product attention.

    Pipeline: two residual convolution blocks, each followed by max pooling
    (pool size 2) -> LSTM encoder -> one-step decoder (LSTM + bidirectional
    LSTM) initialised with the encoder states -> softmax dot-product attention
    of the decoder output over the encoder sequence -> concatenation, flatten,
    dropout and a linear Dense head.

    ``MaxPooling1D`` and ``Dropout`` are referenced through ``tfkl``: the bare
    names are only imported in a later cell, so calling this function raised a
    NameError when the notebook was run top to bottom.

    Args:
        input_shape: shape of one input sample, excluding the batch axis.
        n_units: number of LSTM units of the encoder and the first decoder LSTM.
        N_values_to_predict: number of output values (the forecast horizon).
        filters: channels of the first residual block (the second uses 128).
        kernel_size: kernel size of the first residual block (the second uses 3).

    Returns:
        An uncompiled Keras model named ``'build_conv1d_lstm_resnet'``.
    """
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Apply 1D Convolutional layer with residual block
    conv1d_layer = residual_block(input_layer, filters=filters, kernel_size=kernel_size, activation='relu', padding='same')
    maxpooling_layer = tfkl.MaxPooling1D(pool_size=2)(conv1d_layer)

    # Apply 1D Convolutional layer with residual block
    conv1d_layer = residual_block(maxpooling_layer, filters=128, kernel_size=3, activation='relu', padding='same')
    maxpooling_layer = tfkl.MaxPooling1D(pool_size=2)(conv1d_layer)

    # LSTM Encoder
    encoder_x, encoder_h, encoder_c = tfkl.LSTM(units=n_units, return_sequences=True, return_state=True)(maxpooling_layer)

    # Decoder: a single step that starts from the encoder's final states.
    decoder_in = tfkl.RepeatVector(1)(encoder_h)

    x = tfkl.LSTM(units=n_units, return_sequences=True,return_state=False)(decoder_in,initial_state=[encoder_h,encoder_c])
    decoder_x = tfkl.Bidirectional(tfkl.LSTM(units=int(n_units/2), return_sequences=True,return_state=False))(x)

    # Dot-product attention: scores between the decoder step and every encoder
    # step, softmax over the encoder steps, then the weighted sum of the
    # encoder outputs as context.
    attention = tfkl.Dot(axes=[2,2])([decoder_x, encoder_x])
    attention = tfkl.Activation('softmax')(attention)
    context = tfkl.Dot(axes=[2,1])([attention,encoder_x])

    concatenated_c = tfkl.Concatenate()([context,decoder_x])
    concatenated_c = tfkl.Flatten()(concatenated_c)

    concatenated_c = tfkl.Dropout(0.5)(concatenated_c)  # Add dropout for regularization

    output_layer = tfkl.Dense(N_values_to_predict, activation='linear')(concatenated_c)  # one unit per forecast (telescope) value

    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='build_conv1d_lstm_resnet')

    return model


In [None]:
input_shape = (None, 1)  # (timesteps, features)

epochs = 20

batch_size = 128

lstm_units = 64

N_values_to_predict = 9

#with tpu_strategy.scope():

# The horizon is passed explicitly; previously N_values_to_predict was defined
# but never handed to the builder, so editing it had no effect on the model.
model = build_conv1d_lstm_resnet(input_shape, lstm_units, N_values_to_predict=N_values_to_predict)

model.compile(loss=tfk.losses.MeanSquaredError(), optimizer=tfk.optimizers.Adam(1e-3), metrics=['mae'], steps_per_execution=32)

model.summary()
tfk.utils.plot_model(model, expand_nested=True)

In [None]:

# Fit on the windowed training set and validate on the held-out windows;
# `.history` keeps only the per-epoch metrics of the returned History object.
history = model.fit(
    
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs = epochs,
    batch_size = batch_size,
    callbacks = [
        # Stop after 10 epochs without a val_loss improvement and restore the best weights.
        tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=10, restore_best_weights=True),
        # NOTE(review): same patience as EarlyStopping, so the learning-rate halving
        # will usually coincide with the stop — a shorter patience is presumably intended; confirm.
        tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, factor=0.5, min_lr=1e-5)
    ]
).history

In [None]:
import shutil

# Save the current `model` under the given name, then zip that directory into
# 'conv_lstm_resnet_attention.zip'.
# NOTE(review): assumes model.save() without a file extension writes a directory
# (SavedModel format), which the archive step needs — confirm for the TF version in use.
model.save('conv_lstm_resnet_attention')
shutil.make_archive('conv_lstm_resnet_attention', 'zip', 'conv_lstm_resnet_attention')

## Attempt to create a resnet-like block with multihead attention 

In [None]:

def multihead_attention(query, key, value, heads, d_model):
    """Split-channel multi-head attention assembled from stock Keras layers.

    The last axis of ``query``, ``key`` and ``value`` is split into ``heads``
    equal chunks, ``tf.keras.layers.Attention`` is applied to each chunk
    triple, the per-head results are concatenated along the last axis and
    projected to ``d_model`` channels by a linear Dense layer.

    Args:
        query: query tensor; its last axis must be divisible by ``heads``.
        key: key tensor; its last axis must be divisible by ``heads``.
        value: value tensor; its last axis must be divisible by ``heads``.
        heads: number of chunks the last axis is split into.
        d_model: number of output channels of the final projection.

    Returns:
        Tensor whose last axis has size ``d_model``.
    """
    concat = tf.keras.layers.Concatenate(axis=-1)
    dense = tf.keras.layers.Dense(units=d_model, activation='linear')

    # Split the query, key, and value into multiple heads
    query = tf.keras.layers.Lambda(lambda x: tf.split(x, heads, axis=-1))(query)
    key = tf.keras.layers.Lambda(lambda x: tf.split(x, heads, axis=-1))(key)
    value = tf.keras.layers.Lambda(lambda x: tf.split(x, heads, axis=-1))(value)

    # Apply attention to each head. The Keras Attention layer takes its inputs
    # in the order [query, value, key]; passing [query, key, value] silently
    # swaps the roles of key and value whenever the two differ.
    attention_heads = [
        tf.keras.layers.Attention()([query[i], value[i], key[i]])
        for i in range(heads)
    ]

    # Concatenate the attention heads
    attention = concat(attention_heads)

    # Final linear layer
    output = dense(attention)

    return output

from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout, Dense, Flatten, RepeatVector, Bidirectional, LSTM, Add, Dot, Activation, Concatenate
from tensorflow.keras.models import Model



def residual_block_with_attention(x, filters, kernel_size=3, activation='relu', padding='same', heads=4, d_model=64):
    """Residual convolution block whose main path ends in self-attention.

    Main path: two Conv1D layers followed by ``multihead_attention`` applied
    with the convolution output as query, key and value; its output has
    ``d_model`` channels. Skip path: the block input goes through a 1x1 linear
    convolution to ``filters`` channels and a second one to ``d_model``
    channels. The two paths are added.

    Args:
        x: block input tensor.
        filters: channels of the two main-path convolutions and of the first
            skip projection.
        kernel_size: kernel size of the two main-path convolutions.
        activation: activation of the two main-path convolutions.
        padding: padding mode of the two main-path convolutions.
        heads: number of attention heads.
        d_model: channels of the attention output and of the second skip projection.

    Returns:
        The sum of the attention output and the projected skip connection.
    """
    block_input = x

    # Main path: two stacked convolutions with identical settings.
    conv_settings = dict(filters=filters, kernel_size=kernel_size, activation=activation, padding=padding)
    features = Conv1D(**conv_settings)(block_input)
    features = Conv1D(**conv_settings)(features)

    # Skip path, first projection (to `filters` channels).
    skip = Conv1D(filters=filters, kernel_size=1, activation=None, padding='same')(block_input)

    # Self-attention over the convolution output.
    attended = multihead_attention(features, features, features, heads, d_model)

    # Skip path, second projection (to `d_model` channels) so both branches match.
    # NOTE(review): two consecutive linear 1x1 convolutions on the skip path —
    # a single projection would presumably suffice; confirm before changing.
    skip = Conv1D(filters=d_model, kernel_size=1, activation=None, padding='same')(skip)

    return Add()([attended, skip])


def build_conv1d_lstm_resnet_with_attention(input_shape, n_units, N_values_to_predict=9, filters=64, kernel_size=3, heads=4, d_model=64):
    """Build the Conv1D/attention residual + LSTM encoder/decoder forecaster.

    Pipeline: two ``residual_block_with_attention`` blocks, each followed by
    max pooling (pool size 2) -> LSTM encoder -> one-step decoder (LSTM +
    bidirectional LSTM) initialised with the encoder states -> softmax
    dot-product attention of the decoder output over the encoder sequence ->
    concatenation, flatten, dropout and a linear Dense head.

    Args:
        input_shape: shape of one input sample, excluding the batch axis.
        n_units: number of LSTM units of the encoder and the first decoder LSTM.
        N_values_to_predict: number of output values (the forecast horizon).
        filters: channels of the first residual block (the second uses 128).
        kernel_size: kernel size of the first residual block (the second uses 3).
        heads: number of attention heads inside each residual block.
        d_model: output channels of each residual block.

    Returns:
        An uncompiled Keras model named ``'build_conv1d_lstm_resnet_with_attention'``.
    """
    inputs = tf.keras.layers.Input(shape=input_shape, name='Input')

    # Two residual-attention stages, each halving the time axis by max pooling.
    # The first stage uses the caller's filters / kernel size, the second is fixed.
    features = inputs
    for stage_filters, stage_kernel in ((filters, kernel_size), (128, 3)):
        features = residual_block_with_attention(
            features,
            filters=stage_filters,
            kernel_size=stage_kernel,
            activation='relu',
            padding='same',
            heads=heads,
            d_model=d_model,
        )
        features = MaxPooling1D(pool_size=2)(features)

    # Encoder: full output sequence plus final hidden / cell states.
    encoder_lstm = LSTM(units=n_units, return_sequences=True, return_state=True)
    encoder_x, encoder_h, encoder_c = encoder_lstm(features)

    # Decoder: a single step that starts from the encoder's final states.
    decoder_seed = RepeatVector(1)(encoder_h)
    decoder_lstm = LSTM(units=n_units, return_sequences=True, return_state=False)
    decoder_hidden = decoder_lstm(decoder_seed, initial_state=[encoder_h, encoder_c])
    decoder_x = Bidirectional(
        LSTM(units=int(n_units/2), return_sequences=True, return_state=False)
    )(decoder_hidden)

    # Dot-product attention of the decoder step over the encoder sequence.
    scores = Dot(axes=[2, 2])([decoder_x, encoder_x])
    weights = Activation('softmax')(scores)
    context = Dot(axes=[2, 1])([weights, encoder_x])

    # Head: merge context and decoder output, regularise, regress the horizon.
    merged = Concatenate()([context, decoder_x])
    merged = Flatten()(merged)
    merged = Dropout(0.5)(merged)
    predictions = Dense(N_values_to_predict, activation='linear')(merged)

    return Model(inputs=inputs, outputs=predictions, name='build_conv1d_lstm_resnet_with_attention')

In [None]:
input_shape = (None, 1)  # (timesteps, features)

epochs = 100

batch_size = 128

lstm_units = 64

N_values_to_predict = 9

#with tpu_strategy.scope():

# The horizon is passed explicitly; previously N_values_to_predict was defined
# but never handed to the builder, so editing it had no effect on the model.
model = build_conv1d_lstm_resnet_with_attention(input_shape, lstm_units, N_values_to_predict=N_values_to_predict)

model.compile(loss=tfk.losses.MeanSquaredError(), optimizer=tfk.optimizers.Adam(1e-3), metrics=['mae'], steps_per_execution=32)

model.summary()
tfk.utils.plot_model(model, expand_nested=True)

In [None]:
#with tpu_strategy.scope():

# Fit on the windowed training set and validate on the held-out windows;
# `.history` keeps only the per-epoch metrics of the returned History object.
history = model.fit(
    
    X_train, y_train,
    validation_data=(X_val, y_val),
   # Disabled alternative: feed a dict with a second 'Category_Input' entry
   # {'Input': X_train, 'Category_Input': cat_train}, y_train, # Provide input data as a dictionary

    #validation_data=({'Input': X_val, 'Category_Input': cat_val}, y_val), # Provide input data as a dictionary

    epochs = epochs,
    batch_size = batch_size,
    callbacks = [
        # Stop after 10 epochs without a val_loss improvement and restore the best weights.
        tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=10, restore_best_weights=True),
        # NOTE(review): same patience as EarlyStopping, so the learning-rate halving
        # will usually coincide with the stop — a shorter patience is presumably intended; confirm.
        tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, factor=0.5, min_lr=1e-5)
    ]
).history

In [None]:
import shutil

# Save the current `model` under the given name, then zip that directory into
# 'SubmissionModel_resnet_attention.zip'.
# NOTE(review): assumes model.save() without a file extension writes a directory
# (SavedModel format), which the archive step needs — confirm for the TF version in use.
model.save('SubmissionModel_resnet_attention')
shutil.make_archive('SubmissionModel_resnet_attention', 'zip', 'SubmissionModel_resnet_attention')

## General comments on the resnet-like architecture
The performance seemed good in validation, but there is clearly some mistake in the implementation. Since we do not understand the error and prefer not to act blindly, we leave it as it is and move on to more understandable models.

In [None]:
'''
from sklearn.preprocessing import RobustScaler

class model:
    def __init__(self, path):
        self.model = tf.keras.models.load_model(os.path.join(path, 'SubmissionModel'))

    def predict(self, X, categories):

        unique_categories = np.unique(categories)
        category_to_int = {category: i for i, category in enumerate(unique_categories)}
        final_categories = np.array([category_to_int[category] for category in categories])

        categories=final_categories
       
        input_data = {'Input': X, 'Category_Input': categories}
        
        out = self.model.predict(input_data)

        return out
    

# Assuming you have your original data X
X_pred = X[val_indexes[0][1:10],:]  # Your original data
c = categories[val_indexes[0][1:10]]  # Your original data
# Initialize the scaler
scaler = RobustScaler()

# Lists to store scaling parameters for each row
centering_params_list = []
scaling_params_list = []
X_scaled_list = []

# Scale each row of X independently
for i in range(X_pred.shape[0]):
    row = X_pred[i, :].reshape(-1, 1)  # Reshape to 2D array (column vector)

    # Create a mask for non-zero values
    non_zero_mask = row != 0

    # Apply RobustScaler only to non-zero values
    scaled_non_zero_data = scaler.fit_transform(row[non_zero_mask].reshape(-1, 1))
    
    # Create a new array with the same shape as the original row
    scaled_row = np.zeros_like(row, dtype=np.float64)
    scaled_row[non_zero_mask] = scaled_non_zero_data.flatten()

    # Save the scaling parameters for the current row
    centering_params_list.append(scaler.center_)
    scaling_params_list.append(scaler.scale_)

    # Store the scaled row
    X_scaled_list.append(scaled_row.flatten())

    
# Stack the scaled rows into a 2D array
X_scaled = np.vstack(X_scaled_list)

print(X_scaled.shape)

# Assuming you have X_test and cat_test for test data
my_model = model('/kaggle/working/')

# Assuming you have X_test and cat_test for test data
predictions = my_model.predict(X_scaled, cat_pred)

# Assuming you want to invert the scaling for the predictions
predictions_original_scale = np.zeros_like(predictions, dtype=np.float64)

# Lists to store inverted scaling for each row
for i in range(predictions.shape[0]):
    scaled_row = predictions[i, :].reshape(-1, 1)  # Reshape to 2D array (column vector)

    # Use the saved scaling parameters for the current row
    centering_params = centering_params_list[i]
    scaling_params = scaling_params_list[i]

    # Inverse transform for the current row
    row_original_scale = scaled_row * scaling_params + centering_params

    # Store the inverted scaled row
    predictions_original_scale[i, :] = row_original_scale.flatten()


plt.figure()
for i in range (X_scaled.shape[0]):
    plt.plot(predictions_original_scale[:,i])

predictions_original_scale.shape


# Perform your operations on the scaled data (if needed)

# Lists to store inverted scaling for each row
X_original_list = []

#HERE I WILL HAVE MU PREDICTIONS Y

# Invert the scaling for each row separately
for i in range(X_scaled.shape[0]):
    scaled_row = X_scaled[i, :].reshape(-1, 1)  # Reshape to 2D array (column vector)

    # Use the saved scaling parameters for the current row
    centering_params = centering_params_list[i]
    scaling_params = scaling_params_list[i]

    # Inverse transform for the current row
    X_original_non_zero_data = scaler.inverse_transform(scaled_row[non_zero_mask].reshape(-1, 1))
    
    # Create a new array with the same shape as the original row
    X_original_row = np.zeros_like(scaled_row, dtype=np.float64)
    X_original_row[non_zero_mask] = X_original_non_zero_data.flatten()

    # Store the inverted scaled row
    X_original_list.append(X_original_row.flatten())

# Stack the inverted scaled rows into a 2D array
X_original = np.vstack(X_original_list)

# Check if the two approaches result in the same data
print(np.allclose(X, X_original))

'''