In [None]:
!pip install tensorflow numpy pandas paho-mqtt keras-tuner

Collecting paho-mqtt
  Downloading paho_mqtt-2.1.0-py3-none-any.whl.metadata (23 kB)
Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading paho_mqtt-2.1.0-py3-none-any.whl (67 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.2/67.2 kB[0m [31m931.6 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, paho-mqtt, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5 paho-mqtt-2.1.0


In [None]:
# Colab-only step: prompts for a file upload. The recorded run saved
# 'renault_can_data.xlsx', which prepare_can_data() later reads by that name.
from google.colab import files
uploaded = files.upload()

Saving renault_can_data.xlsx to renault_can_data.xlsx


In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, LSTM, GRU, Conv1D, MaxPooling1D, UpSampling1D,
                                    RepeatVector, Dense, TimeDistributed, Dropout,
                                    BatchNormalization, Concatenate, Add)
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import keras_tuner as kt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import warnings
import os
import json
# Silence every warning category for the whole process (this also hides
# deprecation notices coming from pandas / TensorFlow).
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility (NumPy's global RNG and TensorFlow's global RNG)
np.random.seed(42)
tf.random.set_seed(42)

# Global variables
# The Keras Tuner builder functions only receive an `hp` object, so they read the
# input shape from these module-level values instead of from arguments.
X_test_global = None      # assigned in train_and_evaluate_multi_models()
timesteps_global = 20     # overwritten by prepare_data() / train_and_evaluate_multi_models()
n_features_global = 0     # overwritten by prepare_data() / train_and_evaluate_multi_models()

def convert_numpy_types(obj):
    """
    Recursively convert numpy types to native Python types to avoid serialization issues.

    Conversions performed:
      * np.bool_     -> bool
      * np.integer   -> int
      * np.floating  -> float
      * np.ndarray   -> nested list (via ``tolist()``)
      * dict / list / tuple -> same container type with converted values
        (dict keys are left untouched; tuples come back as plain tuples)
      * scalar missing values (None, NaN, pd.NA, pd.NaT) -> None

    Anything else is returned unchanged.
    """
    # np.bool_ is neither np.integer nor a Python bool, and json.dumps rejects it.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_numpy_types(item) for item in obj)
    # pd.isna() returns an array for array-likes (Series, Index, ...), whose truth
    # value is ambiguous; only test true scalars for missingness.
    if pd.api.types.is_scalar(obj) and pd.isna(obj):
        return None
    return obj

def safe_float_conversion(value):
    """
    Return ``value`` as a native Python float, or NaN when that is not possible.

    NaN is returned for missing values, for +/- infinity, for anything that is
    not a numpy/Python integer or float, and whenever the conversion raises.
    """
    numeric_types = (np.floating, np.integer, int, float)
    try:
        if pd.isna(value):
            return np.nan
        if not isinstance(value, numeric_types):
            return np.nan
        as_float = float(value)
    except (ValueError, TypeError, OverflowError):
        return np.nan
    # Infinite magnitudes are reported as "not a number" as well.
    return np.nan if np.isinf(as_float) else as_float

def decode_payload(row):
    """
    Decode one CAN frame into named signal values.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``row['Payload']`` (hex string, spaces allowed) and
        ``row['ID']`` (CAN identifier; matched case-insensitively).

    Returns
    -------
    pd.Series
        Signal name -> float value for the recognised IDs below; an empty
        Series when the payload is missing, cannot be parsed as hex, is too
        short for its ID, or the ID is not one of the known ones.

    Known IDs and layout (byte indices into the payload):
      18FF10E5: [0] Vehicle_Speed_kmh
      18FF50E5: [0] BMS_Level, [2:4] BMS_Voltage_V (x0.1), [4:6] BMS_Current_A
      18FF21E5: [0] Motor_Power_kW
      18FF31E5: [0] Temperature_C
      18FF40E5: [0] Charger_Status, [1:3] alpha (x0.01), [3:5] betha (x0.01),
                [5:7] gamma, [7] khi
    Multi-byte fields are read big-endian and unsigned.
    """
    try:
        raw_payload = row['Payload']
        # Test for missing values BEFORE str(): str(nan) == 'nan' and
        # str(None) == 'None' would otherwise be parsed as hex text.
        if pd.api.types.is_scalar(raw_payload) and pd.isna(raw_payload):
            return pd.Series({})

        payload = str(raw_payload).replace(' ', '')
        if payload.lower() == 'nan':
            return pd.Series({})

        # Ensure payload has even length for proper byte parsing
        if len(payload) % 2 != 0:
            payload = '0' + payload

        # bytes.fromhex raises ValueError on any non-hex character.
        bytes_data = list(bytes.fromhex(payload))
        can_id = str(row['ID']).strip().upper()
        decoded = {}

        if can_id == '18FF10E5' and len(bytes_data) >= 1:
            decoded['Vehicle_Speed_kmh'] = float(bytes_data[0])

        elif can_id == '18FF50E5' and len(bytes_data) >= 6:
            decoded['BMS_Level'] = float(bytes_data[0])
            decoded['BMS_Voltage_V'] = float((bytes_data[2] << 8 | bytes_data[3]) * 0.1)
            decoded['BMS_Current_A'] = float(bytes_data[4] << 8 | bytes_data[5])

        elif can_id == '18FF21E5' and len(bytes_data) >= 1:
            decoded['Motor_Power_kW'] = float(bytes_data[0])

        elif can_id == '18FF31E5' and len(bytes_data) >= 1:
            decoded['Temperature_C'] = float(bytes_data[0])

        elif can_id == '18FF40E5' and len(bytes_data) >= 8:
            decoded['Charger_Status'] = float(bytes_data[0])
            decoded['alpha'] = float((bytes_data[1] << 8 | bytes_data[2]) * 0.01)
            decoded['betha'] = float((bytes_data[3] << 8 | bytes_data[4]) * 0.01)
            decoded['gamma'] = float(bytes_data[5] << 8 | bytes_data[6])
            decoded['khi'] = float(bytes_data[7])

        return pd.Series(decoded)

    except (KeyError, IndexError, TypeError, ValueError) as e:
        # Best effort: a malformed row is reported and skipped, not fatal.
        print(f"Error decoding row: {e}")
        return pd.Series({})

def compute_acceleration(df):
    """
    Add speed / acceleration helper columns to a copy of ``df``.

    The log interleaves frames of several CAN IDs, so ``Vehicle_Speed_kmh`` is
    NaN on every row that is not a speed frame. Treating those gaps as 0 km/h
    would make the speed jump to zero and back, producing large fake
    accelerations. Acceleration is therefore computed only between consecutive
    rows that actually carry a speed value (delta-v over the time between those
    two rows) and then held until the next speed sample.

    When every row carries a speed value the result equals a plain row-to-row
    delta_speed / delta_time.

    Parameters
    ----------
    df : pd.DataFrame
        Needs a datetime ``Timestamp`` column; ``Vehicle_Speed_kmh`` is optional
        (if absent, speed and acceleration are all zero).

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with added float columns:
        ``speed_m_s`` (speed held between samples, 0 if never measured),
        ``delta_time`` (seconds since previous row),
        ``delta_speed`` (row-to-row change of ``speed_m_s``),
        ``acceleration`` (m/s^2, 0.0 where undefined or delta-t <= 0).
    """
    # Create a copy to avoid mutating the caller's frame
    df = df.copy()

    if 'Vehicle_Speed_kmh' in df.columns:
        speed_kmh = df['Vehicle_Speed_kmh'].astype(float)
    else:
        speed_kmh = pd.Series(np.nan, index=df.index, dtype=float)

    # Hold the last known speed across rows of other CAN IDs (leading gap takes
    # the first known value; all-missing falls back to 0).
    df['speed_m_s'] = speed_kmh.ffill().bfill().fillna(0.0) / 3.6
    df['delta_time'] = df['Timestamp'].diff().dt.total_seconds().astype(float)
    df['delta_speed'] = df['speed_m_s'].diff().astype(float)

    # Acceleration between consecutive *measured* speed samples only.
    measured = speed_kmh.notna().to_numpy()
    sample_speed = speed_kmh[measured] / 3.6
    sample_dt = df.loc[measured, 'Timestamp'].diff().dt.total_seconds()
    # NaN or non-positive intervals (first sample, duplicate/unsorted stamps) -> 0.0
    sample_accel = (sample_speed.diff() / sample_dt).where(sample_dt > 0, 0.0)

    # Positional scatter keeps this safe even if the index has duplicates.
    accel = np.full(len(df), np.nan, dtype=float)
    accel[measured] = sample_accel.to_numpy(dtype=float)
    df['acceleration'] = pd.Series(accel, index=df.index).ffill().fillna(0.0).astype(float)

    return df

def check_data_quality(df):
    """
    Print a plain-text quality report for ``df``.

    Reports, in order: shape, deep memory usage in MB, duplicate row count,
    per-column missing-value counts, and - when numeric columns exist -
    per-column infinite-value counts plus every ``describe()`` statistic.
    Nothing is returned and ``df`` is not modified.
    """
    def _report_counts(counts, heading, all_clear):
        # List only the columns with a non-zero count, or a single all-clear line.
        if int(counts.sum()) > 0:
            print(heading)
            for col, count in counts[counts > 0].items():
                print(f"  {col}: {int(count)}")
        else:
            print(all_clear)

    rule = "=" * 50
    print("\n" + rule)
    print("DATA QUALITY REPORT")
    print(rule)

    # Native ints/floats so the printed values carry no numpy reprs.
    n_rows, n_cols = int(df.shape[0]), int(df.shape[1])
    size_mb = float(df.memory_usage(deep=True).sum() / 1024**2)
    print(f"Dataset shape: {(n_rows, n_cols)}")
    print(f"Memory usage: {size_mb:.2f} MB")
    print(f"Duplicate rows: {int(df.duplicated().sum())}")

    _report_counts(df.isnull().sum(), "\nMissing values:", "\nNo missing values found.")

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) == 0:
        return

    _report_counts(np.isinf(df[numeric_cols]).sum(),
                   "\nInfinite values:", "\nNo infinite values found.")

    print("\nNumeric data ranges:")
    summary = df[numeric_cols].describe()
    for col in summary.columns:
        print(f"{col}:")
        for stat in summary.index:
            value = safe_float_conversion(summary.loc[stat, col])
            if np.isnan(value):
                print(f"  {stat}: NaN")
            else:
                print(f"  {stat}: {value:.6f}")

def prepare_can_data():
    """
    Load the raw CAN log, decode it, engineer features and return a scaled matrix.

    Pipeline:
      1. Read 'renault_can_data.xlsx' (mirrored to 'data.csv'); if the Excel
         file is missing, fall back to an existing 'data.csv'.
      2. Decode every payload with ``decode_payload`` and write
         'decoded_can_data.csv', then re-read it with ``Timestamp`` parsed.
      3. Add one-hot ``ID_*`` columns and the columns from ``compute_acceleration``.
      4. Keep the known feature columns that exist, coerce to float64, linearly
         interpolate gaps, fill what is left with 0.
      5. Standardise with ``StandardScaler`` and zero out any non-finite value.
      6. Write the un-scaled engineered frame to 'processed_can_data.csv' and
         print a quality report of the scaled features.

    Returns
    -------
    (np.ndarray of float32 with shape (rows, n_features), StandardScaler, list[str])
        or ``(None, None, None)`` when no input file or no known feature exists.
    """
    print("PREPARING CAN DATA")
    print("=" * 60)

    try:
        df = pd.read_excel('renault_can_data.xlsx')
        df.to_csv('data.csv', index=False, encoding='utf-8')
        print("Excel file converted to CSV")
    except FileNotFoundError:
        print("Warning: renault_can_data.xlsx not found, trying to read existing CSV...")
        try:
            df = pd.read_csv('data.csv', parse_dates=['Timestamp'])
        except FileNotFoundError:
            print("Error: No data file found. Please provide renault_can_data.xlsx")
            return None, None, None

    print("Decoding CAN payloads...")
    decoded_df = df.apply(decode_payload, axis=1)

    result_df = pd.concat([df, decoded_df], axis=1)
    result_df.to_csv('decoded_can_data.csv', index=False)
    print(" Decoding complete. Output saved to 'decoded_can_data.csv'")

    # Round-trip through CSV so Timestamp is parsed the same way for both input paths.
    print("Reading decoded data...")
    data = pd.read_csv("decoded_can_data.csv", parse_dates=['Timestamp'])
    print(f" Data shape: {data.shape}")

    print("Creating one-hot encoding for CAN IDs...")
    one_hot_encoded = pd.get_dummies(data['ID'], prefix='ID').astype(float)  # Ensure float type
    data = pd.concat([data, one_hot_encoded], axis=1)

    print("Computing acceleration...")
    data = compute_acceleration(data)
    # Assign the result back: fillna(inplace=True) on a column selection is a
    # chained assignment and does not update the frame under copy-on-write.
    data['acceleration'] = data['acceleration'].fillna(0.0).astype(float)

    features = [
        'BMS_Current_A', 'BMS_Level', 'BMS_Voltage_V', 'Charger_Status',
        'Motor_Power_kW', 'Temperature_C', 'Vehicle_Speed_kmh',
        'ID_18FF10E5', 'ID_18FF21E5', 'ID_18FF50E5', 'ID_18FF31E5', 'ID_18FF40E5',
        'acceleration',
    ]
    existing_features = [f for f in features if f in data.columns]
    missing_features = [f for f in features if f not in data.columns]

    print(f" Existing features: {existing_features}")
    if missing_features:
        print(f" Missing features: {missing_features}")

    if not existing_features:
        print(" No valid features found in the data!")
        print("Available columns:", data.columns.tolist())
        return None, None, None

    print("Interpolating missing values...")
    df_features = data[existing_features].copy()

    # Convert columns to numeric with explicit float64 type
    for col in existing_features:
        df_features[col] = pd.to_numeric(df_features[col], errors='coerce').astype(np.float64)

    df_features = df_features.interpolate(method='linear', axis=0)

    # Fill any remaining NaN values (e.g. leading gaps) with 0 *before* scaling
    df_features = df_features.fillna(0.0).astype(np.float64)

    n_features = len(existing_features)
    print(f" Number of features: {n_features}")

    # Check for any remaining NaN values (should be 0 after fillna)
    nan_counts = df_features.isnull().sum()
    if int(nan_counts.sum()) > 0:
        print(f" NaN counts after interpolation and fillna:")
        for col, count in nan_counts[nan_counts > 0].items():
            print(f"  {col}: {int(count)}")

    print("Scaling data...")
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(df_features)
    print("Data scaled successfully.")

    # Convert back to DataFrame with proper column names and ensure float64
    data_scaled = pd.DataFrame(data_scaled, columns=df_features.columns, dtype=np.float64)

    # Report, then neutralise, any non-finite value produced by scaling
    inf_count = int(np.isinf(data_scaled).sum().sum())
    nan_count = int(data_scaled.isnull().sum().sum())
    if inf_count > 0:
        print(f" Infinite values in scaled data: {inf_count}")
    if nan_count > 0:
        print(f" NaN values in scaled data: {nan_count}")
    data_scaled = data_scaled.replace([np.inf, -np.inf], 0.0).fillna(0.0)

    # Save the engineered (un-scaled) data
    data.to_csv('processed_can_data.csv', index=False)
    print(" Processed data saved to 'processed_can_data.csv'")

    # Run data quality check
    check_data_quality(data_scaled)

    # Convert to numpy array with explicit float32 for TensorFlow compatibility
    return data_scaled.values.astype(np.float32), scaler, existing_features

def create_sequences(data, timesteps=20):
    """
    Slice a 2-D feature matrix into overlapping fixed-length windows.

    Window ``i`` holds rows ``i .. i + timesteps - 1`` (stride 1).

    Parameters
    ----------
    data : array-like of shape (n_rows, n_features), or None
    timesteps : int
        Window length in rows.

    Returns
    -------
    np.ndarray of float32 with shape (n_rows - timesteps + 1, timesteps, n_features),
    or ``None`` when ``data`` is None or has fewer than ``timesteps`` rows
    (callers already treat ``None`` as "no data").

    Raises
    ------
    ValueError
        If ``data`` is not 2-D.
    """
    print(f"Creating sequences with timesteps={timesteps}...")

    if data is None:
        return None

    # Use float32 for TensorFlow compatibility
    data = np.asarray(data, dtype=np.float32)
    if data.ndim != 2:
        raise ValueError(f"data must be 2-D (rows, features), got shape {data.shape}")

    timesteps = int(timesteps)
    n_samples = int(len(data) - timesteps + 1)
    if timesteps < 1 or n_samples < 1:
        # Without this guard np.zeros would be asked for a negative dimension.
        print(f"Not enough rows ({len(data)}) to build a window of {timesteps} timesteps")
        return None

    # sliding_window_view yields (n_samples, n_features, timesteps); move the
    # window axis to position 1 and materialise a writable, contiguous copy.
    windows = np.lib.stride_tricks.sliding_window_view(data, timesteps, axis=0)
    sequences = np.ascontiguousarray(np.moveaxis(windows, -1, 1))

    print(f"✓ Created {n_samples} sequences with shape {sequences.shape}")
    return sequences

def prepare_data(data, test_size=0.2, timesteps=20):
    """
    Window the feature matrix and split it into train / test sets.

    The split is chronological (no shuffling). The windows come from a stride-1
    sliding window, so neighbouring windows share all but one row; a shuffled
    split would put near-duplicates of training windows into the test set and
    make validation loss look better than it is. With a chronological split
    only the few windows straddling the boundary still overlap.

    Side effect: updates the module-level ``timesteps_global`` and
    ``n_features_global`` read by the Keras Tuner builder functions.

    Parameters
    ----------
    data : array-like of shape (n_rows, n_features), or None
    test_size : float or int
        Passed to ``train_test_split``; the test set is the tail of the sequence.
    timesteps : int
        Window length.

    Returns
    -------
    (X_train, X_test, n_features) with float32 arrays, or ``(None, None, None)``
    when no sequences could be created.
    """
    global timesteps_global, n_features_global

    print("PREPARING DATA FOR TRAINING")
    print("=" * 60)

    # Create sequences
    sequences = create_sequences(data, timesteps)

    if sequences is None:
        return None, None, None

    # Chronological split: first (1 - test_size) of the windows train, the rest test
    X_train, X_test = train_test_split(sequences, test_size=test_size, shuffle=False)

    # Ensure float32 type for TensorFlow
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)

    # Update global variables
    n_features = int(sequences.shape[2])
    timesteps_global = int(timesteps)
    n_features_global = n_features

    print(f" Training data shape: {X_train.shape}")
    print(f" Test data shape: {X_test.shape}")

    return X_train, X_test, n_features

# ============================================================================
# CONSTRAINED MODEL ARCHITECTURES WITH NUMPY TYPE HANDLING
# ============================================================================

def build_lstm_autoencoder_architecture(timesteps, n_features, latent_dim=64,
                                     activation='relu', dropout_rate=0.2,
                                     learning_rate=0.001, optimizer='adam',
                                     output_activation='linear'):
    """
    Build and compile a constrained two-layer LSTM sequence autoencoder.

    Layout: LSTM(intermediate) -> LSTM(latent) -> RepeatVector ->
    LSTM(latent) -> LSTM(intermediate) -> TimeDistributed(Dense(n_features)),
    with a Dropout after each LSTM when ``dropout_rate > 0``.

    Parameters
    ----------
    timesteps, n_features : int
        Input windows have shape (timesteps, n_features).
    latent_dim : int
        Bottleneck width; clamped to the 8-64 range.
    activation : str
        Activation used by all four LSTM layers.
    dropout_rate : float
    learning_rate : float
    optimizer : str
        'adam' selects Adam; any other value selects RMSprop.
    output_activation : str
        Activation of the reconstruction layer. Defaults to 'linear' because
        the inputs are standardised (zero mean, negative values present) and a
        sigmoid output, limited to (0, 1), cannot reconstruct them.

    Returns
    -------
    A compiled Keras ``Model`` (loss 'mse', metric 'mae').
    """
    # CONSTRAINT: Limit latent_dim to the 8-64 range
    latent_dim = max(8, min(64, int(latent_dim)))
    timesteps = int(timesteps)
    n_features = int(n_features)
    dropout_rate = float(dropout_rate)
    learning_rate = float(learning_rate)

    print(f"\n BUILDING CONSTRAINED LSTM AUTOENCODER")
    print("=" * 50)
    print(f" Input shape: ({timesteps}, {n_features})")
    print(f" Latent dimension: {latent_dim}")
    print(f" Activation: {activation}")
    print(f" Dropout rate: {dropout_rate}")
    print(f" Optimizer: {optimizer}")

    # Input layer
    input_layer = Input(shape=(timesteps, n_features), name="LSTM_Input", dtype=tf.float32)

    # ENCODER - Max 2 layers; outer layer is twice the latent width, kept within 32-64
    intermediate_dim = min(64, max(latent_dim * 2, 32))

    encoder = LSTM(intermediate_dim, activation=activation, return_sequences=True,
                   name="Encoder_LSTM_1", dtype=tf.float32)(input_layer)
    if dropout_rate > 0:
        encoder = Dropout(dropout_rate, name="Encoder_Dropout_1")(encoder)

    encoder = LSTM(latent_dim, activation=activation, return_sequences=False,
                   name="Encoder_LSTM_2", dtype=tf.float32)(encoder)
    if dropout_rate > 0:
        encoder = Dropout(dropout_rate, name="Encoder_Dropout_2")(encoder)

    # DECODER - repeat the latent vector once per timestep, then mirror the encoder
    decoder = RepeatVector(timesteps, name="Repeat_Vector")(encoder)

    decoder = LSTM(latent_dim, activation=activation, return_sequences=True,
                   name="Decoder_LSTM_1", dtype=tf.float32)(decoder)
    if dropout_rate > 0:
        decoder = Dropout(dropout_rate, name="Decoder_Dropout_1")(decoder)

    decoder = LSTM(intermediate_dim, activation=activation, return_sequences=True,
                   name="Decoder_LSTM_2", dtype=tf.float32)(decoder)
    if dropout_rate > 0:
        decoder = Dropout(dropout_rate, name="Decoder_Dropout_2")(decoder)

    # Output layer
    output_layer = TimeDistributed(
        Dense(n_features, activation=output_activation, dtype=tf.float32),
        name="LSTM_Output")(decoder)

    # Create model
    model = Model(inputs=input_layer, outputs=output_layer, name="Constrained_LSTM_Autoencoder")

    # Compile model
    selected_optimizer = Adam(learning_rate=learning_rate) if optimizer == 'adam' else RMSprop(learning_rate=learning_rate)
    model.compile(
        optimizer=selected_optimizer,
        loss='mse',
        metrics=['mae']
    )

    total_params = int(model.count_params())
    print(f" Total parameters: {total_params:,}")

    return model

def build_constrained_conv1d_autoencoder(timesteps, n_features, filters=32,
                                        kernel_size=3, learning_rate=0.001, dropout_rate=0.2, optimizer='adam',
                                        output_activation='linear'):
    """
    Build and compile a constrained Conv1D sequence autoencoder.

    Layout: Conv1D -> BN -> (Dropout) -> MaxPool(2) -> Conv1D -> BN  |
            Conv1D -> BN -> (Dropout) -> UpSample(2) -> length fix -> Conv1D(n_features).

    Parameters
    ----------
    timesteps, n_features : int
        Input windows have shape (timesteps, n_features).
    filters : int
        Filters of the first convolution; clamped to 8-64. The inner layers
        use ``max(16, filters // 2)``.
    kernel_size : int
    learning_rate, dropout_rate : float
    optimizer : str
        'adam' selects Adam; any other value selects RMSprop.
    output_activation : str
        Activation of the reconstruction layer. Defaults to 'linear' because
        the inputs are standardised and a sigmoid output, limited to (0, 1),
        cannot reconstruct negative or >1 values.

    Returns
    -------
    A compiled Keras ``Model`` (loss 'mse', metric 'mae').
    """
    # Convert and constrain parameters
    filters = max(8, min(64, int(filters)))
    kernel_size = int(kernel_size)
    timesteps = int(timesteps)
    n_features = int(n_features)
    learning_rate = float(learning_rate)
    dropout_rate = float(dropout_rate)

    print(f"\n BUILDING CONSTRAINED CONV1D AUTOENCODER")
    print(f" Filters: {filters}, Kernel: {kernel_size}, Optimizer: {optimizer}")

    input_layer = Input(shape=(timesteps, n_features), name="Conv1D_Input", dtype=tf.float32)

    # ENCODER
    x = Conv1D(filters, kernel_size, activation='relu', padding='same',
               name="Conv1D_1", dtype=tf.float32)(input_layer)
    x = BatchNormalization(name="BN_1")(x)
    if dropout_rate > 0:
        x = Dropout(dropout_rate, name="Dropout_1")(x)

    x = MaxPooling1D(2, padding='same', name="Pool_1")(x)

    reduced_filters = max(16, filters // 2)
    encoded = Conv1D(reduced_filters, kernel_size, activation='relu', padding='same',
                     name="Conv1D_2", dtype=tf.float32)(x)
    encoded = BatchNormalization(name="BN_2")(encoded)

    # DECODER
    x = Conv1D(reduced_filters, kernel_size, activation='relu', padding='same',
               name="DeConv1D_1", dtype=tf.float32)(encoded)
    x = BatchNormalization(name="BN_3")(x)
    if dropout_rate > 0:
        x = Dropout(dropout_rate, name="Dropout_2")(x)

    x = UpSampling1D(2, name="Upsample_1")(x)

    # Ensure correct timesteps. Pool('same') followed by x2 upsampling yields
    # timesteps (even) or timesteps + 1 (odd), so in practice only the crop
    # runs. Built-in layers are used instead of closure-capturing Lambda
    # layers so the model can be saved and reloaded.
    excess = int(x.shape[1]) - timesteps
    if excess > 0:
        x = tf.keras.layers.Cropping1D((0, excess), name="Crop_To_Timesteps")(x)
    elif excess < 0:
        x = tf.keras.layers.ZeroPadding1D((0, -excess), name="Pad_To_Timesteps")(x)

    # Final reconstruction layer
    decoded = Conv1D(n_features, kernel_size, activation=output_activation, padding='same',
                     name="Output_Conv", dtype=tf.float32)(x)

    model = Model(inputs=input_layer, outputs=decoded, name="Constrained_Conv1D_Autoencoder")
    selected_optimizer = Adam(learning_rate=learning_rate) if optimizer == 'adam' else RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=selected_optimizer, loss='mse', metrics=['mae'])

    print(f" Conv1D model: {int(model.count_params()):,} parameters")
    return model

def build_constrained_gru_autoencoder(timesteps, n_features, units=32,
                                     learning_rate=0.001, dropout_rate=0.2, optimizer='adam',
                                     output_activation='linear'):
    """
    Build and compile a constrained GRU sequence autoencoder.

    Layout: GRU(units) -> GRU(bottleneck) -> RepeatVector ->
    GRU(bottleneck) -> GRU(units) -> TimeDistributed(Dense(n_features)),
    where ``bottleneck = max(16, units // 2)``.

    The decoder is fed from the RepeatVector output, so every reconstruction
    passes through the bottleneck vector.

    Parameters
    ----------
    timesteps, n_features : int
        Input windows have shape (timesteps, n_features).
    units : int
        Width of the outer GRU layers; clamped to 8-64.
    learning_rate, dropout_rate : float
        ``dropout_rate`` is the input dropout of every GRU layer.
    optimizer : str
        'adam' selects Adam; any other value selects RMSprop.
    output_activation : str
        Activation of the reconstruction layer. Defaults to 'linear' because
        the inputs are standardised and a sigmoid output, limited to (0, 1),
        cannot reconstruct them.

    Returns
    -------
    A compiled Keras ``Model`` (loss 'mse', metric 'mae').
    """
    # Convert and constrain parameters
    units = max(8, min(64, int(units)))
    timesteps = int(timesteps)
    n_features = int(n_features)
    learning_rate = float(learning_rate)
    dropout_rate = float(dropout_rate)
    bottleneck_units = max(16, units // 2)

    print(f"\n BUILDING CONSTRAINED GRU AUTOENCODER")
    print(f" Units: {units}, Optimizer: {optimizer}")

    input_layer = Input(shape=(timesteps, n_features), name="GRU_Input", dtype=tf.float32)

    # ENCODER
    x = GRU(units, return_sequences=True, dropout=dropout_rate,
            name="GRU_Encoder_1", dtype=tf.float32)(input_layer)
    encoder_output = GRU(bottleneck_units, return_sequences=False, dropout=dropout_rate,
                        name="GRU_Encoder_2", dtype=tf.float32)(x)

    # DECODER - must start from the repeated bottleneck vector, not from the
    # encoder's per-timestep output, otherwise the bottleneck is bypassed.
    decoder = RepeatVector(timesteps, name="Repeat_Vector")(encoder_output)
    decoder = GRU(bottleneck_units, return_sequences=True, dropout=dropout_rate,
                  name="GRU_Decoder_1", dtype=tf.float32)(decoder)
    decoder_output = GRU(units, return_sequences=True, dropout=dropout_rate,
                        name="GRU_Decoder_2", dtype=tf.float32)(decoder)

    # Output layer
    output_layer = TimeDistributed(
        Dense(n_features, activation=output_activation, dtype=tf.float32),
        name="GRU_Output")(decoder_output)

    model = Model(inputs=input_layer, outputs=output_layer, name="Constrained_GRU_Autoencoder")
    selected_optimizer = Adam(learning_rate=learning_rate) if optimizer == 'adam' else RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=selected_optimizer, loss='mse', metrics=['mae'])

    print(f" GRU model: {int(model.count_params()):,} parameters")
    return model

def build_constrained_hybrid_autoencoder(timesteps, n_features,
                                        conv_filters=32, lstm_units=32, gru_units=16,
                                        learning_rate=0.001, dropout_rate=0.2, optimizer='adam',
                                        output_activation='linear'):
    """
    Build and compile a constrained three-branch (Conv1D + LSTM + GRU) autoencoder.

    The input is processed by three parallel branches whose per-timestep
    outputs are concatenated, compressed by an LSTM to a single vector,
    repeated ``timesteps`` times, decoded by an LSTM and projected back to
    ``n_features`` per timestep.

    Parameters
    ----------
    timesteps, n_features : int
        Input windows have shape (timesteps, n_features).
    conv_filters : int
        Clamped to 16-64; the Conv1D branch uses ``conv_filters // 2`` filters.
    lstm_units : int
        Clamped to 16-64; used by the LSTM branch, encoder and decoder.
    gru_units : int
        Clamped to 16-32.
    learning_rate, dropout_rate : float
    optimizer : str
        'adam' selects Adam; any other value selects RMSprop.
    output_activation : str
        Activation of the reconstruction layer. Defaults to 'linear' because
        the inputs are standardised and a sigmoid output, limited to (0, 1),
        cannot reconstruct them.

    Returns
    -------
    A compiled Keras ``Model`` (loss 'mse', metric 'mae').
    """
    # Convert and constrain parameters
    conv_filters = max(16, min(64, int(conv_filters)))
    lstm_units = max(16, min(64, int(lstm_units)))
    gru_units = max(16, min(32, int(gru_units)))
    timesteps = int(timesteps)
    n_features = int(n_features)
    learning_rate = float(learning_rate)
    dropout_rate = float(dropout_rate)

    print(f"\n BUILDING CONSTRAINED HYBRID AUTOENCODER")
    print(f" Conv filters: {conv_filters}, LSTM units: {lstm_units}, GRU units: {gru_units}, Optimizer: {optimizer}")

    input_layer = Input(shape=(timesteps, n_features), name="Hybrid_Input", dtype=tf.float32)

    # BRANCH 1: Conv1D path
    conv_branch = Conv1D(conv_filters // 2, 3, activation='relu', padding='same',
                        name="Conv_Branch", dtype=tf.float32)(input_layer)
    conv_branch = Dropout(dropout_rate, name="Conv_Dropout")(conv_branch)

    # BRANCH 2: LSTM path
    lstm_branch = LSTM(lstm_units, return_sequences=True, dropout=dropout_rate,
                      name="LSTM_Branch", dtype=tf.float32)(input_layer)

    # BRANCH 3: GRU path
    gru_branch = GRU(gru_units, return_sequences=True, dropout=dropout_rate,
                    name="GRU_Branch", dtype=tf.float32)(input_layer)

    # FUSION - concatenate the three branches along the feature axis
    combined = Concatenate(name="Fusion")([conv_branch, lstm_branch, gru_branch])

    # ENCODER
    encoder = LSTM(lstm_units, return_sequences=False, dropout=dropout_rate,
                  name="Encoder", dtype=tf.float32)(combined)

    # DECODER
    decoder = RepeatVector(timesteps, name="Decoder_Repeat")(encoder)
    decoder = LSTM(lstm_units, return_sequences=True, dropout=dropout_rate,
                  name="Decoder_LSTM", dtype=tf.float32)(decoder)

    # OUTPUT
    output_layer = TimeDistributed(
        Dense(n_features, activation=output_activation, dtype=tf.float32),
        name="Hybrid_Output")(decoder)

    model = Model(inputs=input_layer, outputs=output_layer, name="Constrained_Hybrid_Autoencoder")
    selected_optimizer = Adam(learning_rate=learning_rate) if optimizer == 'adam' else RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=selected_optimizer, loss='mse', metrics=['mae'])

    print(f" Hybrid model: {int(model.count_params()):,} parameters")
    return model

def build_constrained_attention_autoencoder(timesteps, n_features, embed_dim=32,
                                           num_heads=2, learning_rate=0.001, dropout_rate=0.2, optimizer='adam',
                                           output_activation='linear'):
    """
    Build and compile a constrained self-attention + LSTM sequence autoencoder.

    Layout: Dense projection -> self-attention (residual + LayerNorm) ->
    LSTM(embed_dim // 2) bottleneck -> RepeatVector -> LSTM(embed_dim) ->
    self-attention (residual + LayerNorm) -> TimeDistributed(Dense(n_features)).

    Parameters
    ----------
    timesteps, n_features : int
        Input windows have shape (timesteps, n_features).
    embed_dim : int
        Projection width; clamped to 8-64.
    num_heads : int
        Heads of the first attention layer; clamped to 1-4. The second
        attention layer uses ``max(1, num_heads // 2)`` heads.
    learning_rate, dropout_rate : float
        ``dropout_rate`` is the input dropout of both LSTM layers.
    optimizer : str
        'adam' selects Adam; any other value selects RMSprop.
    output_activation : str
        Activation of the reconstruction layer. Defaults to 'linear' because
        the inputs are standardised and a sigmoid output, limited to (0, 1),
        cannot reconstruct them.

    Returns
    -------
    A compiled Keras ``Model`` (loss 'mse', metric 'mae').
    """
    # Convert and constrain parameters
    embed_dim = max(8, min(64, int(embed_dim)))
    num_heads = max(1, min(4, int(num_heads)))
    timesteps = int(timesteps)
    n_features = int(n_features)
    learning_rate = float(learning_rate)
    dropout_rate = float(dropout_rate)

    print(f"\n BUILDING CONSTRAINED ATTENTION AUTOENCODER")
    print(f" Embed dim: {embed_dim}, Attention heads: {num_heads}, Optimizer: {optimizer}")

    input_layer = Input(shape=(timesteps, n_features), name="Attention_Input", dtype=tf.float32)

    # PROJECT TO EMBEDDING
    x = Dense(embed_dim, name="Input_Projection", dtype=tf.float32)(input_layer)

    # ATTENTION LAYER 1 (self-attention with residual connection)
    attention_output = tf.keras.layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=embed_dim // num_heads, name="Attention_1")(x, x)
    x = Add(name="Add_1")([x, attention_output])
    x = tf.keras.layers.LayerNormalization(epsilon=1e-6, name="Norm_1")(x)

    # ENCODER COMPRESSION
    encoder = LSTM(embed_dim // 2, return_sequences=False, dropout=dropout_rate,
                  name="Encoder_Compress", dtype=tf.float32)(x)

    # DECODER EXPANSION
    decoder = RepeatVector(timesteps, name="Decoder_Repeat")(encoder)
    decoder = LSTM(embed_dim, return_sequences=True, dropout=dropout_rate,
                  name="Decoder_Expand", dtype=tf.float32)(decoder)

    # ATTENTION LAYER 2 (half the heads of layer 1, at least one)
    decoder_heads = max(1, num_heads // 2)
    attention_output_2 = tf.keras.layers.MultiHeadAttention(
        num_heads=decoder_heads, key_dim=embed_dim // decoder_heads,
        name="Attention_2")(decoder, decoder)
    decoder = Add(name="Add_2")([decoder, attention_output_2])
    decoder = tf.keras.layers.LayerNormalization(epsilon=1e-6, name="Norm_2")(decoder)

    # OUTPUT PROJECTION
    output_layer = TimeDistributed(
        Dense(n_features, activation=output_activation, dtype=tf.float32),
        name="Attention_Output")(decoder)

    model = Model(inputs=input_layer, outputs=output_layer, name="Constrained_Attention_Autoencoder")
    selected_optimizer = Adam(learning_rate=learning_rate) if optimizer == 'adam' else RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=selected_optimizer, loss='mse', metrics=['mae'])

    print(f" Attention model: {int(model.count_params()):,} parameters")
    return model

# Builder functions for Keras Tuner with type conversion
def build_lstm_autoencoder(hp):
    """
    Keras Tuner model-builder for the LSTM autoencoder.

    Registers the LSTM search space on ``hp`` and returns the compiled model
    built from the sampled values. The input shape comes from the module-level
    ``timesteps_global`` / ``n_features_global``.
    """
    # Dict literals evaluate top to bottom, so hyperparameters are registered
    # in the order: latent_dim, activation, dropout_rate, learning_rate, optimizer.
    sampled = {
        'latent_dim': int(hp.Int('latent_dim', min_value=8, max_value=64, step=8)),
        'activation': hp.Choice('activation', values=['relu', 'tanh']),
        'dropout_rate': float(hp.Float('dropout_rate', min_value=0.0, max_value=0.4, step=0.1)),
        'learning_rate': float(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        'optimizer': hp.Choice('optimizer', values=['adam', 'rmsprop']),
    }
    return build_lstm_autoencoder_architecture(
        timesteps=timesteps_global,
        n_features=n_features_global,
        **sampled
    )

def build_conv1d_autoencoder(hp):
    """
    Keras Tuner model-builder for the Conv1D autoencoder.

    Registers the Conv1D search space on ``hp`` and returns the compiled model
    built from the sampled values. The input shape comes from the module-level
    ``timesteps_global`` / ``n_features_global``.
    """
    # Registration order: filters, kernel_size, dropout_rate, learning_rate, optimizer.
    sampled = {
        'filters': int(hp.Int('filters', min_value=8, max_value=64, step=8)),
        'kernel_size': int(hp.Choice('kernel_size', values=[3, 5])),
        'dropout_rate': float(hp.Float('dropout_rate', min_value=0.0, max_value=0.4, step=0.1)),
        'learning_rate': float(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        'optimizer': hp.Choice('optimizer', values=['adam', 'rmsprop']),
    }
    return build_constrained_conv1d_autoencoder(
        timesteps=timesteps_global,
        n_features=n_features_global,
        **sampled
    )

def build_gru_autoencoder(hp):
    """
    Keras Tuner model-builder for the GRU autoencoder.

    Registers the GRU search space on ``hp`` and returns the compiled model
    built from the sampled values. The input shape comes from the module-level
    ``timesteps_global`` / ``n_features_global``.
    """
    # Registration order: units, dropout_rate, learning_rate, optimizer.
    sampled = {
        'units': int(hp.Int('units', min_value=8, max_value=64, step=8)),
        'dropout_rate': float(hp.Float('dropout_rate', min_value=0.0, max_value=0.4, step=0.1)),
        'learning_rate': float(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        'optimizer': hp.Choice('optimizer', values=['adam', 'rmsprop']),
    }
    return build_constrained_gru_autoencoder(
        timesteps=timesteps_global,
        n_features=n_features_global,
        **sampled
    )

def build_hybrid_autoencoder(hp):
    """
    Keras Tuner model-builder for the hybrid (Conv1D + LSTM + GRU) autoencoder.

    Registers the hybrid search space on ``hp`` and returns the compiled model
    built from the sampled values. The input shape comes from the module-level
    ``timesteps_global`` / ``n_features_global``.

    The lower bound of the three width hyperparameters is 16 because
    ``build_constrained_hybrid_autoencoder`` clamps each of them to at least
    16. Sampling 8 would build the same model as 16, wasting trials and
    recording a hyperparameter value that was never actually used.
    """
    return build_constrained_hybrid_autoencoder(
        timesteps=timesteps_global,
        n_features=n_features_global,
        conv_filters=int(hp.Int('conv_filters', min_value=16, max_value=64, step=8)),
        lstm_units=int(hp.Int('lstm_units', min_value=16, max_value=64, step=8)),
        gru_units=int(hp.Int('gru_units', min_value=16, max_value=32, step=8)),
        dropout_rate=float(hp.Float('dropout_rate', min_value=0.0, max_value=0.4, step=0.1)),
        learning_rate=float(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop'])
    )

def build_attention_autoencoder(hp):
    """
    Keras Tuner model-builder for the attention autoencoder.

    Registers the attention search space on ``hp`` and returns the compiled
    model built from the sampled values. The input shape comes from the
    module-level ``timesteps_global`` / ``n_features_global``.
    """
    # Registration order: embed_dim, num_heads, dropout_rate, learning_rate, optimizer.
    sampled = {
        'embed_dim': int(hp.Int('embed_dim', min_value=8, max_value=64, step=8)),
        'num_heads': int(hp.Int('num_heads', min_value=1, max_value=4)),
        'dropout_rate': float(hp.Float('dropout_rate', min_value=0.0, max_value=0.4, step=0.1)),
        'learning_rate': float(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
        'optimizer': hp.Choice('optimizer', values=['adam', 'rmsprop']),
    }
    return build_constrained_attention_autoencoder(
        timesteps=timesteps_global,
        n_features=n_features_global,
        **sampled
    )


def create_multi_model_tuners(max_trials=25, seed=42):
    """
    Create one Keras Tuner ``RandomSearch`` per autoencoder architecture.

    Any existing tuning directory of a model is deleted first, so every call
    starts from a clean search.

    Parameters
    ----------
    max_trials : int
        Number of hyperparameter combinations each tuner may try.
    seed : int or None
        Seed for the tuners' random sampling. The global NumPy / TensorFlow
        seeds are not passed to the tuner, so without an explicit seed the
        sampled hyperparameters differ from run to run. Pass ``None`` for
        unseeded behaviour.

    Returns
    -------
    dict[str, kt.RandomSearch]
        Keys: 'LSTM', 'Conv1D', 'GRU', 'Hybrid', 'Attention' (in that order).
    """
    import shutil

    models_config = {
        'LSTM': {
            'builder': build_lstm_autoencoder,
            'directory': 'lstm_tuning',
            'project': 'can_lstm_optimization'
        },
        'Conv1D': {
            'builder': build_conv1d_autoencoder,
            'directory': 'conv1d_tuning',
            'project': 'can_conv1d_optimization'
        },
        'GRU': {
            'builder': build_gru_autoencoder,
            'directory': 'gru_tuning',
            'project': 'can_gru_optimization'
        },
        'Hybrid': {
            'builder': build_hybrid_autoencoder,
            'directory': 'hybrid_tuning',
            'project': 'can_hybrid_optimization'
        },
        'Attention': {
            'builder': build_attention_autoencoder,
            'directory': 'attention_tuning',
            'project': 'can_attention_optimization'
        }
    }

    tuners = {}

    for model_name, config in models_config.items():
        # Clean directory if exists
        if os.path.exists(config['directory']):
            shutil.rmtree(config['directory'])

        tuners[model_name] = kt.RandomSearch(
            config['builder'],
            objective='val_loss',
            max_trials=max_trials,
            seed=seed,
            directory=config['directory'],
            project_name=config['project'],
            overwrite=True
        )

    return tuners

def safe_shape_align(reconstructions, target_shape):
    """
    Try to make ``reconstructions`` match ``target_shape`` along axis 1.

    When the shapes already match the array is returned as float32. Otherwise,
    if the rank and the last dimension agree, axis 1 is truncated or
    zero-padded at the end to the target length.

    Returns
    -------
    (np.ndarray of float32, bool)
        The (possibly adjusted) array and whether its shape now equals
        ``target_shape``.
    """
    aligned = reconstructions.astype(np.float32)
    if reconstructions.shape == target_shape:
        return aligned, True

    print(f"⚠ Shape mismatch: {reconstructions.shape} vs {target_shape}")

    same_rank = len(aligned.shape) == len(target_shape)
    if same_rank and aligned.shape[-1] == target_shape[-1]:
        # Only the timestep axis is repaired; the batch axis is left alone.
        have_steps, want_steps = aligned.shape[1], target_shape[1]
        if have_steps > want_steps:
            aligned = aligned[:, :want_steps, :]
        elif have_steps < want_steps:
            tail = want_steps - have_steps
            aligned = np.pad(aligned, ((0, 0), (0, tail), (0, 0)), 'constant')

        if aligned.shape == target_shape:
            return aligned.astype(np.float32), True

    print(" Could not align shapes")
    return aligned.astype(np.float32), False

def train_and_evaluate_multi_models(X_train, X_test, timesteps_val, n_features_val):
    """Tune, train and evaluate every autoencoder from ``create_multi_model_tuners``.

    For each (name, tuner) pair the function runs the hyperparameter search,
    takes the best trial's model, trains it further with longer-patience
    callbacks, and scores its reconstructions of ``X_test``.  An exception in
    one model is printed and that model is skipped; the others still run.

    Side effects:
        Overwrites the module globals ``timesteps_global``,
        ``n_features_global`` and ``X_test_global``.

    Args:
        X_train: Training sequences; cast to float32 and used as both input
            and reconstruction target.
        X_test: Held-out sequences; cast to float32.  NOTE(review): this same
            array is the validation set for the search and for early stopping
            and is also used for the final metrics, so the reported scores do
            not come from data unseen during model selection.
        timesteps_val: Sequence length; stored in ``timesteps_global``.
        n_features_val: Feature count; stored in ``n_features_global``.

    Returns:
        dict mapping model name to a result dict (keys: 'model', 'history',
        'hyperparameters', 'test_loss', 'test_mae', 'mse', 'rmse', 'r2_score',
        'mape', 'reconstructions', 'tuner', 'training_epochs', 'parameters',
        'shape_aligned'), or None when no model finished successfully.
    """
    global timesteps_global, n_features_global, X_test_global
    timesteps_global = int(timesteps_val)
    n_features_global = int(n_features_val)
    X_test_global = X_test.astype(np.float32)

    print("\n" + "="*80)
    print(" MULTI-MODEL CAN DATA AUTOENCODER COMPARISON")
    print("="*80)
    print(f"Models: LSTM, Conv1D, GRU, Hybrid (Conv1D+LSTM+GRU), Attention")
    print(f"Data shape: {X_train.shape} -> {X_test.shape}")
    print(f"Features: {n_features_val}, Timesteps: {timesteps_val}")

    # Ensure data types
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)

    # Create tuners
    tuners = create_multi_model_tuners()

    # Results storage
    results = {}

    # Callbacks used for every trial of the hyperparameter search
    search_callbacks = [
        EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True, verbose=0),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=1e-7, verbose=0)
    ]

    # Train each model
    for model_name, tuner in tuners.items():
        print(f"\n" + "="*70)
        print(f" TRAINING {model_name.upper()} AUTOENCODER")
        print("="*70)

        try:
            # Hyperparameter search
            print(f" Starting hyperparameter search for {model_name}...")
            tuner.search(
                X_train, X_train,
                epochs=30,
                validation_data=(X_test, X_test),
                callbacks=search_callbacks,
                verbose=1
            )

            # Check if any trials were completed successfully
            best_trials = tuner.oracle.get_best_trials(1)
            if not best_trials:
                print(f" Hyperparameter search failed for {model_name}. No successful trials.")
                raise ValueError("No successful trials in hyperparameter search")

            # Get best model and hyperparameters
            best_trial = best_trials[0]
            best_hps = best_trial.hyperparameters
            best_model = tuner.get_best_models(num_models=1)[0]

            best_val_loss = float(best_trial.metrics.get_last_value('val_loss'))

            print(f" Best hyperparameters found for {model_name}")
            print(f"   Validation loss: {best_val_loss:.6f}")
            # HyperParameters.get() accepts only a name (no default argument), so
            # optional entries are read from the plain ``values`` dict instead.
            hp_values = best_hps.values
            best_batch_size = hp_values.get('batch_size', 32)
            print(f"   Batch Size: {best_batch_size}")
            print(f"   Optimizer: {hp_values.get('optimizer', 'N/A')}")

            # Extended training
            print(f" Extended training for {model_name}...")

            extended_callbacks = [
                EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1),
                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-8, verbose=1)
            ]

            history = best_model.fit(
                X_train, X_train,
                epochs=100, # Increased maximum epochs for extended training
                batch_size=best_batch_size,
                validation_data=(X_test, X_test),
                callbacks=extended_callbacks,
                verbose=1
            )

            # Evaluate model (unpacking assumes the model was compiled with
            # exactly one extra metric besides the loss)
            print(f" Evaluating {model_name}...")
            test_loss, test_mae = best_model.evaluate(X_test, X_test, verbose=0)

            # Convert to native Python types
            test_loss = float(test_loss)
            test_mae = float(test_mae)

            reconstructions = best_model.predict(X_test, verbose=0)

            # Safely align shapes
            reconstructions, shape_ok = safe_shape_align(reconstructions, X_test.shape)

            # Calculate additional metrics if shapes match
            if shape_ok:
                # Convert to numpy arrays and ensure float32
                X_test_np = X_test.astype(np.float32)
                reconstructions_np = reconstructions.astype(np.float32)

                mse = float(np.mean(np.square(X_test_np - reconstructions_np)))
                rmse = float(np.sqrt(mse))

                # R² against the global mean of the test tensor; the epsilon
                # guards against a zero denominator on constant data.
                ss_res = float(np.sum(np.square(X_test_np - reconstructions_np)))
                ss_tot = float(np.sum(np.square(X_test_np - np.mean(X_test_np))))
                r2_score = float(1 - (ss_res / (ss_tot + 1e-8)))

                # Mean absolute percentage error (epsilon avoids division by exact zeros)
                mape = float(np.mean(np.abs((X_test_np - reconstructions_np) / (X_test_np + 1e-8))) * 100)
            else:
                print(f" Could not calculate advanced metrics for {model_name} due to shape mismatch")
                mse, rmse, r2_score, mape = np.nan, np.nan, np.nan, np.nan

            # Store results with type conversion
            results[model_name] = {
                'model': best_model,
                'history': history,
                'hyperparameters': hp_values, # Store hyperparameters as dictionary
                'test_loss': safe_float_conversion(test_loss),
                'test_mae': safe_float_conversion(test_mae),
                'mse': safe_float_conversion(mse),
                'rmse': safe_float_conversion(rmse),
                'r2_score': safe_float_conversion(r2_score),
                'mape': safe_float_conversion(mape),
                'reconstructions': reconstructions,
                'tuner': tuner,
                'training_epochs': int(len(history.history['loss'])),
                'parameters': int(best_model.count_params()),
                'shape_aligned': bool(shape_ok)
            }

            print(f" {model_name} completed successfully!")
            print(f"   Test Loss: {test_loss:.6f}")
            if not np.isnan(r2_score):
                print(f"   R² Score: {r2_score:.4f}")
            print(f"   Parameters: {int(best_model.count_params()):,}")

        except Exception as e:
            # Best-effort: one failing architecture must not abort the comparison.
            print(f" Error training {model_name}: {str(e)}")
            print(f"   Skipping {model_name}...")
            continue

    if not results:
        print(" No models were successfully trained!")
        return None

    return results

def comprehensive_model_comparison(results):
    """Tabulate the per-model metrics, print a ranking and name the winners.

    Args:
        results: Mapping of model name to the result dict built during
            training (missing keys fall back to NaN / 0 / False).

    Returns:
        tuple ``(comparison_df, best_overall)``: the metrics table sorted by
        ascending test loss (NaN last) and the name of the model in its first
        row, or ``(None, None)`` when ``results`` is empty.  ``best_overall``
        stays None when every test loss is NaN.
    """
    if not results:
        print(" No results to compare!")
        return None, None

    banner = "=" * 80
    print("\n" + banner)
    print(" COMPREHENSIVE MODEL PERFORMANCE COMPARISON")
    print(banner)

    def _as_row(name, res):
        # One table row per model, with every value coerced to a plain Python type.
        return {
            'Model': str(name),
            'Test_Loss_MSE': safe_float_conversion(res.get('test_loss', np.nan)),
            'Test_MAE': safe_float_conversion(res.get('test_mae', np.nan)),
            'RMSE': safe_float_conversion(res.get('rmse', np.nan)),
            'R²_Score': safe_float_conversion(res.get('r2_score', np.nan)),
            'MAPE_%': safe_float_conversion(res.get('mape', np.nan)),
            'Training_Epochs': int(res.get('training_epochs', 0)),
            'Parameters': int(res.get('parameters', 0)),
            'Params_K': safe_float_conversion(res.get('parameters', 0) / 1000) if res.get('parameters') else np.nan,
            'Shape_Aligned': bool(res.get('shape_aligned', False))
        }

    rows = [_as_row(name, res) for name, res in results.items()]
    comparison_df = pd.DataFrame(rows).sort_values('Test_Loss_MSE', na_position='last')

    print("\n PERFORMANCE RANKING (sorted by Test Loss):")
    print("-" * 80)
    print(comparison_df.to_string(index=False, float_format='%.6f'))

    best_overall = None
    best_r2 = None

    # Winner by test loss: first row of the sorted table.
    if comparison_df['Test_Loss_MSE'].notnull().any():
        top_row = comparison_df.iloc[0]
        best_overall = str(top_row['Model'])
        print(f"\n BEST MODELS BY CRITERIA:")
        print("-"* 40)
        print(f" Overall (Test Loss): {best_overall}")
        print(f"   Loss: {top_row['Test_Loss_MSE']:.6f}")
        if not np.isnan(top_row['R²_Score']):
            print(f"   R²: {top_row['R²_Score']:.4f}")

    # Winner by R²; only printed when it is a different model.
    if comparison_df['R²_Score'].notnull().any():
        r2_row = comparison_df.loc[comparison_df['R²_Score'].idxmax()]
        best_r2 = str(r2_row['Model'])
        if best_r2 != best_overall:
             print(f"\n Best R² Score: {best_r2}")
             print(f"   R²: {r2_row['R²_Score']:.4f}")
             print(f"   Loss: {r2_row['Test_Loss_MSE']:.6f}")

    # Smallest model; only printed when not already listed above.
    if comparison_df['Parameters'].notnull().any():
        eff_row = comparison_df.loc[comparison_df['Parameters'].idxmin()]
        most_efficient = str(eff_row['Model'])
        if most_efficient not in (best_overall, best_r2):
            print(f"\n Most Efficient (Parameters): {most_efficient}")
            print(f"   Parameters: {int(eff_row['Parameters']):,}")
            print(f"   Loss: {eff_row['Test_Loss_MSE']:.6f}")

    return comparison_df, best_overall

def save_multi_model_results(results, comparison_df, best_model_name):
    """Write the comparison table, models, hyperparameters, histories and a text report.

    All files are written to the current working directory.  Each save step
    is wrapped in its own ``try`` so one failure (printed, not raised) does
    not prevent the remaining artefacts from being written.

    Files written:
        - ``multi_model_performance_comparison.csv`` (when ``comparison_df`` is given)
        - ``best_model_<name>_autoencoder.h5`` (when ``best_model_name`` is in ``results``)
        - ``can_<name>_autoencoder.h5``, ``can_<name>_hyperparameters.json`` and
          ``can_<name>_training_history.csv`` for every model in ``results``
        - ``can_multi_model_comparison_report.txt``

    Args:
        results: Mapping of model name to its result dict ('model',
            'hyperparameters' and 'history' entries are each optional).
        comparison_df: Metrics table, expected to be already ordered best-first;
            may be None.
        best_model_name: Name of the recommended model, or None.

    Returns:
        None.
    """
    if not results:
        print(" No results to save!")
        return

    print("\n" + "="*70)
    print(" SAVING MULTI-MODEL RESULTS")
    print("="*70)

    # Save comparison DataFrame
    if comparison_df is not None:
        comparison_df.to_csv('multi_model_performance_comparison.csv', index=False)
        print("Performance comparison saved to 'multi_model_performance_comparison.csv'")

    # Save best model (it is saved again under its can_<name> filename below)
    if best_model_name and best_model_name in results:
        try:
            best_model = results[best_model_name]['model']
            best_model.save(f'best_model_{best_model_name.lower()}_autoencoder.h5')
            print(f" Best model ({best_model_name}) saved")
        except Exception as e:
            print(f" Error saving best model {best_model_name}: {e}")

    # Save all models and their data
    for model_name, result in results.items():
        try:
            # Save model
            if 'model' in result:
                result['model'].save(f'can_{model_name.lower()}_autoencoder.h5')
                print(f" {model_name} model saved")

            # Save hyperparameters with type conversion
            if 'hyperparameters' in result:
                # Hyperparameters are already a dictionary from best_hps.values
                hp_dict = convert_numpy_types(result['hyperparameters'])

                # Save as JSON for better type handling
                with open(f'can_{model_name.lower()}_hyperparameters.json', 'w') as f:
                    json.dump(hp_dict, f, indent=2)
                print(f" {model_name} hyperparameters saved")

            # Save training history with type conversion
            if 'history' in result and result['history']:
                history_dict = {
                    key: [convert_numpy_types(v) for v in values]
                    for key, values in result['history'].history.items()
                }

                history_df = pd.DataFrame(history_dict)
                history_df.to_csv(f'can_{model_name.lower()}_training_history.csv', index=False)
                print(f" {model_name} training history saved")

        except Exception as e:
            print(f" Error saving results for {model_name}: {e}")

    # Save comprehensive report with type conversion
    try:
        # Explicit UTF-8: the report contains the non-ASCII "R²" label.
        with open('can_multi_model_comparison_report.txt', 'w', encoding='utf-8') as f:
            f.write("CAN DATA MULTI-MODEL AUTOENCODER COMPARISON REPORT\n")
            f.write("=" * 60 + "\n\n")

            f.write("MODELS COMPARED:\n")
            f.write("- LSTM Autoencoder: Traditional recurrent approach\n")
            f.write("- Conv1D Autoencoder: Spatial pattern detection\n")
            f.write("- GRU Autoencoder: Efficient recurrent architecture\n")
            f.write("- Hybrid Autoencoder (Conv1D+LSTM+GRU): Combined approach\n")
            f.write("- Attention Autoencoder: Self-attention mechanism\n\n")

            f.write("PERFORMANCE RANKING:\n")
            f.write("-" * 30 + "\n")
            if comparison_df is not None:
                # Number rows by their position in the (already sorted) table.
                # iterrows() yields the original index labels, which no longer
                # reflect the ranking once the frame has been sorted.
                for rank, (_, row) in enumerate(comparison_df.iterrows(), start=1):
                    f.write(f"{rank}. {row['Model']}\n")
                    test_loss = safe_float_conversion(row['Test_Loss_MSE'])
                    r2_score = safe_float_conversion(row['R²_Score'])
                    params = int(row['Parameters']) if not np.isnan(row['Parameters']) else 0

                    f.write(f"   Test Loss: {test_loss:.6f}\n")
                    if not np.isnan(r2_score):
                        f.write(f"   R² Score: {r2_score:.4f}\n")
                    f.write(f"   Parameters: {params:,}\n\n")

            f.write(f"RECOMMENDED MODEL: {best_model_name if best_model_name else 'N/A'}\n")

        print(" Comprehensive report saved to 'can_multi_model_comparison_report.txt'")
    except Exception as e:
        print(f" Error saving report: {e}")

def create_comprehensive_visualizations(results, feature_names):
    """Plot training curves and a metric bar chart for every trained model.

    Panel 1 overlays each model's training loss (solid) and validation loss
    (dashed).  Panel 2 shows test loss and RMSE, each divided by the largest
    non-NaN value across models, next to the raw R² score.  Any exception
    raised while plotting is caught and printed.

    Args:
        results: Mapping of model name to its result dict.
        feature_names: Accepted for interface compatibility; not used here.

    Returns:
        None.
    """
    if not results:
        print(" No results to visualize!")
        return

    rule = "=" * 70
    print("\n" + rule)
    print(" CREATING COMPREHENSIVE VISUALIZATIONS")
    print(rule)

    try:
        model_names = list(results.keys())
        palette = ['blue', 'red', 'green', 'orange', 'purple'][:len(model_names)]

        # Create large figure with multiple subplots
        plt.figure(figsize=(25, 20))

        # --- Panel 1: loss curves -------------------------------------------
        plt.subplot(4, 4, 1)
        for idx, name in enumerate(model_names):
            run = results[name]
            if 'history' not in run or not run['history']:
                continue

            curves = run['history'].history
            train_curve = [safe_float_conversion(v) for v in curves['loss']]
            plt.plot(train_curve, label=f'{name} Train', color=palette[idx], linewidth=2)

            if 'val_loss' in curves:
                val_curve = [safe_float_conversion(v) for v in curves['val_loss']]
                plt.plot(val_curve, label=f'{name} Val',
                         color=palette[idx], linewidth=2, linestyle='--')

        plt.title('Training Loss Comparison', fontsize=14, fontweight='bold')
        plt.xlabel('Epoch')
        plt.ylabel('Loss (MSE)')
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.grid(True, alpha=0.3)

        # --- Panel 2: metric bars -------------------------------------------
        plt.subplot(4, 4, 2)

        def _collect(metric_key):
            # One value per model, in model_names order; NaN when missing.
            return [safe_float_conversion(results[name].get(metric_key, np.nan))
                    for name in model_names]

        def _scale_by_max(values):
            # Divide by the largest non-NaN entry; leave untouched if all are NaN.
            finite = np.array([v for v in values if not np.isnan(v)])
            as_array = np.array(values)
            if finite.size > 0:
                return as_array / max(finite)
            return as_array

        test_losses = _collect('test_loss')
        rmses = _collect('rmse')
        r2_scores = _collect('r2_score')

        test_losses_norm = _scale_by_max(test_losses)
        rmses_norm = _scale_by_max(rmses)

        positions = np.arange(len(model_names))
        bar_width = 0.25

        plt.bar(positions - bar_width, test_losses_norm, bar_width, label='Test Loss (norm)', alpha=0.7)
        plt.bar(positions, rmses_norm, bar_width, label='RMSE (norm)', alpha=0.7)
        plt.bar(positions + bar_width, r2_scores, bar_width, label='R² Score', alpha=0.7)

        plt.title('Performance Metrics', fontsize=14, fontweight='bold')
        plt.xlabel('Models')
        plt.ylabel('Normalized Values')
        plt.xticks(positions, model_names, rotation=45)
        plt.legend()
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        print(" Visualizations created successfully")

    except Exception as e:
        print(f" Error creating visualizations: {e}")

def multi_model_can_pipeline():
    """Run the end-to-end comparison: load data, train, compare, plot, save.

    Returns:
        tuple ``(results, comparison_df, best_model_name)``; all three are
        None when data preparation fails or no model trains successfully.
    """
    banner = "=" * 80
    failure = (None, None, None)

    print("\n" + banner)
    print(" CAN DATA MULTI-MODEL AUTOENCODER COMPARISON PIPELINE")
    print(banner)
    print(" Analyzing CAN bus data with 5 different autoencoder architectures:")
    for architecture_line in (
        "   1. LSTM Autoencoder",
        "   2. Conv1D Autoencoder",
        "   3. GRU Autoencoder",
        "   4. Hybrid Autoencoder (Conv1D+LSTM+GRU)",
        "   5. Attention Autoencoder",
    ):
        print(architecture_line)
    print(banner)

    # Step 1: load and scale the CAN data
    data_scaled, scaler, feature_names = prepare_can_data()
    if data_scaled is None:
        print(" Failed to prepare data. Exiting.")
        return failure

    # Step 2: build the train/test sequences
    timesteps = 20
    X_train, X_test, n_features = prepare_data(data_scaled, timesteps=timesteps)
    if any(item is None for item in (X_train, X_test, n_features)):
        print(" Failed to prepare training data. Exiting.")
        return failure

    print(" Data prepared successfully:")
    print(f"   Training sequences: {X_train.shape}")
    print(f"   Test sequences: {X_test.shape}")
    print(f"   Features: {n_features}")
    print(f"   Timesteps: {timesteps}")

    # Step 3: tune, train and score every architecture
    results = train_and_evaluate_multi_models(X_train, X_test, timesteps, n_features)
    if not results:
        print(" No models were successfully trained!")
        return failure

    # Steps 4-6: compare, visualise, persist (results is non-empty from here on)
    comparison_df, best_model_name = comprehensive_model_comparison(results)
    create_comprehensive_visualizations(results, feature_names)
    save_multi_model_results(results, comparison_df, best_model_name)

    print("\n" + banner)
    print(" MULTI-MODEL CAN DATA ANALYSIS COMPLETED!")
    print(banner)
    print(f" Best Overall Model: {best_model_name if best_model_name else 'N/A'}")
    print(f" Total Models Trained: {len(results)}")
    print(" Results saved to multiple files")

    print("\n Generated Files:")
    for generated in (
        "- multi_model_performance_comparison.csv",
        "- can_multi_model_comparison_report.txt",
        "- best_model_[name]_autoencoder.h5",
        "- can_[model]_autoencoder.h5 (for each model)",
        "- can_[model]_hyperparameters.json",
        "- can_[model]_training_history.csv",
    ):
        print(generated)

    return results, comparison_df, best_model_name

# Entry point: first check that each architecture builder can construct a
# model, then run the full comparison pipeline.
if __name__ == "__main__":
    # Build each architecture once as a smoke test before the long-running pipeline
    print(" TESTING CONSTRAINED ARCHITECTURES")
    print("=" * 60)

    # Dimensions handed to the builders below. No data array is created here;
    # only these two sizes are used.
    test_timesteps = 20
    test_n_features = 10

    test_models = {}

    try:
        # Any builder failure aborts the whole smoke test (see except below).
        test_models['LSTM'] = build_lstm_autoencoder_architecture(test_timesteps, test_n_features, latent_dim=32)
        test_models['Conv1D'] = build_constrained_conv1d_autoencoder(test_timesteps, test_n_features, filters=32)
        test_models['GRU'] = build_constrained_gru_autoencoder(test_timesteps, test_n_features, units=32)
        test_models['Hybrid'] = build_constrained_hybrid_autoencoder(test_timesteps, test_n_features)
        test_models['Attention'] = build_constrained_attention_autoencoder(test_timesteps, test_n_features, embed_dim=32)

        print("\n ALL CONSTRAINED MODELS BUILT SUCCESSFULLY")
        print("=" * 60)
        for name, model in test_models.items():
            print(f"{name:10}: {int(model.count_params()):,} parameters")

    except Exception as e:
        # Report the failure and discard any partially built models; the
        # pipeline below still runs.
        print(f" Error building constrained models: {e}")
        test_models = {}

    # Run the enhanced multi-model pipeline.
    # NOTE: the third return value of multi_model_can_pipeline() is the best
    # model's *name* (or None), so `best_model` holds a name, not a model object.
    results, comparison, best_model = multi_model_can_pipeline()

Trial 25 Complete [00h 00m 43s]
val_loss: 0.45371848344802856

Best val_loss So Far: 0.4520763158798218
Total elapsed time: 00h 16m 37s

 BUILDING CONSTRAINED CONV1D AUTOENCODER
 Filters: 64, Kernel: 5, Optimizer: rmsprop
 Conv1D model: 22,253 parameters
 Best hyperparameters found for Conv1D
   Validation loss: 0.452076
 Error training Conv1D: HyperParameters.get() takes 2 positional arguments but 3 were given
   Skipping Conv1D...

 TRAINING GRU AUTOENCODER
 Starting hyperparameter search for GRU...

Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
8                 |8                 |units
0.3               |0.3               |dropout_rate
0.0014431         |0.0014431         |learning_rate
adam              |adam              |optimizer


 BUILDING CONSTRAINED GRU AUTOENCODER
 Units: 8, Optimizer: adam
 GRU model: 2,541 parameters
Epoch 1/30
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 29ms/step - loss: 1.1458 - mae: 0.9370 - val_

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipython-input-3057412656.py", line 1237, in <cell line: 0>
    results, comparison, best_model = multi_model_can_pipeline()
                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-3057412656.py", line 1174, in multi_model_can_pipeline
    results = train_and_evaluate_multi_models(X_train, X_test, timesteps, n_features)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-3057412656.py", line 794, in train_and_evaluate_multi_models
    tuner.search(
  File "/usr/local/lib/python3.12/dist-packages/keras_tuner/src/engine/base_tuner.py", line 234, in search
    self._try_run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/usr/local/lib/python3.12/dist-packages/keras_tuner/src/engin

TypeError: object of type 'NoneType' has no len()

In [None]:
from google.colab import drive
import os
import shutil

# Copy the artefacts written by the pipeline from the working directory to
# Google Drive. Files that do not exist are reported and skipped.
drive.mount('/content/drive')

drive_output_dir = '/content/drive/MyDrive/CAN_Autoencoder_Results'

# Create the destination folder if it doesn't exist
os.makedirs(drive_output_dir, exist_ok=True)

print(f"Google Drive mounted and destination folder created at: {drive_output_dir}")

files_to_copy = [
    'multi_model_performance_comparison.csv',
    'can_multi_model_comparison_report.txt'
]

# Add model-specific files based on the results dictionary.
# `results` only exists if the pipeline cell has been run in this kernel.
pipeline_results = globals().get('results')
if pipeline_results is not None:
    for model_name in pipeline_results.keys():
        file_stem = f'can_{model_name.lower()}'
        files_to_copy.append(f'{file_stem}_autoencoder.h5')
        # Hyperparameters are written as JSON by save_multi_model_results, not CSV.
        files_to_copy.append(f'{file_stem}_hyperparameters.json')
        files_to_copy.append(f'{file_stem}_training_history.csv')

    # Add the best model file if it exists. The pipeline cell binds the best
    # model's *name* to the global `best_model`; `best_model_name` is accepted
    # as well in case it has been defined separately.
    best_name = globals().get('best_model_name') or globals().get('best_model')
    if isinstance(best_name, str):
        best_model_filename = f'best_model_{best_name.lower()}_autoencoder.h5'
        if os.path.exists(best_model_filename):
            files_to_copy.append(best_model_filename)


# Copy each file to Google Drive
print("\nCopying files to Google Drive...")
copied_files = []
for file_name in files_to_copy:
    source_path = os.path.join('.', file_name) # Look for files in the current directory
    destination_path = os.path.join(drive_output_dir, file_name)
    if os.path.exists(source_path):
        try:
            shutil.copy(source_path, destination_path)
            copied_files.append(file_name)
            print(f" Copied: {file_name}")
        except Exception as e:
            print(f" Error copying {file_name}: {e}")
    else:
        print(f" Skipped: {file_name} (not found)")

print(f"\nFinished copying {len(copied_files)} files to Google Drive.")
if len(copied_files) < len(files_to_copy):
    print("Note: Some expected files were not found or could not be copied.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted and destination folder created at: /content/drive/MyDrive/CAN_Autoencoder_Results

Copying files to Google Drive...
 Copied: multi_model_performance_comparison.csv
 Copied: can_multi_model_comparison_report.txt
 Copied: can_lstm_autoencoder.h5
 Skipped: can_lstm_hyperparameters.csv (not found)
 Copied: can_lstm_training_history.csv
 Copied: can_conv1d_autoencoder.h5
 Skipped: can_conv1d_hyperparameters.csv (not found)
 Copied: can_conv1d_training_history.csv
 Copied: can_gru_autoencoder.h5
 Skipped: can_gru_hyperparameters.csv (not found)
 Copied: can_gru_training_history.csv
 Copied: can_hybrid_autoencoder.h5
 Skipped: can_hybrid_hyperparameters.csv (not found)
 Copied: can_hybrid_training_history.csv
 Copied: can_attention_autoencoder.h5
 Skipped: can_attention_hyperparameters.csv (not found)
 Copied: can_attention_training_history.csv
