In [1]:
from tensorflow.keras import backend as K

def clear_memory():
    """Reset Keras and TensorFlow global state between experiments.

    Clears the Keras backend session and then resets TensorFlow's default
    graph. Takes no arguments and returns nothing.
    """
    # Imported locally: the notebook's top-level `import tensorflow as tf`
    # lives in a later cell, so relying on the global name raised NameError
    # whenever this helper was called before that cell had been executed.
    import tensorflow as tf

    K.clear_session()
    tf.compat.v1.reset_default_graph()

2025-01-16 06:34:33.374437: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-16 06:34:33.423177: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import gc
# Run a full garbage-collection pass. The call returns the number of
# unreachable objects it found, which the notebook echoes as the cell output.
gc.collect()

23

In [3]:
import numpy as np
import glob
import os
import cv2
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Flatten, Reshape, Conv2D, Conv2DTranspose, Lambda, BatchNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score
import re

# Load images, steering angles, and throttle values from the catalog files
def load_catalogs(folder: str):
    """Load images with their angle and throttle labels from catalog files.

    Every ``*.catalog`` file directly inside ``folder`` is read in natural
    (numeric-aware) filename order. Each non-blank line is split on
    whitespace; the image file name is taken from token 7, the angle from
    token 9 and the throttle from token 13, each with its surrounding
    punctuation stripped. Images are read from ``{folder}/images``.

    Records whose image file does not exist or cannot be decoded are skipped,
    as are blank lines.

    Args:
        folder: Directory holding the catalog files and an ``images``
            subdirectory.

    Returns:
        A tuple ``(images, angles, throttles)`` of NumPy arrays with one entry
        per loaded record, in the order the records were read.

    Raises:
        AssertionError: If a loaded image does not have shape (224, 224, 3).
    """
    def _natural_key(path):
        # Digit runs compare as integers so "..._10" sorts after "..._2".
        return [int(tok) if tok.isdigit() else tok for tok in re.split(r'(\d+)', path)]

    _angle, _throttle, _image = [], [], []
    _img_dir = f"{folder}/images"

    for _file in sorted(glob.glob(f"{folder}/*.catalog"), key=_natural_key):
        with open(_file) as f:
            for _line in f:
                # Split once per record instead of once per extracted field.
                _fields = _line.split()
                if not _fields:
                    # Blank line (e.g. a stray newline): there is no record
                    # to parse, and indexing the tokens would raise IndexError.
                    continue

                # Token looks like `"name.jpg",` -> drop the leading quote and
                # the trailing quote + comma.
                _name = _fields[7][1:-2]
                _img_path = os.path.join(_img_dir, _name)

                if not os.path.isfile(_img_path):
                    continue

                _img = cv2.imread(_img_path)
                if _img is None:
                    # cv2.imread signals an unreadable file by returning None.
                    continue

                assert _img.shape == (224, 224, 3), f"img {_name} has shape {_img.shape}"

                _image.append(_img)
                # Numeric tokens carry one trailing separator character.
                _angle.append(float(_fields[9][0:-1]))
                _throttle.append(float(_fields[13][0:-1]))

    print(f'Image count: {len(_image)}')
    return np.array(_image), np.array(_angle), np.array(_throttle)

# Convert images to float32 and scale pixel values by 1/255
def data_preprocessing(images):
    """Return a float32 copy of ``images`` with every value divided by 255.

    Args:
        images: NumPy array of pixel values.

    Returns:
        A new float32 array of the same shape; the input is not modified.
    """
    as_float = images.astype("float32")
    return as_float / 255.0

# Convolutional encoder with BatchNormalization; outputs z_mean, z_log_var, and a sampled z
def build_encoder(input_shape=(224, 224, 3), latent_dim=64):
    """Build the convolutional encoder half of the VAE.

    Three stride-2 3x3 convolutions (32, 64 and 128 filters), each followed by
    batch normalization, feed a 256-unit dense layer. Two linear heads produce
    the latent mean and log-variance, and a Lambda layer draws
    ``z = z_mean + exp(0.5 * z_log_var) * eps`` with ``eps`` sampled from a
    standard normal.

    Args:
        input_shape: Shape of one input image.
        latent_dim: Size of the latent vector.

    Returns:
        A Keras ``Model`` named "encoder" whose outputs are
        ``[z_mean, z_log_var, z]``.
    """
    encoder_in = Input(shape=input_shape, name="encoder_input")

    # Downsampling stack: each stage halves the spatial resolution.
    features = encoder_in
    for n_filters in (32, 64, 128):
        features = Conv2D(n_filters, (3, 3), activation="relu", strides=2, padding="same")(features)
        features = BatchNormalization()(features)

    features = Flatten()(features)
    features = Dense(256, activation="relu")(features)
    features = BatchNormalization()(features)

    z_mean = Dense(latent_dim, name="z_mean")(features)
    z_log_var = Dense(latent_dim, name="z_log_var")(features)

    # Reparameterization: sample z from the mean / log-variance pair.
    z = Lambda(
        lambda args: args[0] + tf.exp(0.5 * args[1]) * tf.random.normal(tf.shape(args[0]))
    )([z_mean, z_log_var])

    return Model(inputs=encoder_in, outputs=[z_mean, z_log_var, z], name="encoder")

# Transposed-convolution decoder with BatchNormalization
def build_decoder(latent_dim=64):
    """Build the transposed-convolution decoder half of the VAE.

    A dense layer expands the latent vector to a 28x28x128 feature map. Three
    stride-2 transposed convolutions (64, 32 and 32 filters), each followed by
    batch normalization, upsample it, and a final 3-filter sigmoid transposed
    convolution produces the output image.

    Args:
        latent_dim: Size of the latent vector fed to the decoder.

    Returns:
        A Keras ``Model`` named "decoder" mapping a latent vector to an image.
    """
    latent_in = Input(shape=(latent_dim,), name="decoder_input")

    # Project the latent vector and reshape it into a spatial feature map.
    grid = Dense(28 * 28 * 128, activation="relu")(latent_in)
    grid = BatchNormalization()(grid)
    grid = Reshape((28, 28, 128))(grid)

    # Upsampling stack: each stage doubles the spatial resolution.
    for n_filters in (64, 32, 32):
        grid = Conv2DTranspose(n_filters, (3, 3), activation="relu", strides=2, padding="same")(grid)
        grid = BatchNormalization()(grid)

    reconstruction = Conv2DTranspose(3, (3, 3), activation="sigmoid", padding="same")(grid)

    return Model(inputs=latent_in, outputs=reconstruction, name="decoder")

# VAE assembled from the encoder and decoder; loss = reconstruction + beta * KL
def build_vae(input_shape=(224, 224, 3), latent_dim=64, beta=0.01):
    """Assemble and compile the VAE from a fresh encoder and decoder.

    The training loss is attached with ``add_loss`` and equals the mean binary
    cross-entropy between the flattened input and the flattened reconstruction
    plus ``beta`` times the mean KL term computed from ``z_mean`` and
    ``z_log_var``. The model is compiled with Adam (learning rate 0.001) and
    no separate ``loss`` argument.

    Args:
        input_shape: Shape of one input image.
        latent_dim: Size of the latent vector.
        beta: Weight applied to the KL term.

    Returns:
        A tuple ``(vae, encoder)``; the decoder is only reachable through
        ``vae``.
    """
    encoder = build_encoder(input_shape, latent_dim)
    decoder = build_decoder(latent_dim)

    vae_in = Input(shape=input_shape, name="vae_input")
    z_mean, z_log_var, z = encoder(vae_in)
    reconstructed = decoder(z)
    vae = Model(inputs=vae_in, outputs=reconstructed, name="vae")

    # Reconstruction term: cross-entropy over all flattened pixel values.
    flat_target = tf.keras.backend.flatten(vae_in)
    flat_recon = tf.keras.backend.flatten(reconstructed)
    reconstruction_loss = tf.reduce_mean(
        tf.keras.losses.binary_crossentropy(flat_target, flat_recon)
    )

    # KL term, averaged over every latent element.
    kl_terms = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
    kl_loss = -0.5 * tf.reduce_mean(kl_terms)

    vae.add_loss(reconstruction_loss + beta * kl_loss)
    vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
    return vae, encoder

# Control model predicting both angle and throttle from latent features
def build_control_model(input_dim):
    """Build and compile the two-headed regression model for driving controls.

    Two hidden blocks (Dense -> BatchNormalization -> Dropout) with 128 units /
    0.3 dropout and 64 units / 0.2 dropout feed two single-unit heads: "angle"
    with tanh activation and "throttle" with sigmoid activation. Both heads
    are trained with MSE and report MAE and MSE, using the Adam optimizer.

    Args:
        input_dim: Length of the input feature vector.

    Returns:
        The compiled Keras ``Model`` with outputs ``[angle, throttle]``.
    """
    features_in = Input(shape=(input_dim,))

    hidden = features_in
    for units, drop_rate in ((128, 0.3), (64, 0.2)):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(drop_rate)(hidden)

    # tanh bounds the angle to (-1, 1); sigmoid bounds the throttle to (0, 1).
    angle_output = Dense(1, activation='tanh', name='angle')(hidden)
    throttle_output = Dense(1, activation='sigmoid', name='throttle')(hidden)

    model = Model(inputs=features_in, outputs=[angle_output, throttle_output])

    head_losses = {'angle': 'mse', 'throttle': 'mse'}
    head_metrics = {'angle': ['mae', 'mse'], 'throttle': ['mae', 'mse']}
    model.compile(optimizer='adam', loss=head_losses, metrics=head_metrics)

    return model

# Main training and evaluation pipeline
def train_and_evaluate(data_folder):
    """Train the VAE and the control model on one data folder and report metrics.

    Steps: load and scale the images, make an 80/20 train/test split
    (``random_state=42``), train the VAE to reconstruct its input, encode both
    splits into latent vectors, train the control model on those vectors, and
    print MSE and R2 for angle and throttle on the test split.

    Args:
        data_folder: Directory passed to ``load_catalogs``.

    Returns:
        A tuple ``(vae, encoder, control_model, metrics, history)`` where
        ``metrics`` is ``{'angle': {'mse', 'r2'}, 'throttle': {'mse', 'r2'}}``
        and ``history`` is the object returned by the control model's ``fit``.
    """
    # Load and preprocess data
    images, angles, throttles = load_catalogs(data_folder)
    images = data_preprocessing(images)

    # Split data
    X_train, X_test, y_train_angle, y_test_angle, y_train_throttle, y_test_throttle = train_test_split(
        images, angles, throttles, test_size=0.2, random_state=42)

    # Build and train VAE
    vae, encoder = build_vae()
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

    # NOTE(review): the test split doubles as the validation split for early
    # stopping and LR scheduling (here and for the control model below), so
    # the metrics reported at the end are measured on data that influenced
    # training. Consider carving out a separate validation split.
    vae.fit(X_train, X_train,
            epochs=10,
            batch_size=16,
            validation_data=(X_test, X_test),
            callbacks=[early_stopping, reduce_lr])

    # Extract latent representations.
    # The encoder returns [z_mean, z_log_var, z]. Use z_mean (index 0): the
    # sampled z (index 2) adds fresh Gaussian noise on every predict call,
    # which made the control model's features and the reported metrics
    # non-reproducible between otherwise identical runs.
    latent_train = encoder.predict(X_train)[0]
    latent_test = encoder.predict(X_test)[0]

    # Train control model
    control_model = build_control_model(latent_train.shape[1])
    history = control_model.fit(latent_train,
                              {'angle': y_train_angle, 'throttle': y_train_throttle},
                              validation_data=(latent_test, {'angle': y_test_angle, 'throttle': y_test_throttle}),
                              epochs=10,
                              batch_size=16,
                              callbacks=[early_stopping, reduce_lr])

    # Evaluate: the model has two outputs, so predict returns one array each.
    angle_pred, throttle_pred = control_model.predict(latent_test)

    # Calculate metrics for both controls
    metrics = {
        'angle': {
            'mse': mean_squared_error(y_test_angle, angle_pred),
            'r2': r2_score(y_test_angle, angle_pred)
        },
        'throttle': {
            'mse': mean_squared_error(y_test_throttle, throttle_pred),
            'r2': r2_score(y_test_throttle, throttle_pred)
        }
    }

    print("\nPerformance Metrics:")
    print(f"Angle - MSE: {metrics['angle']['mse']:.4f}, R2: {metrics['angle']['r2']:.4f}")
    print(f"Throttle - MSE: {metrics['throttle']['mse']:.4f}, R2: {metrics['throttle']['r2']:.4f}")

    return vae, encoder, control_model, metrics, history


In [None]:
# Usage
if __name__ == "__main__":
    import os

    # The dataset location can be overridden with the DATA_FOLDER environment
    # variable; the original machine-specific path remains the default so
    # existing runs behave as before.
    data_folder = os.environ.get(
        "DATA_FOLDER",
        "/home/UFAD/mohitkukreja/Documents/data_perfect_driving",
    )
    vae, encoder, control_model, metrics, history = train_and_evaluate(data_folder)