# In [None]:
import os
import numpy as np
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
import matplotlib.pyplot as plt
import json
from utils.model_utils import create_cnn_model, create_lstm_model, create_transformer_model
from utils.data_loader import LibriSpeechDataLoader
from utils.evaluation import ModelEvaluator
import config

# Seed the TensorFlow and NumPy random number generators
tf.random.set_seed(42)
np.random.seed(42)

# Load mappings and feature info
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; these
# .npy files should only come from a trusted earlier step of this pipeline.
char_to_num = np.load('char_to_num.npy', allow_pickle=True).item()
num_to_char = np.load('num_to_char.npy', allow_pickle=True).item()
feature_info = np.load('feature_info.npy', allow_pickle=True).item()

print("Hyperparameter Tuning")
print("=" * 50)

# Choose the best feature type based on previous results
best_feature_type = 'mfcc'  # You can change this based on your results
print(f"Using {best_feature_type} features for hyperparameter tuning")

# Load datasets (a pickled mapping with at least 'train' and 'val' entries)
feature_datasets = np.load(f'feature_datasets_{best_feature_type}.npy', allow_pickle=True).item()
train_ds = feature_datasets['train']
val_ds = feature_datasets['val']

# Get model dimensions
input_dim = feature_info[best_feature_type]['input_dim']
# Output size is the number of entries in the character vocabulary.
# (The previous code referenced an undefined name here and raised NameError.)
output_dim = len(char_to_num)

print(f"Input dimension: {input_dim}")
print(f"Output dimension: {output_dim}")

# Prepare datasets for CTC training
def prepare_ctc_data(dataset):
    """Map a ``(features, labels)`` dataset into the dict layout used for CTC.

    Args:
        dataset: Object exposing ``map``; each element is a batched
            ``(features, labels)`` pair. Axis 0 is read as the batch size and
            axis 1 as the length of both tensors.

    Returns:
        The result of ``dataset.map``. Each element is ``(inputs, dummy)``
        where ``inputs`` is a dict with keys ``'input'``, ``'y_true'``,
        ``'input_length'`` and ``'label_length'``, and ``dummy`` is a zero
        vector with one entry per batch row.
    """
    def add_ctc_inputs(features, labels):
        batch_size = tf.shape(features)[0]

        # tf.shape returns int32 while tf.ones defaults to float32; TensorFlow
        # does not promote dtypes implicitly in `*`, so cast before multiplying.
        feature_steps = tf.cast(tf.shape(features)[1], tf.float32)
        label_steps = tf.cast(tf.shape(labels)[1], tf.float32)

        # Every row of the batch receives the batch's axis-1 size as its length.
        input_length = tf.ones((batch_size, 1)) * feature_steps
        label_length = tf.ones((batch_size, 1)) * label_steps
        dummy_output = tf.zeros(batch_size)

        return {
            'input': features,
            'y_true': labels,
            'input_length': input_length,
            'label_length': label_length
        }, dummy_output

    return dataset.map(add_ctc_inputs)

# Apply the CTC input layout to the training split first, then validation.
train_ctc_ds, val_ctc_ds = (
    prepare_ctc_data(split) for split in (train_ds, val_ds)
)

# CNN Hyperparameter Tuning
def build_cnn_model(hp):
    """Build a CNN model from hyperparameters sampled through ``hp``.

    Args:
        hp: Hyperparameter container providing ``Choice``, ``Int`` and
            ``Float``.

    Returns:
        Whatever ``create_cnn_model`` returns for the sampled values and the
        module-level ``input_dim`` / ``output_dim``.
    """
    # Hyperparameters are registered in this order:
    # learning_rate, filters, dropout_rate, lstm_units.
    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    n_filters = hp.Int('filters', min_value=32, max_value=128, step=32)
    drop = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    # NOTE(review): 'lstm_units' is registered in the search space but is not
    # forwarded to create_cnn_model below; confirm whether the factory should
    # accept it.
    hp.Int('lstm_units', min_value=64, max_value=256, step=64)

    factory_kwargs = {
        'input_dim': input_dim,
        'output_dim': output_dim,
        'filters': n_filters,
        'dropout_rate': drop,
        'learning_rate': lr,
    }
    return create_cnn_model(**factory_kwargs)

# LSTM Hyperparameter Tuning
def build_lstm_model(hp):
    """Build an LSTM model from hyperparameters sampled through ``hp``.

    Args:
        hp: Hyperparameter container providing ``Choice``, ``Int`` and
            ``Float``.

    Returns:
        Whatever ``create_lstm_model`` returns for the sampled values and the
        module-level ``input_dim`` / ``output_dim``.
    """
    # Hyperparameters are registered in this order:
    # learning_rate, lstm_units, dropout_rate, num_layers.
    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    units = hp.Int('lstm_units', min_value=64, max_value=256, step=64)
    drop = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    # NOTE(review): 'num_layers' is registered in the search space but is not
    # forwarded to create_lstm_model below; confirm whether the factory should
    # accept it.
    hp.Int('num_layers', min_value=2, max_value=4)

    factory_kwargs = {
        'input_dim': input_dim,
        'output_dim': output_dim,
        'lstm_units': units,
        'dropout_rate': drop,
        'learning_rate': lr,
    }
    return create_lstm_model(**factory_kwargs)

# Transformer Hyperparameter Tuning
def build_transformer_model(hp):
    """Build a Transformer model from hyperparameters sampled through ``hp``.

    Args:
        hp: Hyperparameter container providing ``Choice`` and ``Int``.

    Returns:
        Whatever ``create_transformer_model`` returns for the sampled values
        and the module-level ``input_dim`` / ``output_dim``.
    """
    # Hyperparameters are registered in this order:
    # learning_rate, d_model, num_heads, ff_dim, num_layers.
    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model_width = hp.Int('d_model', min_value=64, max_value=256, step=64)
    heads = hp.Choice('num_heads', values=[4, 8, 16])
    feed_forward = hp.Int('ff_dim', min_value=256, max_value=1024, step=256)
    # NOTE(review): 'num_layers' is registered in the search space but is not
    # forwarded to create_transformer_model below; confirm whether the factory
    # should accept it.
    hp.Int('num_layers', min_value=2, max_value=6)

    factory_kwargs = {
        'input_dim': input_dim,
        'output_dim': output_dim,
        'd_model': model_width,
        'num_heads': heads,
        'ff_dim': feed_forward,
        'learning_rate': lr,
    }
    return create_transformer_model(**factory_kwargs)

# Initialize tuners
print("\nInitializing Hyperparameter Tuners...")

# Settings shared by all three Hyperband tuners; only the build function and
# the project name differ between them.
_hyperband_kwargs = dict(
    objective='val_loss',
    max_epochs=30,
    factor=3,
    directory='tuning',
)

cnn_tuner = kt.Hyperband(
    build_cnn_model,
    project_name=f'cnn_{best_feature_type}',
    **_hyperband_kwargs,
)

lstm_tuner = kt.Hyperband(
    build_lstm_model,
    project_name=f'lstm_{best_feature_type}',
    **_hyperband_kwargs,
)

transformer_tuner = kt.Hyperband(
    build_transformer_model,
    project_name=f'transformer_{best_feature_type}',
    **_hyperband_kwargs,
)

# Search for best hyperparameters
# Each entry pairs the banner printed before a search with the tuner to run;
# the searches execute sequentially in the order CNN, LSTM, Transformer.
_search_plan = (
    ("\nStarting CNN hyperparameter tuning...", cnn_tuner),
    ("\nStarting LSTM hyperparameter tuning...", lstm_tuner),
    ("\nStarting Transformer hyperparameter tuning...", transformer_tuner),
)

for _banner, _tuner in _search_plan:
    print(_banner)
    # Fresh callback instances are created for every search.
    _search_callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=5),
        tf.keras.callbacks.ReduceLROnPlateau(patience=3),
    ]
    _tuner.search(
        train_ctc_ds,
        validation_data=val_ctc_ds,
        epochs=30,
        callbacks=_search_callbacks,
    )

# Get best hyperparameters (top-ranked trial of each tuner)
best_cnn_hps = cnn_tuner.get_best_hyperparameters(num_trials=1)[0]
best_lstm_hps = lstm_tuner.get_best_hyperparameters(num_trials=1)[0]
best_transformer_hps = transformer_tuner.get_best_hyperparameters(num_trials=1)[0]

print("\n" + "="*50)
print("Best Hyperparameters Found")
print("="*50)

# For each model: section header, the hyperparameters object, and the
# (display label, hyperparameter name) pairs to print, in output order.
_report_sections = (
    ("\nBest CNN Hyperparameters:", best_cnn_hps, (
        ("Learning Rate", 'learning_rate'),
        ("Filters", 'filters'),
        ("Dropout Rate", 'dropout_rate'),
        ("LSTM Units", 'lstm_units'),
    )),
    ("\nBest LSTM Hyperparameters:", best_lstm_hps, (
        ("Learning Rate", 'learning_rate'),
        ("LSTM Units", 'lstm_units'),
        ("Dropout Rate", 'dropout_rate'),
        ("Number of Layers", 'num_layers'),
    )),
    ("\nBest Transformer Hyperparameters:", best_transformer_hps, (
        ("Learning Rate", 'learning_rate'),
        ("d_model", 'd_model'),
        ("Number of Heads", 'num_heads'),
        ("FF Dimension", 'ff_dim'),
        ("Number of Layers", 'num_layers'),
    )),
)

for _header, _hps, _rows in _report_sections:
    print(_header)
    for _label, _hp_name in _rows:
        print(f"{_label}: {_hps.get(_hp_name)}")

# Build and train best models
print("\nTraining best models with optimized hyperparameters...")


def _train_best(tuner, hps):
    """Build a model from ``hps`` via ``tuner.hypermodel`` and fit it.

    Returns:
        A ``(model, history)`` pair, where ``history`` is the value returned
        by ``model.fit``.
    """
    model = tuner.hypermodel.build(hps)
    fit_callbacks = [
        keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(patience=5),
    ]
    history = model.fit(
        train_ctc_ds,
        validation_data=val_ctc_ds,
        epochs=50,
        callbacks=fit_callbacks,
        verbose=1,
    )
    return model, history


# CNN
best_cnn_model, cnn_history = _train_best(cnn_tuner, best_cnn_hps)

# LSTM
best_lstm_model, lstm_history = _train_best(lstm_tuner, best_lstm_hps)

# Transformer
best_transformer_model, transformer_history = _train_best(
    transformer_tuner, best_transformer_hps
)

# Save best models
# Make sure the output directory exists before writing into it, independent
# of whether the model saver creates missing directories itself.
os.makedirs('models', exist_ok=True)
best_cnn_model.save('models/cnn_tuned_best.h5')
best_lstm_model.save('models/lstm_tuned_best.h5')
best_transformer_model.save('models/transformer_tuned_best.h5')


def _json_safe_history(history):
    """Return a copy of a Keras history dict with built-in ``float`` values.

    Callbacks such as ReduceLROnPlateau can record NumPy scalars (e.g. the
    learning rate as ``numpy.float32``) in the history, and ``json.dump``
    raises ``TypeError`` on those. Converting each entry with ``float`` keeps
    the same metrics and epochs while making them serializable.

    Args:
        history: Mapping of metric name to a per-epoch sequence of numbers.

    Returns:
        A new dict mapping each metric name to a list of Python floats.
    """
    return {
        metric: [float(value) for value in values]
        for metric, values in history.items()
    }


# Save training histories
with open('cnn_tuned_history.json', 'w') as f:
    json.dump(_json_safe_history(cnn_history.history), f, indent=2)

with open('lstm_tuned_history.json', 'w') as f:
    json.dump(_json_safe_history(lstm_history.history), f, indent=2)

with open('transformer_tuned_history.json', 'w') as f:
    json.dump(_json_safe_history(transformer_history.history), f, indent=2)

# Save hyperparameters
# For each model: the hyperparameters object and the names to export, listed
# in the order they appear in the JSON file.
_export_plan = {
    'cnn': (
        best_cnn_hps,
        ('learning_rate', 'filters', 'dropout_rate', 'lstm_units'),
    ),
    'lstm': (
        best_lstm_hps,
        ('learning_rate', 'lstm_units', 'dropout_rate', 'num_layers'),
    ),
    'transformer': (
        best_transformer_hps,
        ('learning_rate', 'd_model', 'num_heads', 'ff_dim', 'num_layers'),
    ),
}

best_hps = {
    _model_name: {_hp_name: _hps.get(_hp_name) for _hp_name in _hp_names}
    for _model_name, (_hps, _hp_names) in _export_plan.items()
}

with open('best_hyperparameters.json', 'w') as f:
    json.dump(best_hps, f, indent=2)

# Plot comparison: one validation-loss panel per tuned model, side by side
plt.figure(figsize=(15, 5))

# (history object, legend label, panel title) for each panel, left to right.
_panels = (
    (cnn_history, 'CNN Tuned', 'CNN Tuned - Validation Loss'),
    (lstm_history, 'LSTM Tuned', 'LSTM Tuned - Validation Loss'),
    (transformer_history, 'Transformer Tuned', 'Transformer Tuned - Validation Loss'),
)

for _position, (_history, _legend, _title) in enumerate(_panels, start=1):
    plt.subplot(1, 3, _position)
    plt.plot(_history.history['val_loss'], label=_legend)
    plt.title(_title)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

plt.tight_layout()
plt.savefig('tuned_models_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Closing summary of what was written and where.
for _message in (
    "\nHyperparameter tuning completed!",
    "Best models saved in 'models/' directory",
    "Hyperparameters saved in 'best_hyperparameters.json'",
):
    print(_message)