In [8]:
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Input, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.regularizers import l2

In [30]:
print(len(os.listdir('cats-v-non-cats/training/cats')))
print(len(os.listdir('cats-v-non-cats/training/non-cats')))

print(len(os.listdir('cats-v-non-cats/validation/cats')))
print(len(os.listdir('cats-v-non-cats/validation/non-cats')))

print(len(os.listdir('cats-v-non-cats/test/cats')))
print(len(os.listdir('cats-v-non-cats/test/non-cats')))

4272
4272
534
534
535
534


In [1]:
#Define data path
TRAINING_DIR = "cats-v-non-cats/training/"
VALIDATION_DIR = "cats-v-non-cats/validation/"
TESTING_DIR = "cats-v-non-cats/test/"

# Define whether to include test split or not
INCLUDE_TEST = True

# Model

In [33]:
# Hyperparameter Tuning Configuration
import itertools
import pandas as pd
from datetime import datetime
import json

# Define hyperparameter search space
HYPERPARAMETER_SPACE = {
    'learning_rate': [0.001, 0.0005, 0.0001],
    'batch_size': [16, 32, 64],
    'reg_strength': [0.0001, 0.0005, 0.001],
    'dropout_conv': [0.2, 0.25, 0.3],
    'dropout_dense': [0.4, 0.5, 0.6],
    'dense_units': [256, 512, 1024],
    'filters_multiplier': [1.25, 1.5, 2],
    'beta_1': [0.8, 0.9, 0.95, 0.99],  # Adam momentum parameter
    'beta_2': [0.99, 0.999, 0.9999]    # Adam variance parameter
}

# Track results
tuning_results = []

def create_tuned_model(reg_strength=0.0005, dropout_conv=0.25, dropout_dense=0.5, 
                       dense_units=512, filters_multiplier=1):

    inputs = Input(shape=(128, 128, 3))
    
    # Calculate filter sizes
    filters1 = int(32 * filters_multiplier)
    filters2 = int(64 * filters_multiplier)
    filters3 = int(128 * filters_multiplier)

    # First block
    x = Conv2D(filters1, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(reg_strength))(inputs)
    x = BatchNormalization()(x)
    x = Conv2D(filters1, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(reg_strength))(x)
    x = MaxPooling2D(2, 2)(x)
    x = Dropout(dropout_conv)(x)

    # Second block
    x = Conv2D(filters2, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(reg_strength))(x)
    x = BatchNormalization()(x)
    x = Conv2D(filters2, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(reg_strength))(x)
    x = MaxPooling2D(2, 2)(x)
    x = Dropout(dropout_conv)(x)

    # Third block
    x = Conv2D(filters3, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(reg_strength))(x)
    x = BatchNormalization()(x)
    x = Conv2D(filters3, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(reg_strength))(x)
    x = MaxPooling2D(2, 2)(x)
    x = Dropout(dropout_conv)(x)

    # Global pooling and dense layers
    x = GlobalAveragePooling2D()(x)
    x = Dense(dense_units, activation='relu', kernel_regularizer=l2(reg_strength))(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout_dense)(x)
    x = Dense(1, activation='sigmoid', kernel_regularizer=l2(reg_strength))(x)

    return Model(inputs=inputs, outputs=x)

print(f"Total possible combinations: {len(list(itertools.product(*HYPERPARAMETER_SPACE.values())))}")

Total possible combinations: 26244


In [34]:
# Smart Hyperparameter Search Function
def run_hyperparameter_search(max_trials=12, epochs_per_trial=50):

    global tuning_results
    
    # Create directory for saved models
    models_dir = "hyperparameter_models"
    os.makedirs(models_dir, exist_ok=True)
    print(f"Models will be saved in: {models_dir}/")
    
    # Define strategic combinations to test (now including beta parameter variations)
    strategic_combinations = [
        # Baseline (current setup with default betas)
        {'learning_rate': 0.0005, 'batch_size': 32, 'reg_strength': 0.0005, 
         'dropout_conv': 0.25, 'dropout_dense': 0.5, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.9, 'beta_2': 0.999},
        
        # High momentum (higher beta_1) - good for noisy gradients
        {'learning_rate': 0.001, 'batch_size': 32, 'reg_strength': 0.0001, 
         'dropout_conv': 0.2, 'dropout_dense': 0.4, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.95, 'beta_2': 0.999},
        
        # Very high momentum - even smoother convergence
        {'learning_rate': 0.0005, 'batch_size': 32, 'reg_strength': 0.0005, 
         'dropout_conv': 0.25, 'dropout_dense': 0.5, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.99, 'beta_2': 0.999},
        
        # Low momentum (lower beta_1) - faster adaptation
        {'learning_rate': 0.001, 'batch_size': 32, 'reg_strength': 0.0005, 
         'dropout_conv': 0.25, 'dropout_dense': 0.5, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.8, 'beta_2': 0.999},
        
        # High variance adaptation (lower beta_2) - for dense gradients
        {'learning_rate': 0.0005, 'batch_size': 32, 'reg_strength': 0.0005, 
         'dropout_conv': 0.25, 'dropout_dense': 0.5, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.9, 'beta_2': 0.99},
        
        # Very high variance stability (higher beta_2) - for sparse gradients
        {'learning_rate': 0.0001, 'batch_size': 32, 'reg_strength': 0.001, 
         'dropout_conv': 0.3, 'dropout_dense': 0.6, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.9, 'beta_2': 0.9999},
        
        # Balanced high momentum + high variance stability
        {'learning_rate': 0.0005, 'batch_size': 64, 'reg_strength': 0.0005, 
         'dropout_conv': 0.25, 'dropout_dense': 0.5, 'dense_units': 1024, 'filters_multiplier': 1.5,
         'beta_1': 0.95, 'beta_2': 0.9999},
        
        # Fast adaptation setup (low momentum + low variance)
        {'learning_rate': 0.001, 'batch_size': 16, 'reg_strength': 0.0001, 
         'dropout_conv': 0.2, 'dropout_dense': 0.4, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.8, 'beta_2': 0.99},
        
        # Conservative setup with stable betas
        {'learning_rate': 0.0001, 'batch_size': 64, 'reg_strength': 0.001, 
         'dropout_conv': 0.3, 'dropout_dense': 0.6, 'dense_units': 256, 'filters_multiplier': 1,
         'beta_1': 0.95, 'beta_2': 0.9999},
        
        # High capacity with optimized betas
        {'learning_rate': 0.0005, 'batch_size': 32, 'reg_strength': 0.0005, 
         'dropout_conv': 0.2, 'dropout_dense': 0.4, 'dense_units': 1024, 'filters_multiplier': 2,
         'beta_1': 0.99, 'beta_2': 0.999},
        
        # Aggressive training with low momentum
        {'learning_rate': 0.001, 'batch_size': 64, 'reg_strength': 0.0001, 
         'dropout_conv': 0.2, 'dropout_dense': 0.4, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.8, 'beta_2': 0.99},
        
        # Ultra-stable training (highest betas)
        {'learning_rate': 0.0001, 'batch_size': 32, 'reg_strength': 0.0005, 
         'dropout_conv': 0.25, 'dropout_dense': 0.5, 'dense_units': 512, 'filters_multiplier': 1,
         'beta_1': 0.99, 'beta_2': 0.9999}
    ]
    
    # Limit to max_trials
    combinations_to_test = strategic_combinations[:max_trials]
    
    print(f"Starting enhanced hyperparameter search with {len(combinations_to_test)} combinations...")
    print(f"Each trial will run for {epochs_per_trial} epochs with early stopping.")
    print("Each best model will be saved with descriptive filename for later analysis.")
    
    for trial_idx, params in enumerate(combinations_to_test):
        print(f"\n{'='*70}")
        print(f"TRIAL {trial_idx + 1}/{len(combinations_to_test)}")
        print(f"Parameters: {params}")
        print(f"Adam Optimizer: lr={params['learning_rate']}, Œ≤‚ÇÅ={params['beta_1']}, Œ≤‚ÇÇ={params['beta_2']}")
        print(f"{'='*70}")
        
        start_time = datetime.now()
        
        # Generate descriptive model filename
        model_filename = f"{models_dir}/trial_{trial_idx+1:02d}_lr{params['learning_rate']}_b1-{params['beta_1']}_b2-{params['beta_2']}_bs{params['batch_size']}_reg{params['reg_strength']}_du{params['dense_units']}.h5"
        
        try:
            # Create model with current parameters
            model = create_tuned_model(
                reg_strength=params['reg_strength'],
                dropout_conv=params['dropout_conv'],
                dropout_dense=params['dropout_dense'],
                dense_units=params['dense_units'],
                filters_multiplier=params['filters_multiplier']
            )
            
            # Setup callbacks for this trial (with individual model saving)
            reduce_lr = ReduceLROnPlateau(
                monitor='val_accuracy',  # Focus on accuracy for tuning
                factor=0.2,
                patience=3,  # Shorter patience for tuning
                min_lr=1e-8,
                verbose=1
            )
            
            early_stop = EarlyStopping(
                monitor='val_accuracy',
                patience=7,  # Shorter patience for tuning
                restore_best_weights=True,
                verbose=1,
                mode='max'  # Maximize accuracy
            )
            
            # Save best model for this trial
            trial_checkpoint = ModelCheckpoint(
                model_filename,
                monitor='val_accuracy',
                save_best_only=True,
                verbose=1,
                mode='max'
            )
            
            callbacks = [reduce_lr, early_stop, trial_checkpoint]
            
            # Compile model with beta parameters
            model.compile(
                optimizer=Adam(
                    learning_rate=params['learning_rate'], 
                    beta_1=params['beta_1'], 
                    beta_2=params['beta_2']
                ),
                loss='binary_crossentropy',
                metrics=['accuracy', 'precision', 'recall', 'auc']
            )
            
            # Create data generators with current batch size
            train_generator = train_gen.flow_from_directory(
                TRAINING_DIR,
                target_size=(128, 128),
                batch_size=params['batch_size'],
                class_mode='binary',
                shuffle=True
            )
            
            validation_generator = validation_gen.flow_from_directory(
                VALIDATION_DIR,
                target_size=(128, 128),
                batch_size=params['batch_size'],
                class_mode='binary',
                shuffle=False
            )
            
            # Train model
            history = model.fit(
                train_generator,
                epochs=epochs_per_trial,
                validation_data=validation_generator,
                callbacks=callbacks,
                verbose=1
            )
            
            # Get best results
            best_val_accuracy = max(history.history['val_accuracy'])
            best_val_auc = max(history.history['val_auc'])
            final_train_accuracy = history.history['accuracy'][-1]
            final_val_accuracy = history.history['val_accuracy'][-1]
            
            # Calculate training time
            end_time = datetime.now()
            training_time = (end_time - start_time).total_seconds()
            
            # Store results with model filename
            result = {
                'trial': trial_idx + 1,
                'params': params.copy(),
                'best_val_accuracy': best_val_accuracy,
                'best_val_auc': best_val_auc,
                'final_train_accuracy': final_train_accuracy,
                'final_val_accuracy': final_val_accuracy,
                'epochs_run': len(history.history['accuracy']),
                'training_time_seconds': training_time,
                'model_filename': model_filename,
                'timestamp': start_time.isoformat()
            }
            
            tuning_results.append(result)
            
            print(f"\nTrial {trial_idx + 1} Results:")
            print(f"Best Validation Accuracy: {best_val_accuracy:.4f}")
            print(f"Best Validation AUC: {best_val_auc:.4f}")
            print(f"Final Train Accuracy: {final_train_accuracy:.4f}")
            print(f"Final Val Accuracy: {final_val_accuracy:.4f}")
            print(f"Epochs Run: {len(history.history['accuracy'])}")
            print(f"Training Time: {training_time:.1f} seconds")
            print(f"Beta Parameters: Œ≤‚ÇÅ={params['beta_1']}, Œ≤‚ÇÇ={params['beta_2']}")
            print(f"Model saved as: {model_filename}")
            
        except Exception as e:
            print(f"Trial {trial_idx + 1} failed with error: {str(e)}")
            result = {
                'trial': trial_idx + 1,
                'params': params.copy(),
                'error': str(e),
                'model_filename': None,
                'timestamp': start_time.isoformat()
            }
            tuning_results.append(result)
            continue
    
    # Print summary of all saved models
    print(f"\n{'='*80}")
    print("SAVED MODELS SUMMARY:")
    print("="*80)
    successful_trials = [r for r in tuning_results if 'error' not in r and 'model_filename' in r]
    for result in successful_trials:
        print(f"Trial {result['trial']}: {result['model_filename']} (Acc: {result['best_val_accuracy']:.4f})")
    
    return tuning_results

In [35]:
# Enhanced Results Analysis and Visualization (with Beta Parameters and Model Info)
def analyze_tuning_results():

    if not tuning_results:
        print("No tuning results available. Run the search first!")
        return None
    
    # Convert to DataFrame for easier analysis
    results_df = pd.DataFrame([r for r in tuning_results if 'error' not in r])
    
    if results_df.empty:
        print("All trials failed. Check your setup and try again.")
        return None
    
    # Extract parameter columns
    param_columns = list(HYPERPARAMETER_SPACE.keys())
    for param in param_columns:
        results_df[param] = results_df['params'].apply(lambda x: x[param])
    
    # Sort by best validation accuracy
    results_df_sorted = results_df.sort_values('best_val_accuracy', ascending=False)
    
    print("="*80)
    print("ENHANCED HYPERPARAMETER TUNING RESULTS (with Adam Beta Analysis)")
    print("="*80)
    
    # Display top 5 results with beta parameter info and model filenames
    print("\nTOP 5 PERFORMING CONFIGURATIONS:")
    print("-" * 80)
    for idx, (_, row) in enumerate(results_df_sorted.head().iterrows()):
        print(f"\nRank {idx + 1}: Trial {int(row['trial'])}")
        print(f"Validation Accuracy: {row['best_val_accuracy']:.4f}")
        print(f"Validation AUC: {row['best_val_auc']:.4f}")
        print(f"Adam Betas: Œ≤‚ÇÅ={row['beta_1']}, Œ≤‚ÇÇ={row['beta_2']}")
        print(f"Learning Rate: {row['learning_rate']}")
        if 'model_filename' in row and row['model_filename']:
            print(f"Saved Model: {row['model_filename']}")
        print(f"Full Parameters: {row['params']}")
    
    # Find best configuration
    best_config = results_df_sorted.iloc[0]
    print(f"\n" + "="*80)
    print("BEST CONFIGURATION:")
    print("="*80)
    print(f"Trial: {int(best_config['trial'])}")
    print(f"Best Validation Accuracy: {best_config['best_val_accuracy']:.4f}")
    print(f"Best Validation AUC: {best_config['best_val_auc']:.4f}")
    print(f"Training Time: {best_config['training_time_seconds']:.1f} seconds")
    print(f"Optimal Adam Parameters:")
    print(f"  Learning Rate: {best_config['learning_rate']}")
    print(f"  Beta_1 (momentum): {best_config['beta_1']}")
    print(f"  Beta_2 (variance): {best_config['beta_2']}")
    if 'model_filename' in best_config and best_config['model_filename']:
        print(f"Best Model File: {best_config['model_filename']}")
    
    # Beta parameter analysis
    print(f"\n" + "="*80)
    print("BETA PARAMETER ANALYSIS:")
    print("="*80)
    
    # Group by beta values and show average performance
    beta1_performance = results_df.groupby('beta_1')['best_val_accuracy'].agg(['mean', 'std', 'count'])
    beta2_performance = results_df.groupby('beta_2')['best_val_accuracy'].agg(['mean', 'std', 'count'])
    
    print(f"\nBeta_1 (Momentum) Performance:")
    for beta1, stats in beta1_performance.iterrows():
        print(f"  Œ≤‚ÇÅ={beta1}: Avg Accuracy={stats['mean']:.4f} ¬±{stats['std']:.4f} (n={int(stats['count'])})")
    
    print(f"\nBeta_2 (Variance) Performance:")
    for beta2, stats in beta2_performance.iterrows():
        print(f"  Œ≤‚ÇÇ={beta2}: Avg Accuracy={stats['mean']:.4f} ¬±{stats['std']:.4f} (n={int(stats['count'])})")
    
    # Saved models summary
    print(f"\n" + "="*80)
    print("SAVED MODELS SUMMARY:")
    print("="*80)
    models_info = results_df_sorted[['trial', 'best_val_accuracy', 'best_val_auc', 'model_filename']].copy()
    models_info = models_info[models_info['model_filename'].notna()]
    
    print(f"Total models saved: {len(models_info)}")
    print("\nAll saved models (sorted by performance):")
    for _, row in models_info.iterrows():
        filename = os.path.basename(row['model_filename']) if row['model_filename'] else 'N/A'
        print(f"  Trial {int(row['trial'])}: {filename}")
        print(f"    Accuracy: {row['best_val_accuracy']:.4f}, AUC: {row['best_val_auc']:.4f}")
    
    # Create enhanced visualizations
    fig, axes = plt.subplots(3, 3, figsize=(20, 15))
    fig.suptitle('Enhanced Hyperparameter Tuning Analysis (with Adam Beta Parameters)', fontsize=16)
    
    # 1. Accuracy vs Trial
    axes[0,0].scatter(results_df['trial'], results_df['best_val_accuracy'], alpha=0.7)
    axes[0,0].set_xlabel('Trial Number')
    axes[0,0].set_ylabel('Best Validation Accuracy')
    axes[0,0].set_title('Accuracy by Trial')
    axes[0,0].grid(True)
    
    # 2. Learning Rate Impact
    lr_groups = results_df.groupby('learning_rate')['best_val_accuracy'].mean()
    axes[0,1].bar(range(len(lr_groups)), lr_groups.values)
    axes[0,1].set_xticks(range(len(lr_groups)))
    axes[0,1].set_xticklabels([f'{lr:.4f}' for lr in lr_groups.index])
    axes[0,1].set_xlabel('Learning Rate')
    axes[0,1].set_ylabel('Mean Validation Accuracy')
    axes[0,1].set_title('Learning Rate Impact')
    axes[0,1].grid(True)
    
    # 3. Beta_1 Impact
    beta1_groups = results_df.groupby('beta_1')['best_val_accuracy'].mean()
    axes[0,2].bar(range(len(beta1_groups)), beta1_groups.values)
    axes[0,2].set_xticks(range(len(beta1_groups)))
    axes[0,2].set_xticklabels([f'{b1:.2f}' for b1 in beta1_groups.index])
    axes[0,2].set_xlabel('Beta_1 (Momentum)')
    axes[0,2].set_ylabel('Mean Validation Accuracy')
    axes[0,2].set_title('Beta_1 Impact')
    axes[0,2].grid(True)
    
    # 4. Beta_2 Impact
    beta2_groups = results_df.groupby('beta_2')['best_val_accuracy'].mean()
    axes[1,0].bar(range(len(beta2_groups)), beta2_groups.values)
    axes[1,0].set_xticks(range(len(beta2_groups)))
    axes[1,0].set_xticklabels([f'{b2:.3f}' for b2 in beta2_groups.index])
    axes[1,0].set_xlabel('Beta_2 (Variance)')
    axes[1,0].set_ylabel('Mean Validation Accuracy')
    axes[1,0].set_title('Beta_2 Impact')
    axes[1,0].grid(True)
    
    # 5. Batch Size Impact
    batch_groups = results_df.groupby('batch_size')['best_val_accuracy'].mean()
    axes[1,1].bar(range(len(batch_groups)), batch_groups.values)
    axes[1,1].set_xticks(range(len(batch_groups)))
    axes[1,1].set_xticklabels(batch_groups.index)
    axes[1,1].set_xlabel('Batch Size')
    axes[1,1].set_ylabel('Mean Validation Accuracy')
    axes[1,1].set_title('Batch Size Impact')
    axes[1,1].grid(True)
    
    # 6. Beta_1 vs Beta_2 Scatter
    scatter = axes[1,2].scatter(results_df['beta_1'], results_df['beta_2'], 
                               c=results_df['best_val_accuracy'], cmap='viridis', alpha=0.7)
    axes[1,2].set_xlabel('Beta_1 (Momentum)')
    axes[1,2].set_ylabel('Beta_2 (Variance)')
    axes[1,2].set_title('Beta_1 vs Beta_2 (Color = Accuracy)')
    plt.colorbar(scatter, ax=axes[1,2])
    
    # 7. Regularization Impact
    reg_groups = results_df.groupby('reg_strength')['best_val_accuracy'].mean()
    axes[2,0].bar(range(len(reg_groups)), reg_groups.values)
    axes[2,0].set_xticks(range(len(reg_groups)))
    axes[2,0].set_xticklabels([f'{reg:.4f}' for reg in reg_groups.index])
    axes[2,0].set_xlabel('L2 Regularization')
    axes[2,0].set_ylabel('Mean Validation Accuracy')
    axes[2,0].set_title('Regularization Impact')
    axes[2,0].grid(True)
    
    # 8. Dense Units Impact
    dense_groups = results_df.groupby('dense_units')['best_val_accuracy'].mean()
    axes[2,1].bar(range(len(dense_groups)), dense_groups.values)
    axes[2,1].set_xticks(range(len(dense_groups)))
    axes[2,1].set_xticklabels(dense_groups.index)
    axes[2,1].set_xlabel('Dense Layer Units')
    axes[2,1].set_ylabel('Mean Validation Accuracy')
    axes[2,1].set_title('Dense Units Impact')
    axes[2,1].grid(True)
    
    # 9. Accuracy vs AUC
    axes[2,2].scatter(results_df['best_val_accuracy'], results_df['best_val_auc'], alpha=0.7)
    axes[2,2].set_xlabel('Best Validation Accuracy')
    axes[2,2].set_ylabel('Best Validation AUC')
    axes[2,2].set_title('Accuracy vs AUC')
    axes[2,2].grid(True)
    
    plt.tight_layout()
    plt.show()
    
    return best_config, results_df_sorted

def save_results_to_file(filename='hyperparameter_results_with_betas.json'):
    """Save tuning results to file including model filenames"""
    if tuning_results:
        with open(filename, 'w') as f:
            json.dump(tuning_results, f, indent=2)
        print(f"Enhanced results (with beta parameters and model filenames) saved to {filename}")
    else:
        print("No results to save!")

def load_best_models(top_n=3):
    """Load the top N performing models for ensemble or comparison"""
    if not tuning_results:
        print("No tuning results available!")
        return []
    
    # Get successful results sorted by accuracy
    successful_results = [r for r in tuning_results if 'error' not in r and 'model_filename' in r]
    successful_results.sort(key=lambda x: x['best_val_accuracy'], reverse=True)
    
    loaded_models = []
    print(f"\nLoading top {top_n} models...")
    
    for i, result in enumerate(successful_results[:top_n]):
        try:
            model = load_model(result['model_filename'])
            loaded_models.append({
                'model': model,
                'trial': result['trial'],
                'accuracy': result['best_val_accuracy'],
                'auc': result['best_val_auc'],
                'params': result['params'],
                'filename': result['model_filename']
            })
            print(f"‚úì Loaded Trial {result['trial']}: {os.path.basename(result['model_filename'])} (Acc: {result['best_val_accuracy']:.4f})")
        except Exception as e:
            print(f"‚úó Failed to load Trial {result['trial']}: {str(e)}")
    
    return loaded_models

print("Enhanced analysis functions with model management ready!")

Enhanced analysis functions with model management ready!


In [36]:
# Execute Enhanced Hyperparameter Search (with Adam Beta Parameters)
# WARNING: This will take significant time depending on max_trials and epochs_per_trial

# Configuration for the search
MAX_TRIALS = 12  # Increased to test more beta combinations
EPOCHS_PER_TRIAL = 50  # Slightly more epochs to see beta effects

print(f"Starting ENHANCED hyperparameter search with {MAX_TRIALS} trials, {EPOCHS_PER_TRIAL} epochs each...")
print("\nBeta Parameter Testing Strategy:")
print("‚Ä¢ Œ≤‚ÇÅ (momentum): Testing 0.8, 0.9, 0.95, 0.99")
print("‚Ä¢ Œ≤‚ÇÇ (variance): Testing 0.99, 0.999, 0.9999")
print("‚Ä¢ Looking for optimal momentum vs stability balance")

# Ensure we have the data generators
if 'train_gen' not in locals():
    print("\nSetting up data generators...")
    # Data Augmentation for Better Generalization
    train_gen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        brightness_range=[0.8, 1.2],
        fill_mode='nearest'
    )

    validation_gen = ImageDataGenerator(rescale=1./255)

print("\n" + "="*70)
print("BETA PARAMETER IMPACT EXPLANATION:")
print("="*70)
print("Higher Œ≤‚ÇÅ (0.95-0.99): More momentum, smoother convergence")
print("Lower Œ≤‚ÇÅ (0.8-0.9): Less momentum, faster adaptation to changes")
print("Higher Œ≤‚ÇÇ (0.999-0.9999): More stable, better for sparse gradients")  
print("Lower Œ≤‚ÇÇ (0.99-0.995): Faster adaptation, better for dense gradients")
print("="*70)

# Run the enhanced search
search_results = run_hyperparameter_search(max_trials=MAX_TRIALS, epochs_per_trial=EPOCHS_PER_TRIAL)

Starting ENHANCED hyperparameter search with 12 trials, 50 epochs each...
Now testing Adam optimizer beta parameters (Œ≤‚ÇÅ and Œ≤‚ÇÇ) for optimization improvements!
This may take 45-90 minutes depending on your hardware.

Beta Parameter Testing Strategy:
‚Ä¢ Œ≤‚ÇÅ (momentum): Testing 0.8, 0.9, 0.95, 0.99
‚Ä¢ Œ≤‚ÇÇ (variance): Testing 0.99, 0.999, 0.9999
‚Ä¢ Looking for optimal momentum vs stability balance

Setting up data generators...

BETA PARAMETER IMPACT EXPLANATION:
Higher Œ≤‚ÇÅ (0.95-0.99): More momentum, smoother convergence
Lower Œ≤‚ÇÅ (0.8-0.9): Less momentum, faster adaptation to changes
Higher Œ≤‚ÇÇ (0.999-0.9999): More stable, better for sparse gradients
Lower Œ≤‚ÇÇ (0.99-0.995): Faster adaptation, better for dense gradients
Models will be saved in: hyperparameter_models/
Starting enhanced hyperparameter search with 12 combinations...
Each trial will run for 50 epochs with early stopping.
Each best model will be saved with descriptive filename for later analysis.

TRIAL 1

Starting ENHANCED hyperparameter search with 12 trials, 50 epochs each...
Now testing Adam optimizer beta parameters (Œ≤‚ÇÅ and Œ≤‚ÇÇ) for optimization improvements!
This may take 45-90 minutes depending on your hardware.

Beta Parameter Testing Strategy:
‚Ä¢ Œ≤‚ÇÅ (momentum): Testing 0.8, 0.9, 0.95, 0.99
‚Ä¢ Œ≤‚ÇÇ (variance): Testing 0.99, 0.999, 0.9999
‚Ä¢ Looking for optimal momentum vs stability balance

Setting up data generators...

BETA PARAMETER IMPACT EXPLANATION:
Higher Œ≤‚ÇÅ (0.95-0.99): More momentum, smoother convergence
Lower Œ≤‚ÇÅ (0.8-0.9): Less momentum, faster adaptation to changes
Higher Œ≤‚ÇÇ (0.999-0.9999): More stable, better for sparse gradients
Lower Œ≤‚ÇÇ (0.99-0.995): Faster adaptation, better for dense gradients
Models will be saved in: hyperparameter_models/
Starting enhanced hyperparameter search with 12 combinations...
Each trial will run for 50 epochs with early stopping.
Each best model will be saved with descriptive filename for later analysis.

TRIAL 1

  self._warn_if_super_not_called()


Starting ENHANCED hyperparameter search with 12 trials, 50 epochs each...
Now testing Adam optimizer beta parameters (Œ≤‚ÇÅ and Œ≤‚ÇÇ) for optimization improvements!
This may take 45-90 minutes depending on your hardware.

Beta Parameter Testing Strategy:
‚Ä¢ Œ≤‚ÇÅ (momentum): Testing 0.8, 0.9, 0.95, 0.99
‚Ä¢ Œ≤‚ÇÇ (variance): Testing 0.99, 0.999, 0.9999
‚Ä¢ Looking for optimal momentum vs stability balance

Setting up data generators...

BETA PARAMETER IMPACT EXPLANATION:
Higher Œ≤‚ÇÅ (0.95-0.99): More momentum, smoother convergence
Lower Œ≤‚ÇÅ (0.8-0.9): Less momentum, faster adaptation to changes
Higher Œ≤‚ÇÇ (0.999-0.9999): More stable, better for sparse gradients
Lower Œ≤‚ÇÇ (0.99-0.995): Faster adaptation, better for dense gradients
Models will be saved in: hyperparameter_models/
Starting enhanced hyperparameter search with 12 combinations...
Each trial will run for 50 epochs with early stopping.
Each best model will be saved with descriptive filename for later analysis.

TRIAL 1

  self._warn_if_super_not_called()


Epoch 1/50
[1m 44/267[0m [32m‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [1m5:46[0m 2s/step - accuracy: 0.6439 - auc: 0.7090 - loss: 1.0253 - precision: 0.6480 - recall: 0.5659

Starting ENHANCED hyperparameter search with 12 trials, 50 epochs each...
Now testing Adam optimizer beta parameters (Œ≤‚ÇÅ and Œ≤‚ÇÇ) for optimization improvements!
This may take 45-90 minutes depending on your hardware.

Beta Parameter Testing Strategy:
‚Ä¢ Œ≤‚ÇÅ (momentum): Testing 0.8, 0.9, 0.95, 0.99
‚Ä¢ Œ≤‚ÇÇ (variance): Testing 0.99, 0.999, 0.9999
‚Ä¢ Looking for optimal momentum vs stability balance

Setting up data generators...

BETA PARAMETER IMPACT EXPLANATION:
Higher Œ≤‚ÇÅ (0.95-0.99): More momentum, smoother convergence
Lower Œ≤‚ÇÅ (0.8-0.9): Less momentum, faster adaptation to changes
Higher Œ≤‚ÇÇ (0.999-0.9999): More stable, better for sparse gradients
Lower Œ≤‚ÇÇ (0.99-0.995): Faster adaptation, better for dense gradients
Models will be saved in: hyperparameter_models/
Starting enhanced hyperparameter search with 12 combinations...
Each trial will run for 50 epochs with early stopping.
Each best model will be saved with descriptive filename for later analysis.

TRIAL 1

  self._warn_if_super_not_called()


Epoch 1/50
[1m 44/267[0m [32m‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [1m5:46[0m 2s/step - accuracy: 0.6439 - auc: 0.7090 - loss: 1.0253 - precision: 0.6480 - recall: 0.5659

KeyboardInterrupt: 

In [None]:
# Analyze Results and Train Best Model (with Optimized Beta Parameters)
print("Analyzing enhanced hyperparameter search results (including Adam beta parameters)...")

# Analyze the results
best_config, all_results = analyze_tuning_results()

# Save enhanced results for future reference
save_results_to_file('hyperparameter_tuning_results_with_betas.json')

if best_config is not None:
    print("\n" + "="*80)
    print("TRAINING FINAL MODEL WITH OPTIMIZED ADAM BETA PARAMETERS")
    print("="*80)
    
    # Get best parameters including betas
    best_params = best_config['params']
    print(f"Using optimized parameters: {best_params}")
    print(f"\nOptimized Adam Configuration:")
    print(f"  Learning Rate: {best_params['learning_rate']}")
    print(f"  Beta_1 (momentum): {best_params['beta_1']}")
    print(f"  Beta_2 (variance): {best_params['beta_2']}")
    
    # Create the best model
    best_model = create_tuned_model(
        reg_strength=best_params['reg_strength'],
        dropout_conv=best_params['dropout_conv'],
        dropout_dense=best_params['dropout_dense'],
        dense_units=best_params['dense_units'],
        filters_multiplier=best_params['filters_multiplier']
    )
    
    # Setup callbacks for final training (longer patience)
    reduce_lr_final = ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.2,
        patience=5,
        min_lr=1e-8,
        verbose=1
    )
    
    early_stop_final = EarlyStopping(
        monitor='val_accuracy',
        patience=15,
        restore_best_weights=True,
        verbose=1,
        mode='max'
    )
    
    checkpoint_final = ModelCheckpoint(
        'best_tuned_model_with_betas.h5',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    )
    
    callbacks_final = [reduce_lr_final, early_stop_final, checkpoint_final]
    
    # Compile with optimized Adam parameters (including betas!)
    best_model.compile(
        optimizer=Adam(
            learning_rate=best_params['learning_rate'],
            beta_1=best_params['beta_1'],  # Optimized momentum
            beta_2=best_params['beta_2']   # Optimized variance
        ),
        loss='binary_crossentropy',
        metrics=['accuracy', 'precision', 'recall', 'auc']
    )
    
    # Create data generators with best batch size
    final_train_generator = train_gen.flow_from_directory(
        TRAINING_DIR,
        target_size=(128, 128),
        batch_size=best_params['batch_size'],
        class_mode='binary',
        shuffle=True
    )
    
    final_validation_generator = validation_gen.flow_from_directory(
        VALIDATION_DIR,
        target_size=(128, 128),
        batch_size=best_params['batch_size'],
        class_mode='binary',
        shuffle=False
    )
    
    print(f"\nStarting final training with optimized Adam beta parameters...")
    print(f"Using Œ≤‚ÇÅ={best_params['beta_1']} (momentum) and Œ≤‚ÇÇ={best_params['beta_2']} (variance)")
    
    # Train the final model with more epochs
    final_history = best_model.fit(
        final_train_generator,
        epochs=50,  # Full training
        validation_data=final_validation_generator,
        callbacks=callbacks_final,
        verbose=1
    )
    
    print("\n" + "="*80)
    print("ENHANCED HYPERPARAMETER TUNING COMPLETE!")
    print("="*80)
    print(f"Best model saved as 'best_tuned_model_with_betas.h5'")
    print(f"Final validation accuracy: {max(final_history.history['val_accuracy']):.4f}")
    print(f"Final validation AUC: {max(final_history.history['val_auc']):.4f}")
    print(f"\nOptimal Adam Configuration Found:")
    print(f"‚Ä¢ Learning Rate: {best_params['learning_rate']}")
    print(f"‚Ä¢ Beta_1 (momentum): {best_params['beta_1']}")
    print(f"‚Ä¢ Beta_2 (variance): {best_params['beta_2']}")
    
    # Compare with default Adam parameters
    print(f"\nComparison with Default Adam (lr=0.001, Œ≤‚ÇÅ=0.9, Œ≤‚ÇÇ=0.999):")
    if best_params['beta_1'] > 0.9:
        print(f"‚úì Higher momentum (Œ≤‚ÇÅ={best_params['beta_1']}) ‚Üí Smoother convergence")
    elif best_params['beta_1'] < 0.9:
        print(f"‚úì Lower momentum (Œ≤‚ÇÅ={best_params['beta_1']}) ‚Üí Faster adaptation")
    
    if best_params['beta_2'] > 0.999:
        print(f"‚úì Higher variance stability (Œ≤‚ÇÇ={best_params['beta_2']}) ‚Üí Better for sparse gradients")
    elif best_params['beta_2'] < 0.999:
        print(f"‚úì Lower variance adaptation (Œ≤‚ÇÇ={best_params['beta_2']}) ‚Üí Better for dense gradients")
        
else:
    print("Could not find best configuration. Check the search results.")

In [None]:
# Model Ensemble and Comparison Functions
def create_ensemble_predictions(models_info, test_generator, ensemble_method='average'):
    """
    Create ensemble predictions from multiple trained models
    
    Args:
        models_info: List of model info dictionaries from load_best_models()
        test_generator: Test data generator
        ensemble_method: 'average', 'weighted', or 'voting'
    """
    if not models_info:
        print("No models provided for ensemble!")
        return None
    
    print(f"\nCreating ensemble predictions using {len(models_info)} models...")
    print(f"Ensemble method: {ensemble_method}")
    
    # Get predictions from each model
    all_predictions = []
    model_weights = []
    
    test_generator.reset()
    
    for i, model_info in enumerate(models_info):
        print(f"Getting predictions from Trial {model_info['trial']} (Acc: {model_info['accuracy']:.4f})...")
        
        # Get predictions
        predictions = model_info['model'].predict(test_generator, verbose=0)
        all_predictions.append(predictions)
        
        # Weight by validation accuracy for weighted ensemble
        model_weights.append(model_info['accuracy'])
        
        test_generator.reset()  # Reset for next model
    
    # Create ensemble predictions
    all_predictions = np.array(all_predictions)  # Shape: (n_models, n_samples, 1)
    
    if ensemble_method == 'average':
        ensemble_pred = np.mean(all_predictions, axis=0)
    elif ensemble_method == 'weighted':
        weights = np.array(model_weights) / sum(model_weights)
        ensemble_pred = np.average(all_predictions, axis=0, weights=weights)
    elif ensemble_method == 'voting':
        # Convert to binary predictions and take majority vote
        binary_preds = (all_predictions > 0.5).astype(int)
        ensemble_pred = (np.mean(binary_preds, axis=0) > 0.5).astype(float)
    else:
        raise ValueError("ensemble_method must be 'average', 'weighted', or 'voting'")
    
    print(f"Ensemble predictions created using {ensemble_method} method.")
    return ensemble_pred

def compare_individual_vs_ensemble(models_info, test_generator):
    """Compare individual model performance vs ensemble"""
    print("\n" + "="*80)
    print("INDIVIDUAL MODELS vs ENSEMBLE COMPARISON")
    print("="*80)
    
    # Get true labels
    test_generator.reset()
    y_true = []
    for i in range(len(test_generator)):
        batch_x, batch_y = next(test_generator)
        y_true.extend(batch_y)
    y_true = np.array(y_true)
    
    # Individual model results
    print("\nINDIVIDUAL MODEL PERFORMANCE:")
    individual_accuracies = []
    for model_info in models_info:
        test_generator.reset()
        predictions = model_info['model'].predict(test_generator, verbose=0)
        binary_preds = (predictions > 0.5).astype(int).flatten()
        
        accuracy = np.mean(binary_preds == y_true)
        individual_accuracies.append(accuracy)
        
        print(f"Trial {model_info['trial']}: {accuracy:.4f} (Val: {model_info['accuracy']:.4f})")
    
    # Ensemble results
    print(f"\nENSEMBLE PERFORMANCE:")
    ensemble_methods = ['average', 'weighted', 'voting']
    
    ensemble_results = {}
    for method in ensemble_methods:
        ensemble_pred = create_ensemble_predictions(models_info, test_generator, method)
        binary_ensemble = (ensemble_pred > 0.5).astype(int).flatten()
        ensemble_accuracy = np.mean(binary_ensemble == y_true)
        ensemble_results[method] = ensemble_accuracy
        
        print(f"{method.capitalize()} Ensemble: {ensemble_accuracy:.4f}")
    
    # Summary
    best_individual = max(individual_accuracies)
    best_ensemble = max(ensemble_results.values())
    best_method = max(ensemble_results.items(), key=lambda x: x[1])
    
    print(f"\n" + "-"*50)
    print(f"Best Individual Model: {best_individual:.4f}")
    print(f"Best Ensemble ({best_method[0]}): {best_method[1]:.4f}")
    
    if best_ensemble > best_individual:
        improvement = best_ensemble - best_individual
        print(f"‚úì Ensemble improves by {improvement:.4f} ({improvement*100:.2f}%)")
    else:
        print("‚úó Ensemble does not improve over best individual model")
    
    return ensemble_results

def save_model_comparison_report():
    """Save a comprehensive comparison report of all models"""
    if not tuning_results:
        print("No results to save!")
        return
    
    successful_results = [r for r in tuning_results if 'error' not in r]
    
    # Create detailed report
    report = {
        'experiment_info': {
            'total_trials': len(tuning_results),
            'successful_trials': len(successful_results),
            'failed_trials': len(tuning_results) - len(successful_results),
            'timestamp': datetime.now().isoformat()
        },
        'hyperparameter_space': HYPERPARAMETER_SPACE,
        'results': successful_results,
        'best_models': sorted(successful_results, key=lambda x: x['best_val_accuracy'], reverse=True)[:5]
    }
    
    # Save to JSON
    with open('model_comparison_report.json', 'w') as f:
        json.dump(report, f, indent=2)
    
    # Create markdown report
    with open('model_comparison_report.md', 'w') as f:
        f.write("# Hyperparameter Tuning Results Report\n\n")
        f.write(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
        
        f.write("## Experiment Overview\n")
        f.write(f"- Total Trials: {len(tuning_results)}\n")
        f.write(f"- Successful Trials: {len(successful_results)}\n")
        f.write(f"- Failed Trials: {len(tuning_results) - len(successful_results)}\n\n")
        
        f.write("## Top 5 Performing Models\n")
        for i, result in enumerate(report['best_models'][:5]):
            f.write(f"\n### Rank {i+1}: Trial {result['trial']}\n")
            f.write(f"- **Validation Accuracy**: {result['best_val_accuracy']:.4f}\n")
            f.write(f"- **Validation AUC**: {result['best_val_auc']:.4f}\n")
            f.write(f"- **Model File**: `{os.path.basename(result['model_filename'])}`\n")
            f.write(f"- **Parameters**:\n")
            for param, value in result['params'].items():
                f.write(f"  - {param}: {value}\n")
        
        f.write(f"\n## All Model Files\n")
        for result in successful_results:
            filename = os.path.basename(result['model_filename']) if result['model_filename'] else 'N/A'
            f.write(f"- Trial {result['trial']}: `{filename}` (Acc: {result['best_val_accuracy']:.4f})\n")
    
    print("Comprehensive model comparison report saved:")
    print("- model_comparison_report.json (detailed data)")
    print("- model_comparison_report.md (readable report)")

print("Model ensemble and comparison functions ready!")

In [None]:
# Plot training history
def plot_training_history(history):
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

    # Plot accuracy
    ax1.plot(history.history['accuracy'], label='Training Accuracy')
    ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax1.grid(True)

    # Plot loss
    ax2.plot(history.history['loss'], label='Training Loss')
    ax2.plot(history.history['val_loss'], label='Validation Loss')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    ax2.grid(True)

    # Plot AUC
    ax3.plot(history.history['auc'], label='Training AUC')
    ax3.plot(history.history['val_auc'], label='Validation AUC')
    ax3.set_title('Model AUC')
    ax3.set_xlabel('Epoch')
    ax3.set_ylabel('AUC')
    ax3.legend()
    ax3.grid(True)

    # Plot Precision and Recall
    ax4.plot(history.history['precision'], label='Training Precision')
    ax4.plot(history.history['val_precision'], label='Validation Precision')
    ax4.plot(history.history['recall'], label='Training Recall')
    ax4.plot(history.history['val_recall'], label='Validation Recall')
    ax4.set_title('Precision and Recall')
    ax4.set_xlabel('Epoch')
    ax4.set_ylabel('Score')
    ax4.legend()
    ax4.grid(True)

    plt.tight_layout()
    plt.show()

    # Print final metrics
    final_train_acc = history.history['accuracy'][-1]
    final_val_acc = history.history['val_accuracy'][-1]
    final_train_auc = history.history['auc'][-1]
    final_val_auc = history.history['val_auc'][-1]
    
    print(f"Final Training Accuracy: {final_train_acc:.4f}")
    print(f"Final Validation Accuracy: {final_val_acc:.4f}")
    print(f"Final Training AUC: {final_train_auc:.4f}")
    print(f"Final Validation AUC: {final_val_auc:.4f}")

plot_training_history(history)

In [None]:

best_model = load_model('best_cats_dogs_model.h5')

In [6]:
# Comprehensive Model Analysis
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import seaborn as sns

def analyze_all_saved_models():
    """Comprehensively analyze all saved models on test data"""
    
    # Get all model files
    models_dir = "/Users/linhnguyen/Documents/fall2025/csce482/project/models"
    model_files = [f for f in os.listdir(models_dir) if f.endswith('.h5')]
    model_files.sort()  # Sort for consistent ordering
    
    print(f"Found {len(model_files)} model files to analyze:")
    for i, filename in enumerate(model_files, 1):
        print(f"  {i}. {filename}")
    
    print(f"\nAnalyzing models on {test_test_generator.samples} test samples...")
    
    # Store results for all models
    all_results = []
    
    # Get true labels once
    test_test_generator.reset()
    y_true = []
    for i in range(len(test_test_generator)):
        batch_x, batch_y = next(test_test_generator)
        y_true.extend(batch_y)
    y_true = np.array(y_true)
    
    print(f"\nTest set distribution:")
    print(f"  Cats (class 0): {np.sum(y_true == 0)} samples")
    print(f"  Non-cats (class 1): {np.sum(y_true == 1)} samples")
    
    # Analyze each model
    print(f"\n{'='*100}")
    print("INDIVIDUAL MODEL ANALYSIS")
    print('='*100)
    
    for i, filename in enumerate(model_files, 1):
        try:
            print(f"\n[{i}/{len(model_files)}] Analyzing: {filename}")
            print("-" * 80)
            
            # Load model
            model_path = os.path.join(models_dir, filename)
            model = load_model(model_path)
            
            # Get predictions
            test_test_generator.reset()
            predictions = model.predict(test_test_generator, verbose=0)
            pred_probs = predictions.flatten()
            pred_binary = (pred_probs > 0.5).astype(int)
            
            # Calculate metrics
            accuracy = np.mean(pred_binary == y_true)
            auc_score = roc_auc_score(y_true, pred_probs)
            
            # Classification report
            class_report = classification_report(y_true, pred_binary, 
                                               target_names=['Cats', 'Non-cats'], 
                                               output_dict=True)
            
            # Confusion matrix
            cm = confusion_matrix(y_true, pred_binary)
            
            # Extract model info from filename
            model_info = parse_model_filename(filename)
            
            # Store results
            result = {
                'filename': filename,
                'model_info': model_info,
                'accuracy': accuracy,
                'auc': auc_score,
                'predictions': pred_probs,
                'binary_predictions': pred_binary,
                'classification_report': class_report,
                'confusion_matrix': cm
            }
            all_results.append(result)
            
            # Print key metrics
            print(f"Accuracy: {accuracy:.4f}")
            print(f"AUC: {auc_score:.4f}")
            print(f"Precision (Cats): {class_report['Cats']['precision']:.4f}")
            print(f"Recall (Cats): {class_report['Cats']['recall']:.4f}")
            print(f"Precision (Non-cats): {class_report['Non-cats']['precision']:.4f}")
            print(f"Recall (Non-cats): {class_report['Non-cats']['recall']:.4f}")
            
            if model_info:
                print(f"\nModel Parameters:")
                print(f"  Learning Rate: {model_info.get('lr', 'N/A')}")
                print(f"  Beta1: {model_info.get('b1', 'N/A')}")
                print(f"  Beta2: {model_info.get('b2', 'N/A')}")
                print(f"  Batch Size: {model_info.get('bs', 'N/A')}")
                print(f"  Regularization: {model_info.get('reg', 'N/A')}")
                print(f"  Dense Units: {model_info.get('du', 'N/A')}")
            
        except Exception as e:
            print(f"‚ùå Error analyzing {filename}: {str(e)}")
            continue
    
    # Sort results by accuracy
    all_results.sort(key=lambda x: x['accuracy'], reverse=True)
    
    # Performance summary
    print(f"\n{'='*100}")
    print("PERFORMANCE RANKING")
    print('='*100)
    
    for i, result in enumerate(all_results, 1):
        print(f"{i:2d}. {result['filename'][:60]:<60} | Acc: {result['accuracy']:.4f} | AUC: {result['auc']:.4f}")
    
    return all_results, y_true

def parse_model_filename(filename):
    """Extract parameters from model filename"""
    try:
        if filename.startswith('trial_'):
            # Parse trial files: trial_01_lr0.0005_b1-0.9_b2-0.999_bs32_reg0.0005_du512.h5
            parts = filename.replace('.h5', '').split('_')
            info = {}
            for part in parts:
                if part.startswith('lr'):
                    info['lr'] = float(part[2:])
                elif part.startswith('b1-'):
                    info['b1'] = float(part[3:])
                elif part.startswith('b2-'):
                    info['b2'] = float(part[3:])
                elif part.startswith('bs'):
                    info['bs'] = int(part[2:])
                elif part.startswith('reg'):
                    info['reg'] = float(part[3:])
                elif part.startswith('du'):
                    info['du'] = int(part[2:])
            return info
    except:
        pass
    return None

print("Model analysis functions ready!")

Model analysis functions ready!


In [7]:
# Simple Model Analysis (without complex visualizations)
print("üîç RUNNING SIMPLIFIED MODEL ANALYSIS")
print("="*80)

# Run just the analysis part
all_results, y_true = analyze_all_saved_models()

print(f"\nüìä SUMMARY STATISTICS:")
print("-" * 50)

accuracies = [r['accuracy'] for r in all_results]
aucs = [r['auc'] for r in all_results]

print(f"Number of models: {len(all_results)}")
print(f"Test samples: {len(y_true)}")
print(f"Accuracy - Mean: {np.mean(accuracies):.4f} ¬± {np.std(accuracies):.4f}")
print(f"Accuracy - Range: {np.min(accuracies):.4f} to {np.max(accuracies):.4f}")
print(f"AUC - Mean: {np.mean(aucs):.4f} ¬± {np.std(aucs):.4f}")
print(f"AUC - Range: {np.min(aucs):.4f} to {np.max(aucs):.4f}")

print(f"\nüèÜ TOP 5 MODELS:")
print("-" * 50)
for i, result in enumerate(all_results[:5], 1):
    print(f"{i}. {result['filename']}")
    print(f"   Accuracy: {result['accuracy']:.4f} | AUC: {result['auc']:.4f}")
    if result['model_info']:
        info = result['model_info']
        print(f"   LR: {info.get('lr', 'N/A')} | Œ≤‚ÇÅ: {info.get('b1', 'N/A')} | Œ≤‚ÇÇ: {info.get('b2', 'N/A')}")

print(f"\n‚úÖ Analysis complete!")

üîç RUNNING SIMPLIFIED MODEL ANALYSIS
Found 13 model files to analyze:
  1. best_tuned_model_with_betas.h5
  2. trial_01_lr0.0005_b1-0.9_b2-0.999_bs32_reg0.0005_du512.h5
  3. trial_02_lr0.001_b1-0.95_b2-0.999_bs32_reg0.0001_du512.h5
  4. trial_03_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du512.h5
  5. trial_04_lr0.001_b1-0.8_b2-0.999_bs32_reg0.0005_du512.h5
  6. trial_05_lr0.0005_b1-0.9_b2-0.99_bs32_reg0.0005_du512.h5
  7. trial_06_lr0.0001_b1-0.9_b2-0.9999_bs32_reg0.001_du512.h5
  8. trial_07_lr0.0005_b1-0.95_b2-0.9999_bs64_reg0.0005_du1024.h5
  9. trial_08_lr0.001_b1-0.8_b2-0.99_bs16_reg0.0001_du512.h5
  10. trial_09_lr0.0001_b1-0.95_b2-0.9999_bs64_reg0.001_du256.h5
  11. trial_10_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du1024.h5
  12. trial_11_lr0.001_b1-0.8_b2-0.99_bs64_reg0.0001_du512.h5
  13. trial_12_lr0.0001_b1-0.99_b2-0.9999_bs32_reg0.0005_du512.h5

Analyzing models on 1068 test samples...





Test set distribution:
  Cats (class 0): 534 samples
  Non-cats (class 1): 534 samples

INDIVIDUAL MODEL ANALYSIS

[1/13] Analyzing: best_tuned_model_with_betas.h5
--------------------------------------------------------------------------------




Accuracy: 0.9625
AUC: 0.9930
Precision (Cats): 0.9458
Recall (Cats): 0.9813
Precision (Non-cats): 0.9805
Recall (Non-cats): 0.9438

[2/13] Analyzing: trial_01_lr0.0005_b1-0.9_b2-0.999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9213
AUC: 0.9764
Precision (Cats): 0.9518
Recall (Cats): 0.8876
Precision (Non-cats): 0.8947
Recall (Non-cats): 0.9551

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.9
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[3/13] Analyzing: trial_02_lr0.001_b1-0.95_b2-0.999_bs32_reg0.0001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.8886
AUC: 0.9744
Precision (Cats): 0.9621
Recall (Cats): 0.8090
Precision (Non-cats): 0.8352
Recall (Non-cats): 0.9682

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.95
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0001
  Dense Units: 512

[4/13] Analyzing: trial_03_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9073
AUC: 0.9773
Precision (Cats): 0.9718
Recall (Cats): 0.8390
Precision (Non-cats): 0.8583
Recall (Non-cats): 0.9757

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.99
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[5/13] Analyzing: trial_04_lr0.001_b1-0.8_b2-0.999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.8895
AUC: 0.9811
Precision (Cats): 0.9860
Recall (Cats): 0.7903
Precision (Non-cats): 0.8250
Recall (Non-cats): 0.9888

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.8
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[6/13] Analyzing: trial_05_lr0.0005_b1-0.9_b2-0.99_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9232
AUC: 0.9825
Precision (Cats): 0.9728
Recall (Cats): 0.8708
Precision (Non-cats): 0.8831
Recall (Non-cats): 0.9757

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.9
  Beta2: 0.99
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[7/13] Analyzing: trial_06_lr0.0001_b1-0.9_b2-0.9999_bs32_reg0.001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.7116
AUC: 0.8789
Precision (Cats): 0.9007
Recall (Cats): 0.4757
Precision (Non-cats): 0.6438
Recall (Non-cats): 0.9476

Model Parameters:
  Learning Rate: 0.0001
  Beta1: 0.9
  Beta2: 0.9999
  Batch Size: 32
  Regularization: 0.001
  Dense Units: 512

[8/13] Analyzing: trial_07_lr0.0005_b1-0.95_b2-0.9999_bs64_reg0.0005_du1024.h5
--------------------------------------------------------------------------------




Accuracy: 0.9204
AUC: 0.9778
Precision (Cats): 0.9535
Recall (Cats): 0.8839
Precision (Non-cats): 0.8918
Recall (Non-cats): 0.9569

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.95
  Beta2: 0.9999
  Batch Size: 64
  Regularization: 0.0005
  Dense Units: 1024

[9/13] Analyzing: trial_08_lr0.001_b1-0.8_b2-0.99_bs16_reg0.0001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9176
AUC: 0.9837
Precision (Cats): 0.9785
Recall (Cats): 0.8539
Precision (Non-cats): 0.8704
Recall (Non-cats): 0.9813

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.8
  Beta2: 0.99
  Batch Size: 16
  Regularization: 0.0001
  Dense Units: 512

[10/13] Analyzing: trial_09_lr0.0001_b1-0.95_b2-0.9999_bs64_reg0.001_du256.h5
--------------------------------------------------------------------------------




Accuracy: 0.7303
AUC: 0.8076
Precision (Cats): 0.7236
Recall (Cats): 0.7453
Precision (Non-cats): 0.7375
Recall (Non-cats): 0.7154

Model Parameters:
  Learning Rate: 0.0001
  Beta1: 0.95
  Beta2: 0.9999
  Batch Size: 64
  Regularization: 0.001
  Dense Units: 256

[11/13] Analyzing: trial_10_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du1024.h5
--------------------------------------------------------------------------------




Accuracy: 0.9232
AUC: 0.9809
Precision (Cats): 0.9538
Recall (Cats): 0.8895
Precision (Non-cats): 0.8965
Recall (Non-cats): 0.9569

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.99
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 1024

[12/13] Analyzing: trial_11_lr0.001_b1-0.8_b2-0.99_bs64_reg0.0001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9335
AUC: 0.9872
Precision (Cats): 0.9443
Recall (Cats): 0.9213
Precision (Non-cats): 0.9232
Recall (Non-cats): 0.9457

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.8
  Beta2: 0.99
  Batch Size: 64
  Regularization: 0.0001
  Dense Units: 512

[13/13] Analyzing: trial_12_lr0.0001_b1-0.99_b2-0.9999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------
Accuracy: 0.8146
AUC: 0.8854
Precision (Cats): 0.8032
Recall (Cats): 0.8333
Precision (Non-cats): 0.8268
Recall (Non-cats): 0.7959

Model Parameters:
  Learning Rate: 0.0001
  Beta1: 0.99
  Beta2: 0.9999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

PERFORMANCE RANKING
 1. best_tuned_model_with_betas.h5                               | Acc: 0.9625 | AUC: 0.9930
 2. trial_11_lr0.001_b1-0.8_b2-0.99_bs64_reg0.0001_du512.h5      | Acc: 0.9335 | AUC: 0.9872
 3. trial_05_lr0.0005_b1-0.9_b2-0.99_bs32_reg0.0005_du512.h5     | Acc: 0.9232 | AUC: 0.9825
 4. trial_10_

In [4]:
def create_model_comparison_visualizations(all_results, y_true):
    """Create comprehensive visualizations comparing all models"""
    
    n_models = len(all_results)
    
    # Create a large figure with multiple subplots
    fig = plt.figure(figsize=(20, 15))
    
    # 1. Accuracy comparison bar plot
    plt.subplot(3, 3, 1)
    accuracies = [r['accuracy'] for r in all_results]
    model_names = [r['filename'][:20] + '...' if len(r['filename']) > 20 else r['filename'] for r in all_results]
    
    bars = plt.bar(range(len(accuracies)), accuracies)
    plt.xlabel('Model')
    plt.ylabel('Test Accuracy')
    plt.title('Test Accuracy Comparison')
    plt.xticks(range(len(accuracies)), [f'M{i+1}' for i in range(len(accuracies))], rotation=45)
    plt.grid(axis='y', alpha=0.3)
    
    # Color bars by performance
    max_acc = max(accuracies)
    for i, bar in enumerate(bars):
        if accuracies[i] == max_acc:
            bar.set_color('gold')
        elif accuracies[i] >= max_acc - 0.01:
            bar.set_color('lightgreen')
        else:
            bar.set_color('lightblue')
    
    # 2. AUC comparison
    plt.subplot(3, 3, 2)
    aucs = [r['auc'] for r in all_results]
    bars = plt.bar(range(len(aucs)), aucs)
    plt.xlabel('Model')
    plt.ylabel('AUC')
    plt.title('AUC Comparison')
    plt.xticks(range(len(aucs)), [f'M{i+1}' for i in range(len(aucs))], rotation=45)
    plt.grid(axis='y', alpha=0.3)
    
    # 3. ROC Curves for top 5 models
    plt.subplot(3, 3, 3)
    colors = plt.cm.Set1(np.linspace(0, 1, min(5, len(all_results))))
    
    for i, result in enumerate(all_results[:5]):
        fpr, tpr, _ = roc_curve(y_true, result['predictions'])
        plt.plot(fpr, tpr, color=colors[i], 
                label=f"M{i+1} (AUC={result['auc']:.3f})", linewidth=2)
    
    plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves (Top 5 Models)')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(alpha=0.3)
    
    # 4. Accuracy vs AUC scatter
    plt.subplot(3, 3, 4)
    plt.scatter(accuracies, aucs, alpha=0.7, s=100)
    for i, (acc, auc) in enumerate(zip(accuracies, aucs)):
        plt.annotate(f'M{i+1}', (acc, auc), xytext=(5, 5), textcoords='offset points', fontsize=8)
    plt.xlabel('Test Accuracy')
    plt.ylabel('AUC')
    plt.title('Accuracy vs AUC')
    plt.grid(alpha=0.3)
    
    # 5. Parameter analysis (if available)
    plt.subplot(3, 3, 5)
    trial_results = [r for r in all_results if r['model_info'] is not None]
    if trial_results:
        learning_rates = [r['model_info']['lr'] for r in trial_results]
        lr_accuracies = [r['accuracy'] for r in trial_results]
        plt.scatter(learning_rates, lr_accuracies, alpha=0.7, s=100)
        plt.xlabel('Learning Rate')
        plt.ylabel('Test Accuracy')
        plt.title('Learning Rate vs Accuracy')
        plt.xscale('log')
        plt.grid(alpha=0.3)
    
    # 6. Beta parameter analysis
    plt.subplot(3, 3, 6)
    if trial_results:
        beta1_values = [r['model_info']['b1'] for r in trial_results]
        beta1_accuracies = [r['accuracy'] for r in trial_results]
        plt.scatter(beta1_values, beta1_accuracies, alpha=0.7, s=100)
        plt.xlabel('Beta1 (Momentum)')
        plt.ylabel('Test Accuracy')
        plt.title('Beta1 vs Accuracy')
        plt.grid(alpha=0.3)
    
    # 7. Confusion matrix for best model
    plt.subplot(3, 3, 7)
    best_model = all_results[0]
    cm = best_model['confusion_matrix']
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=['Cats', 'Non-cats'], 
                yticklabels=['Cats', 'Non-cats'])
    plt.title(f'Best Model Confusion Matrix\n({best_model["filename"][:30]}...)')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    
    # 8. Performance distribution
    plt.subplot(3, 3, 8)
    plt.hist(accuracies, bins=10, alpha=0.7, edgecolor='black')
    plt.axvline(np.mean(accuracies), color='red', linestyle='--', 
                label=f'Mean: {np.mean(accuracies):.3f}')
    plt.axvline(np.median(accuracies), color='orange', linestyle='--', 
                label=f'Median: {np.median(accuracies):.3f}')
    plt.xlabel('Test Accuracy')
    plt.ylabel('Number of Models')
    plt.title('Accuracy Distribution')
    plt.legend()
    plt.grid(axis='y', alpha=0.3)
    
    # 9. Top vs Bottom models comparison
    plt.subplot(3, 3, 9)
    top_3_acc = np.mean([r['accuracy'] for r in all_results[:3]])
    bottom_3_acc = np.mean([r['accuracy'] for r in all_results[-3:]])
    top_3_auc = np.mean([r['auc'] for r in all_results[:3]])
    bottom_3_auc = np.mean([r['auc'] for r in all_results[-3:]])
    
    categories = ['Top 3', 'Bottom 3']
    acc_values = [top_3_acc, bottom_3_acc]
    auc_values = [top_3_auc, bottom_3_auc]
    
    x = np.arange(len(categories))
    width = 0.35
    
    plt.bar(x - width/2, acc_values, width, label='Accuracy', alpha=0.8)
    plt.bar(x + width/2, auc_values, width, label='AUC', alpha=0.8)
    plt.xlabel('Model Groups')
    plt.ylabel('Performance')
    plt.title('Top vs Bottom Models')
    plt.xticks(x, categories)
    plt.legend()
    plt.grid(axis='y', alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    return fig

def generate_detailed_report(all_results, y_true):
    """Generate a detailed text report of all models"""
    
    print(f"\n{'='*120}")
    print("DETAILED MODEL ANALYSIS REPORT")
    print('='*120)
    
    # Overall statistics
    accuracies = [r['accuracy'] for r in all_results]
    aucs = [r['auc'] for r in all_results]
    
    print(f"\nOVERALL STATISTICS:")
    print(f"  Number of models analyzed: {len(all_results)}")
    print(f"  Test samples: {len(y_true)}")
    print(f"  Accuracy - Mean: {np.mean(accuracies):.4f}, Std: {np.std(accuracies):.4f}")
    print(f"  Accuracy - Min: {np.min(accuracies):.4f}, Max: {np.max(accuracies):.4f}")
    print(f"  AUC - Mean: {np.mean(aucs):.4f}, Std: {np.std(aucs):.4f}")
    print(f"  AUC - Min: {np.min(aucs):.4f}, Max: {np.max(aucs):.4f}")
    
    # Top 3 models detailed analysis
    print(f"\nTOP 3 MODELS DETAILED ANALYSIS:")
    print("-" * 120)
    
    for i, result in enumerate(all_results[:3], 1):
        print(f"\nüèÜ RANK {i}: {result['filename']}")
        print(f"   Test Accuracy: {result['accuracy']:.4f}")
        print(f"   AUC Score: {result['auc']:.4f}")
        
        # Confusion matrix details
        cm = result['confusion_matrix']
        tn, fp, fn, tp = cm.ravel()
        
        print(f"   Confusion Matrix:")
        print(f"     True Negatives (Correct Cats): {tn}")
        print(f"     False Positives (Cats as Non-cats): {fp}")
        print(f"     False Negatives (Non-cats as Cats): {fn}")
        print(f"     True Positives (Correct Non-cats): {tp}")
        
        # Class-specific metrics
        cats_precision = result['classification_report']['Cats']['precision']
        cats_recall = result['classification_report']['Cats']['recall']
        cats_f1 = result['classification_report']['Cats']['f1-score']
        
        noncats_precision = result['classification_report']['Non-cats']['precision']
        noncats_recall = result['classification_report']['Non-cats']['recall']
        noncats_f1 = result['classification_report']['Non-cats']['f1-score']
        
        print(f"   Class-specific Performance:")
        print(f"     Cats - Precision: {cats_precision:.4f}, Recall: {cats_recall:.4f}, F1: {cats_f1:.4f}")
        print(f"     Non-cats - Precision: {noncats_precision:.4f}, Recall: {noncats_recall:.4f}, F1: {noncats_f1:.4f}")
        
        # Model parameters if available
        if result['model_info']:
            info = result['model_info']
            print(f"   Hyperparameters:")
            print(f"     Learning Rate: {info.get('lr', 'N/A')}")
            print(f"     Adam Beta1: {info.get('b1', 'N/A')}")
            print(f"     Adam Beta2: {info.get('b2', 'N/A')}")
            print(f"     Batch Size: {info.get('bs', 'N/A')}")
            print(f"     Regularization: {info.get('reg', 'N/A')}")
            print(f"     Dense Units: {info.get('du', 'N/A')}")
    
    # Parameter analysis
    trial_results = [r for r in all_results if r['model_info'] is not None]
    if trial_results:
        print(f"\nHYPERPARAMETER ANALYSIS:")
        print("-" * 60)
        
        # Learning rate analysis
        lr_performance = {}
        for result in trial_results:
            lr = result['model_info']['lr']
            if lr not in lr_performance:
                lr_performance[lr] = []
            lr_performance[lr].append(result['accuracy'])
        
        print(f"\nLearning Rate Performance:")
        for lr, accs in sorted(lr_performance.items()):
            avg_acc = np.mean(accs)
            std_acc = np.std(accs)
            print(f"  LR {lr}: {avg_acc:.4f} ¬± {std_acc:.4f} (n={len(accs)})")
        
        # Beta parameters analysis
        beta1_performance = {}
        beta2_performance = {}
        
        for result in trial_results:
            b1 = result['model_info']['b1']
            b2 = result['model_info']['b2']
            acc = result['accuracy']
            
            if b1 not in beta1_performance:
                beta1_performance[b1] = []
            beta1_performance[b1].append(acc)
            
            if b2 not in beta2_performance:
                beta2_performance[b2] = []
            beta2_performance[b2].append(acc)
        
        print(f"\nBeta1 (Momentum) Performance:")
        for b1, accs in sorted(beta1_performance.items()):
            avg_acc = np.mean(accs)
            std_acc = np.std(accs)
            print(f"  Œ≤‚ÇÅ={b1}: {avg_acc:.4f} ¬± {std_acc:.4f} (n={len(accs)})")
        
        print(f"\nBeta2 (Variance) Performance:")
        for b2, accs in sorted(beta2_performance.items()):
            avg_acc = np.mean(accs)
            std_acc = np.std(accs)
            print(f"  Œ≤‚ÇÇ={b2}: {avg_acc:.4f} ¬± {std_acc:.4f} (n={len(accs)})")

print("Visualization and reporting functions ready!")

Visualization and reporting functions ready!


In [5]:
# Execute Comprehensive Model Analysis
print("üîç STARTING COMPREHENSIVE MODEL ANALYSIS")
print("="*80)

# Run the analysis
all_results, y_true = analyze_all_saved_models()

# Generate visualizations
print(f"\nüìä Creating comprehensive visualizations...")
fig = create_model_comparison_visualizations(all_results, y_true)

# Generate detailed report
generate_detailed_report(all_results, y_true)

print(f"\n‚úÖ Model analysis complete!")
print(f"üìà {len(all_results)} models analyzed on {len(y_true)} test samples")

üîç STARTING COMPREHENSIVE MODEL ANALYSIS
Found 13 model files to analyze:
  1. best_tuned_model_with_betas.h5
  2. trial_01_lr0.0005_b1-0.9_b2-0.999_bs32_reg0.0005_du512.h5
  3. trial_02_lr0.001_b1-0.95_b2-0.999_bs32_reg0.0001_du512.h5
  4. trial_03_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du512.h5
  5. trial_04_lr0.001_b1-0.8_b2-0.999_bs32_reg0.0005_du512.h5
  6. trial_05_lr0.0005_b1-0.9_b2-0.99_bs32_reg0.0005_du512.h5
  7. trial_06_lr0.0001_b1-0.9_b2-0.9999_bs32_reg0.001_du512.h5
  8. trial_07_lr0.0005_b1-0.95_b2-0.9999_bs64_reg0.0005_du1024.h5
  9. trial_08_lr0.001_b1-0.8_b2-0.99_bs16_reg0.0001_du512.h5
  10. trial_09_lr0.0001_b1-0.95_b2-0.9999_bs64_reg0.001_du256.h5
  11. trial_10_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du1024.h5
  12. trial_11_lr0.001_b1-0.8_b2-0.99_bs64_reg0.0001_du512.h5
  13. trial_12_lr0.0001_b1-0.99_b2-0.9999_bs32_reg0.0005_du512.h5

Analyzing models on 1068 test samples...





Test set distribution:
  Cats (class 0): 534 samples
  Non-cats (class 1): 534 samples

INDIVIDUAL MODEL ANALYSIS

[1/13] Analyzing: best_tuned_model_with_betas.h5
--------------------------------------------------------------------------------


  self._warn_if_super_not_called()


Accuracy: 0.9625
AUC: 0.9930
Precision (Cats): 0.9458
Recall (Cats): 0.9813
Precision (Non-cats): 0.9805
Recall (Non-cats): 0.9438

[2/13] Analyzing: trial_01_lr0.0005_b1-0.9_b2-0.999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9213
AUC: 0.9764
Precision (Cats): 0.9518
Recall (Cats): 0.8876
Precision (Non-cats): 0.8947
Recall (Non-cats): 0.9551

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.9
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[3/13] Analyzing: trial_02_lr0.001_b1-0.95_b2-0.999_bs32_reg0.0001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.8886
AUC: 0.9744
Precision (Cats): 0.9621
Recall (Cats): 0.8090
Precision (Non-cats): 0.8352
Recall (Non-cats): 0.9682

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.95
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0001
  Dense Units: 512

[4/13] Analyzing: trial_03_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9073
AUC: 0.9773
Precision (Cats): 0.9718
Recall (Cats): 0.8390
Precision (Non-cats): 0.8583
Recall (Non-cats): 0.9757

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.99
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[5/13] Analyzing: trial_04_lr0.001_b1-0.8_b2-0.999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.8895
AUC: 0.9811
Precision (Cats): 0.9860
Recall (Cats): 0.7903
Precision (Non-cats): 0.8250
Recall (Non-cats): 0.9888

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.8
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[6/13] Analyzing: trial_05_lr0.0005_b1-0.9_b2-0.99_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9232
AUC: 0.9825
Precision (Cats): 0.9728
Recall (Cats): 0.8708
Precision (Non-cats): 0.8831
Recall (Non-cats): 0.9757

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.9
  Beta2: 0.99
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

[7/13] Analyzing: trial_06_lr0.0001_b1-0.9_b2-0.9999_bs32_reg0.001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.7116
AUC: 0.8789
Precision (Cats): 0.9007
Recall (Cats): 0.4757
Precision (Non-cats): 0.6438
Recall (Non-cats): 0.9476

Model Parameters:
  Learning Rate: 0.0001
  Beta1: 0.9
  Beta2: 0.9999
  Batch Size: 32
  Regularization: 0.001
  Dense Units: 512

[8/13] Analyzing: trial_07_lr0.0005_b1-0.95_b2-0.9999_bs64_reg0.0005_du1024.h5
--------------------------------------------------------------------------------




Accuracy: 0.9204
AUC: 0.9778
Precision (Cats): 0.9535
Recall (Cats): 0.8839
Precision (Non-cats): 0.8918
Recall (Non-cats): 0.9569

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.95
  Beta2: 0.9999
  Batch Size: 64
  Regularization: 0.0005
  Dense Units: 1024

[9/13] Analyzing: trial_08_lr0.001_b1-0.8_b2-0.99_bs16_reg0.0001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9176
AUC: 0.9837
Precision (Cats): 0.9785
Recall (Cats): 0.8539
Precision (Non-cats): 0.8704
Recall (Non-cats): 0.9813

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.8
  Beta2: 0.99
  Batch Size: 16
  Regularization: 0.0001
  Dense Units: 512

[10/13] Analyzing: trial_09_lr0.0001_b1-0.95_b2-0.9999_bs64_reg0.001_du256.h5
--------------------------------------------------------------------------------




Accuracy: 0.7303
AUC: 0.8076
Precision (Cats): 0.7236
Recall (Cats): 0.7453
Precision (Non-cats): 0.7375
Recall (Non-cats): 0.7154

Model Parameters:
  Learning Rate: 0.0001
  Beta1: 0.95
  Beta2: 0.9999
  Batch Size: 64
  Regularization: 0.001
  Dense Units: 256

[11/13] Analyzing: trial_10_lr0.0005_b1-0.99_b2-0.999_bs32_reg0.0005_du1024.h5
--------------------------------------------------------------------------------




Accuracy: 0.9232
AUC: 0.9809
Precision (Cats): 0.9538
Recall (Cats): 0.8895
Precision (Non-cats): 0.8965
Recall (Non-cats): 0.9569

Model Parameters:
  Learning Rate: 0.0005
  Beta1: 0.99
  Beta2: 0.999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 1024

[12/13] Analyzing: trial_11_lr0.001_b1-0.8_b2-0.99_bs64_reg0.0001_du512.h5
--------------------------------------------------------------------------------




Accuracy: 0.9335
AUC: 0.9872
Precision (Cats): 0.9443
Recall (Cats): 0.9213
Precision (Non-cats): 0.9232
Recall (Non-cats): 0.9457

Model Parameters:
  Learning Rate: 0.001
  Beta1: 0.8
  Beta2: 0.99
  Batch Size: 64
  Regularization: 0.0001
  Dense Units: 512

[13/13] Analyzing: trial_12_lr0.0001_b1-0.99_b2-0.9999_bs32_reg0.0005_du512.h5
--------------------------------------------------------------------------------
Accuracy: 0.8146
AUC: 0.8854
Precision (Cats): 0.8032
Recall (Cats): 0.8333
Precision (Non-cats): 0.8268
Recall (Non-cats): 0.7959

Model Parameters:
  Learning Rate: 0.0001
  Beta1: 0.99
  Beta2: 0.9999
  Batch Size: 32
  Regularization: 0.0005
  Dense Units: 512

PERFORMANCE RANKING
 1. best_tuned_model_with_betas.h5                               | Acc: 0.9625 | AUC: 0.9930
 2. trial_11_lr0.001_b1-0.8_b2-0.99_bs64_reg0.0001_du512.h5      | Acc: 0.9335 | AUC: 0.9872
 3. trial_05_lr0.0005_b1-0.9_b2-0.99_bs32_reg0.0005_du512.h5     | Acc: 0.9232 | AUC: 0.9825
 4. trial_10_

NameError: name 'plt' is not defined