In [1]:
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Num GPUs Available:  1


In [None]:
from pathlib import Path
import os

# Get current working directory
current_dir = Path(os.getcwd()).parent

# Adjust path based on your notebook location relative to data
npz_path = current_dir / 'data'  / '2-Data' / 'GoldCoast' / 'current_wind_20100101_20241231_GoaldCoast.npz'
stings_path = current_dir / 'data' / '2-Data' / 'GoldCoast' / 'goaldcoast_stings.csv'

# For .py files
# npz_path = Path(__file__).parent.parent / 'data'  / '2-Data' / 'GoldCoast' / 'current_wind_20100101_20241231_GoaldCoast.npz'
# stings_path = Path(__file__).parent.parent / 'data'  / '2-Data' / 'GoldCoast' / 'goaldcoast_stings.csv'


In [None]:

# Comprehensive 3D CNN for Bluebottle Sting Prediction
# This code handles data combination, preprocessing, and model building

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("\n" + "="*70)
print("STEP 1: LOADING AND COMBINING DATASETS")
print("="*70)

# Load environmental data
print("\nLoading environmental data from NPZ file...")
env_data = np.load(npz_path)

# Extract data
UVTs_data = env_data['UVTempSalt_UVTs']  # Shape: (5479, 6, 15, 15)
crop_lon_min = env_data['crop_lon_min']
crop_lon_max = env_data['crop_lon_max']
crop_lat_min = env_data['crop_lat_min']
crop_lat_max = env_data['crop_lat_max']

print(f"Environmental data shape: {UVTs_data.shape}")
print(f"Dimensions: {UVTs_data.shape[0]} days, {UVTs_data.shape[1]} channels, {UVTs_data.shape[2]}x{UVTs_data.shape[3]} spatial")
print(f"Channels: 1=U_current, 2=V_current, 3=SST, 4=Salinity, 5=U_wind, 6=V_wind")
print(f"Geographic bounds: Lon[{crop_lon_min:.2f}, {crop_lon_max:.2f}], Lat[{crop_lat_min:.2f}, {crop_lat_max:.2f}]")

# Create date range for environmental data
start_date = datetime(2010, 1, 1)
env_dates = pd.date_range(start=start_date, periods=UVTs_data.shape[0], freq='D')
print(f"Date range: {env_dates[0]} to {env_dates[-1]}")

# Load sting data
print("\nLoading sting data from CSV...")
sting_data = pd.read_csv(stings_path)
print(f"Sting data shape: {sting_data.shape}")
print(f"Columns: {list(sting_data.columns)}")


# Convert Time column to datetime (robust parsing)
# Many files use day-first format like '14/01/2011' so set dayfirst=True.
# Use errors='coerce' to detect unparsable values, then try fallbacks if needed.
sting_data['time_parsed'] = pd.to_datetime(sting_data['time'], dayfirst=True, errors='coerce')
# Report how many failed to parse with the primary parser
n_failed = sting_data['time_parsed'].isna().sum()

print(f"\nSting data date range: {sting_data['time'].min()} to {sting_data['time'].max()}")
print(f"Number of sting records: {len(sting_data)}")
print(f"\nClass distribution:")
print(sting_data['stings_Binary'].value_counts())




STEP 1: LOADING AND COMBINING DATASETS

Loading environmental data from NPZ file...
Environmental data shape: (5479, 6, 15, 15)
Dimensions: 5479 days, 6 channels, 15x15 spatial
Channels: 1=U_current, 2=V_current, 3=SST, 4=Salinity, 5=U_wind, 6=V_wind
Geographic bounds: Lon[153.40, 154.87], Lat[-27.75, -29.20]
Date range: 2010-01-01 00:00:00 to 2024-12-31 00:00:00

Loading sting data from CSV...
Sting data shape: (797, 3)
Columns: ['time', 'stings_sum', 'stings_Binary']

Sting data date range: 1/01/2011 to 9/12/2023
Number of sting records: 797

Class distribution:
stings_Binary
0    570
1    227
Name: count, dtype: int64


In [None]:



import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import os
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("="*80)
print("3D CNN FOR BLUEBOTTLE STING PREDICTION - NO AUGMENTATION")
print("="*80)
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU'))} device(s)")
print("="*80)


3D CNN FOR BLUEBOTTLE STING PREDICTION - NO AUGMENTATION
TensorFlow version: 2.10.1
GPU Available: 1 device(s)


In [15]:

# ============================================================================
# STEP 1: LOAD AND COMBINE DATASETS
# ============================================================================

print("\n" + "="*80)
print("STEP 1: LOADING AND COMBINING DATASETS")
print("="*80)

# Load environmental data
print("\nLoading environmental data from NPZ file...")
env_data = np.load(npz_path)

UVTs_data = env_data['UVTempSalt_UVTs']  # Shape: (5479, 6, 15, 15)
crop_lon_min = env_data['crop_lon_min']
crop_lon_max = env_data['crop_lon_max']
crop_lat_min = env_data['crop_lat_min']
crop_lat_max = env_data['crop_lat_max']

print(f"Environmental data shape: {UVTs_data.shape}")
print(f"  - Days: {UVTs_data.shape[0]}")
print(f"  - Channels: {UVTs_data.shape[1]} (U_current, V_current, SST, Salinity, U_wind, V_wind)")
print(f"  - Spatial grid: {UVTs_data.shape[2]}x{UVTs_data.shape[3]} pixels")
print(f"  - Geographic bounds: Lon[{crop_lon_min:.2f}, {crop_lon_max:.2f}], Lat[{crop_lat_min:.2f}, {crop_lat_max:.2f}]")

# Create date range for environmental data
start_date = datetime(2010, 1, 1)
env_dates = pd.date_range(start=start_date, periods=UVTs_data.shape[0], freq='D')
print(f"  - Date range: {env_dates[0].date()} to {env_dates[-1].date()}")

# Load sting data
sting_data = pd.read_csv(stings_path)
sting_data['time'] = pd.to_datetime(sting_data['time'], dayfirst=True)

print(f"Sting data shape: {sting_data.shape}")
print(f"  - Records: {len(sting_data)}")
print(f"  - Date range: {sting_data['time'].min().date()} to {sting_data['time'].max().date()}")
print(f"  - Columns: {list(sting_data.columns)}")

# Match dates
print("\n Matching dates between environmental and sting data...")
env_df = pd.DataFrame({'Date': env_dates, 'env_index': range(len(env_dates))})
sting_data['Date'] = sting_data['time'].dt.date
env_df['Date'] = env_df['Date'].dt.date

merged_data = sting_data.merge(env_df, on='Date', how='inner')
print(f"Matched records: {len(merged_data)}")

# Extract matched environmental data and labels
matched_indices = merged_data['env_index'].values
X = UVTs_data[matched_indices]  # Shape: (n_matched, 6, 15, 15)
y_binary = merged_data['stings_Binary'].values
y_sum = merged_data['stings_sum'].values

print(f"\n Final dataset shape: {X.shape}")
print(f"Label distribution (binary):")
unique, counts = np.unique(y_binary, return_counts=True)
for label, count in zip(unique, counts):
    print(f"  - Class {label}: {count} samples ({count/len(y_binary)*100:.1f}%)")



STEP 1: LOADING AND COMBINING DATASETS

Loading environmental data from NPZ file...
Environmental data shape: (5479, 6, 15, 15)
  - Days: 5479
  - Channels: 6 (U_current, V_current, SST, Salinity, U_wind, V_wind)
  - Spatial grid: 15x15 pixels
  - Geographic bounds: Lon[153.40, 154.87], Lat[-27.75, -29.20]
  - Date range: 2010-01-01 to 2024-12-31
Sting data shape: (797, 3)
  - Records: 797
  - Date range: 2009-01-03 to 2025-02-23
  - Columns: ['time', 'stings_sum', 'stings_Binary']

 Matching dates between environmental and sting data...
Matched records: 744

 Final dataset shape: (744, 6, 15, 15)
Label distribution (binary):
  - Class 0: 545 samples (73.3%)
  - Class 1: 199 samples (26.7%)


In [18]:

# ============================================================================
# STEP 2: DATA PREPROCESSING
# ============================================================================

print("\n" + "="*80)
print("STEP 2: DATA PREPROCESSING")
print("="*80)

# Reshape for processing: (samples, channels, height, width)
print(f"\nOriginal shape: {X.shape}")

# Transpose to (samples, height, width, channels) for TensorFlow
X_transposed = np.transpose(X, (0, 2, 3, 1))
print(f"Transposed shape: {X_transposed.shape}")

# Handle NaN values (land masks in ocean current data)
print(f"\nHandling NaN values (land masks)...")
nan_count = np.isnan(X_transposed).sum()
print(f"Total NaN values: {nan_count}")

# Replace NaN with 0 (representing land)
X_processed = np.nan_to_num(X_transposed, nan=0.0)
print(f"NaN values after processing: {np.isnan(X_processed).sum()}")

# Channel-wise normalization
print(f"\nNormalizing channels...")
X_normalized = np.zeros_like(X_processed)
channel_stats = []

for i in range(X_processed.shape[-1]):
    channel_data = X_processed[:, :, :, i]
    # Only compute stats on non-zero values (ocean pixels)
    ocean_mask = channel_data != 0
    if ocean_mask.sum() > 0:
        mean_val = channel_data[ocean_mask].mean()
        std_val = channel_data[ocean_mask].std()
    else:
        mean_val = 0
        std_val = 1
    
    # Normalize
    X_normalized[:, :, :, i] = np.where(
        ocean_mask,
        (channel_data - mean_val) / (std_val + 1e-8),
        0
    )
    
    channel_stats.append({'channel': i, 'mean': mean_val, 'std': std_val})
    print(f"  - Channel {i}: mean={mean_val:.4f}, std={std_val:.4f}")



STEP 2: DATA PREPROCESSING

Original shape: (744, 6, 15, 15)
Transposed shape: (744, 15, 15, 6)

Handling NaN values (land masks)...
Total NaN values: 113088
NaN values after processing: 0

Normalizing channels...
  - Channel 0: mean=-0.0156, std=0.1828
  - Channel 1: mean=-0.4259, std=0.3755
  - Channel 2: mean=25.0232, std=1.6667
  - Channel 3: mean=35.4578, std=0.1574
  - Channel 4: mean=-2.1909, std=3.2990
  - Channel 5: mean=0.2956, std=5.3321


In [22]:

# ============================================================================
# STEP 3: SAVE COMBINED DATASET
# ============================================================================

print("\\n" + "="*80)
print("STEP 3: SAVING COMBINED DATASET")
print("="*80)


processed_data_path = current_dir / 'data' / 'processed' / 'GoldCoast' / 'combined_bluebottle_dataset_no_aug.npz'

np.savez_compressed(
    processed_data_path,
    X_normalized=X_normalized,
    X_raw=X_processed,
    y_binary=y_binary,
    y_sum=y_sum,
    dates=merged_data['Date'].values,
    channel_stats=channel_stats,
    crop_bounds={'lon_min': crop_lon_min, 'lon_max': crop_lon_max,
                 'lat_min': crop_lat_min, 'lat_max': crop_lat_max}
)
print("Saved processed data ")


STEP 3: SAVING COMBINED DATASET
\nSaving combined and preprocessed dataset...
Saved processed data 


In [23]:
# ============================================================================
# STEP 4: TRAIN-VALIDATION-TEST SPLIT (NO AUGMENTATION)
# ============================================================================

print("\\n" + "="*80)
print("STEP 4: TRAIN-VALIDATION-TEST SPLIT")
print("="*80)

# Stratified split
X_temp, X_test, y_temp, y_test = train_test_split(
    X_normalized, y_binary, 
    test_size=0.15, 
    random_state=42, 
    stratify=y_binary
)

X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, 
    test_size=0.176,  # 0.176 * 0.85 ≈ 0.15 of total
    random_state=42, 
    stratify=y_temp
)

print(f"\\nDataset splits:")
print(f"  - Training: {X_train.shape[0]} samples")
print(f"  - Validation: {X_val.shape[0]} samples")
print(f"  - Test: {X_test.shape[0]} samples")

print(f"\\nClass distribution in splits:")
for split_name, y_split in [('Train', y_train), ('Val', y_val), ('Test', y_test)]:
    unique, counts = np.unique(y_split, return_counts=True)
    print(f"  {split_name}:", end='')
    for label, count in zip(unique, counts):
        print(f" Class {label}={count} ({count/len(y_split)*100:.1f}%)", end='')
    print()

# NO AUGMENTATION - Use original training data as-is
print(f"\\n⚠ NO DATA AUGMENTATION APPLIED")
print(f"  - Training samples remain: {X_train.shape[0]}")

# Compute class weights to handle imbalance
class_weights_array = compute_class_weight(
    'balanced',
    classes=np.unique(y_binary),
    y=y_binary
)
class_weights = {i: class_weights_array[i] for i in range(len(class_weights_array))}
print(f"\\nClass weights for imbalanced data: {class_weights}")


STEP 4: TRAIN-VALIDATION-TEST SPLIT
\nDataset splits:
  - Training: 520 samples
  - Validation: 112 samples
  - Test: 112 samples
\nClass distribution in splits:
  Train: Class 0=381 (73.3%) Class 1=139 (26.7%)
  Val: Class 0=82 (73.2%) Class 1=30 (26.8%)
  Test: Class 0=82 (73.2%) Class 1=30 (26.8%)
\n⚠ NO DATA AUGMENTATION APPLIED
  - Training samples remain: 520
\nClass weights for imbalanced data: {0: 0.6825688073394496, 1: 1.8693467336683418}


In [24]:

# ============================================================================
# STEP 5: BUILD 3D CNN MODELS (STRONGER REGULARIZATION FOR NO AUGMENTATION)
# ============================================================================

print("\\n" + "="*80)
print("STEP 5: BUILDING 3D CNN MODELS")
print("="*80)
print("Note: Using STRONGER regularization since no augmentation is applied\\n")

input_shape = X_train.shape[1:]  # (15, 15, 6)
print(f"Input shape: {input_shape}")

# ----------------------------------------------------------------------------
# MODEL 1: Lightweight 3D CNN (EXTRA regularization for no augmentation)
# ----------------------------------------------------------------------------

def build_lightweight_3d_cnn(input_shape, l2_reg=0.005, dropout_rate=0.5):
    """
    Lightweight 3D CNN with EXTRA strong regularization (no augmentation).
    
    Architecture principles:
    - Minimal parameters to prevent overfitting
    - EXTRA strong L2 regularization (0.005 instead of 0.001)
    - HIGH dropout (0.5 instead of 0.4)
    - Batch normalization for stable training
    - Global average pooling to reduce parameters
    """
    model = models.Sequential([
        # Input layer
        layers.Input(shape=input_shape),
        
        # First conv block - extract spatial features
        layers.Conv2D(32, (3, 3), padding='same', 
                     kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(dropout_rate),
        
        # Second conv block - learn patterns
        layers.Conv2D(64, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(dropout_rate),
        
        # Third conv block - abstract features
        layers.Conv2D(96, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        
        # Global pooling to reduce parameters
        layers.GlobalAveragePooling2D(),
        
        # Dense layers with strong regularization
        layers.Dense(64, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg)),
        layers.Dropout(dropout_rate),
        
        # Output layer
        layers.Dense(1, activation='sigmoid')
    ])
    
    return model

model_lightweight = build_lightweight_3d_cnn(input_shape)
model_lightweight.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc'), 
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("\\n" + "-"*80)
print("MODEL 1: Lightweight 3D CNN (Extra Regularization)")
print("-"*80)
model_lightweight.summary()
print(f"Total parameters: {model_lightweight.count_params():,}")


STEP 5: BUILDING 3D CNN MODELS
Note: Using STRONGER regularization since no augmentation is applied\n
Input shape: (15, 15, 6)
\n--------------------------------------------------------------------------------
MODEL 1: Lightweight 3D CNN (Extra Regularization)
--------------------------------------------------------------------------------
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_14 (Conv2D)          (None, 15, 15, 32)        1760      
                                                                 
 batch_normalization_13 (Bat  (None, 15, 15, 32)       128       
 chNormalization)                                                
                                                                 
 activation_13 (Activation)  (None, 15, 15, 32)        0         
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 7, 7,

In [25]:

# ----------------------------------------------------------------------------
# MODEL 2: Deep 3D CNN with Residual Connections (EXTRA regularization)
# ----------------------------------------------------------------------------

def build_residual_3d_cnn(input_shape, l2_reg=0.005, dropout_rate=0.5):
    """
    Deeper 3D CNN with residual connections and EXTRA regularization.
    """
    inputs = layers.Input(shape=input_shape)
    
    # Initial convolution
    x = layers.Conv2D(32, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    # Residual block 1
    shortcut = x
    x = layers.Conv2D(32, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(32, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Add()([x, shortcut])
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(dropout_rate)(x)
    
    # Residual block 2
    shortcut = layers.Conv2D(64, (1, 1), padding='same')(x)
    x = layers.Conv2D(64, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(64, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Add()([x, shortcut])
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(dropout_rate)(x)
    
    # Final convolution
    x = layers.Conv2D(96, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    # Global pooling and dense layers
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(64, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.Dropout(dropout_rate)(x)
    
    outputs = layers.Dense(1, activation='sigmoid')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs)
    return model

model_residual = build_residual_3d_cnn(input_shape)
model_residual.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0003),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc'),
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("\\n" + "-"*80)
print("MODEL 2: Residual 3D CNN (Extra Regularization)")
print("-"*80)
model_residual.summary()
print(f"Total parameters: {model_residual.count_params():,}")


\n--------------------------------------------------------------------------------
MODEL 2: Residual 3D CNN (Extra Regularization)
--------------------------------------------------------------------------------
Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 15, 15, 6)]  0           []                               
                                                                                                  
 conv2d_17 (Conv2D)             (None, 15, 15, 32)   1760        ['input_5[0][0]']                
                                                                                                  
 batch_normalization_16 (BatchN  (None, 15, 15, 32)  128         ['conv2d_17[0][0]']              
 ormalization)                                                                

In [26]:

# ----------------------------------------------------------------------------
# MODEL 3: Multi-Scale 3D CNN (EXTRA regularization)
# ----------------------------------------------------------------------------

def build_multiscale_3d_cnn(input_shape, l2_reg=0.005, dropout_rate=0.5):
    """
    Multi-scale CNN with EXTRA regularization (no augmentation).
    """
    inputs = layers.Input(shape=input_shape)
    
    # Path 1: Small kernel (3x3) - local features
    path1 = layers.Conv2D(20, (3, 3), padding='same',
                         kernel_regularizer=regularizers.l2(l2_reg))(inputs)
    path1 = layers.BatchNormalization()(path1)
    path1 = layers.Activation('relu')(path1)
    
    # Path 2: Medium kernel (5x5) - medium-scale features
    path2 = layers.Conv2D(20, (5, 5), padding='same',
                         kernel_regularizer=regularizers.l2(l2_reg))(inputs)
    path2 = layers.BatchNormalization()(path2)
    path2 = layers.Activation('relu')(path2)
    
    # Path 3: Large kernel (7x7) - large-scale features
    path3 = layers.Conv2D(12, (7, 7), padding='same',
                         kernel_regularizer=regularizers.l2(l2_reg))(inputs)
    path3 = layers.BatchNormalization()(path3)
    path3 = layers.Activation('relu')(path3)
    
    # Concatenate multi-scale features
    x = layers.Concatenate()([path1, path2, path3])
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(dropout_rate)(x)
    
    # Second conv block
    x = layers.Conv2D(96, (3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(dropout_rate)(x)
    
    # Global pooling and dense layers
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(64, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.Dropout(dropout_rate)(x)
    
    outputs = layers.Dense(1, activation='sigmoid')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs)
    return model

model_multiscale = build_multiscale_3d_cnn(input_shape)
model_multiscale.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc'),
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("\\n" + "-"*80)
print("MODEL 3: Multi-Scale 3D CNN (Extra Regularization)")
print("-"*80)
model_multiscale.summary()
print(f"Total parameters: {model_multiscale.count_params():,}")


\n--------------------------------------------------------------------------------
MODEL 3: Multi-Scale 3D CNN (Extra Regularization)
--------------------------------------------------------------------------------
Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 15, 15, 6)]  0           []                               
                                                                                                  
 conv2d_24 (Conv2D)             (None, 15, 15, 20)   1100        ['input_6[0][0]']                
                                                                                                  
 conv2d_25 (Conv2D)             (None, 15, 15, 20)   3020        ['input_6[0][0]']                
                                                                           

In [None]:

# ============================================================================
# STEP 6: TRAINING CALLBACKS
# ============================================================================

print("\\n" + "="*80)
print("STEP 6: SETTING UP TRAINING CALLBACKS")
print("="*80)

# Create directories for saving models
os.makedirs('2dmodels', exist_ok=True)
os.makedirs('2dlogs', exist_ok=True)

# Callbacks for all models (MORE aggressive early stopping for no augmentation)
def get_callbacks(model_name):
    return [
        EarlyStopping(
            monitor='val_auc',
            patience=30,  # Increased patience
            mode='max',
            verbose=1,
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=15,  # Increased patience
            min_lr=1e-7,
            verbose=1
        ),
        ModelCheckpoint(
            f'models/{model_name}_no_aug_best.keras',
            monitor='val_auc',
            mode='max',
            save_best_only=True,
            verbose=1
        )
    ]

print("Callbacks configured:")
print("  - Early Stopping (patience=30, monitor=val_auc)")
print("  - Reduce LR on Plateau (patience=15, factor=0.5)")
print("  - Model Checkpoint (save best model)")

# ============================================================================
# STEP 7: TRAINING MODELS (NO AUGMENTATION)
# ============================================================================

print("\\n" + "="*80)
print("STEP 7: TRAINING MODELS (NO AUGMENTATION)")
print("="*80)
print("\\n⚠ Training WITHOUT data augmentation")
print("=" * 80)

# Training configuration
EPOCHS = 150  # More epochs since no augmentation
BATCH_SIZE = 16

# Dictionary to store all models and their histories
models_dict = {
    'lightweight': model_lightweight,
    'residual': model_residual,
    'multiscale': model_multiscale
}

histories = {}

# Train each model
for model_name, model in models_dict.items():
    print(f"\\n{'='*80}")
    print(f"TRAINING: {model_name.upper()} MODEL")
    print(f"{'='*80}")
    
    history = model.fit(
        X_train, y_train,  # NO AUGMENTATION - original data only
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        class_weight=class_weights,
        callbacks=get_callbacks(model_name),
        verbose=2
    )
    
    histories[model_name] = history
    print(f"\\n✓ {model_name.upper()} model training complete")


STEP 6: SETTING UP TRAINING CALLBACKS
Callbacks configured:
  - Early Stopping (patience=30, monitor=val_auc)
  - Reduce LR on Plateau (patience=15, factor=0.5)
  - Model Checkpoint (save best model)
STEP 7: TRAINING MODELS (NO AUGMENTATION)
\n⚠ Training WITHOUT data augmentation
TRAINING: LIGHTWEIGHT MODEL
Epoch 1/150

Epoch 1: val_auc improved from -inf to 0.76240, saving model to models\lightweight_no_aug_best.keras
33/33 - 2s - loss: 1.6880 - accuracy: 0.6192 - auc: 0.6248 - precision: 0.3575 - recall: 0.5324 - val_loss: 1.6726 - val_accuracy: 0.4821 - val_auc: 0.7624 - val_precision: 0.3333 - val_recall: 0.9333 - lr: 5.0000e-04 - 2s/epoch - 58ms/step
Epoch 2/150

Epoch 2: val_auc improved from 0.76240 to 0.78659, saving model to models\lightweight_no_aug_best.keras
33/33 - 0s - loss: 1.6003 - accuracy: 0.6288 - auc: 0.6924 - precision: 0.3945 - recall: 0.7266 - val_loss: 1.6173 - val_accuracy: 0.5268 - val_auc: 0.7866 - val_precision: 0.3544 - val_recall: 0.9333 - lr: 5.0000e-04 -

In [28]:

# ============================================================================
# STEP 8: EVALUATION
# ============================================================================

print("\\n" + "="*80)
print("STEP 8: MODEL EVALUATION ON TEST SET")
print("="*80)

results = {}

for model_name, model in models_dict.items():
    print(f"\\n{'-'*80}")
    print(f"EVALUATING: {model_name.upper()} MODEL")
    print(f"{'-'*80}")
    
    # Predictions
    y_pred_proba = model.predict(X_test, verbose=0)
    y_pred = (y_pred_proba > 0.5).astype(int).flatten()
    
    # Metrics
    test_loss, test_acc, test_auc, test_prec, test_rec = model.evaluate(
        X_test, y_test, verbose=0
    )
    
    print(f"\\nTest Metrics:")
    print(f"  - Loss: {test_loss:.4f}")
    print(f"  - Accuracy: {test_acc:.4f}")
    print(f"  - AUC: {test_auc:.4f}")
    print(f"  - Precision: {test_prec:.4f}")
    print(f"  - Recall: {test_rec:.4f}")
    
    print(f"\\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=['No Stings', 'Stings']))
    
    print(f"\\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    
    results[model_name] = {
        'accuracy': test_acc,
        'auc': test_auc,
        'precision': test_prec,
        'recall': test_rec,
        'loss': test_loss
    }


STEP 8: MODEL EVALUATION ON TEST SET
\n--------------------------------------------------------------------------------
EVALUATING: LIGHTWEIGHT MODEL
--------------------------------------------------------------------------------
\nTest Metrics:
  - Loss: 0.8027
  - Accuracy: 0.7143
  - AUC: 0.7888
  - Precision: 0.4800
  - Recall: 0.8000
\nClassification Report:
              precision    recall  f1-score   support

   No Stings       0.90      0.68      0.78        82
      Stings       0.48      0.80      0.60        30

    accuracy                           0.71       112
   macro avg       0.69      0.74      0.69       112
weighted avg       0.79      0.71      0.73       112

\nConfusion Matrix:
[[56 26]
 [ 6 24]]
\n--------------------------------------------------------------------------------
EVALUATING: RESIDUAL MODEL
--------------------------------------------------------------------------------
\nTest Metrics:
  - Loss: 1.2017
  - Accuracy: 0.5804
  - AUC: 0.8343
  - Pr

In [29]:

# ============================================================================
# STEP 9: MODEL COMPARISON
# ============================================================================

print("\\n" + "="*80)
print("STEP 9: MODEL COMPARISON SUMMARY")
print("="*80)

print("\\n{:<15} {:<12} {:<12} {:<12} {:<12}".format(
    'Model', 'Accuracy', 'AUC', 'Precision', 'Recall'
))
print("-" * 65)

for model_name, metrics in results.items():
    print("{:<15} {:<12.4f} {:<12.4f} {:<12.4f} {:<12.4f}".format(
        model_name.capitalize(),
        metrics['accuracy'],
        metrics['auc'],
        metrics['precision'],
        metrics['recall']
    ))

# Find best model
best_model_name = max(results, key=lambda x: results[x]['auc'])
print(f"\\n✓ BEST MODEL: {best_model_name.upper()} (based on AUC)")

# ============================================================================
# STEP 10: SAVE RESULTS
# ============================================================================

print("\\n" + "="*80)
print("STEP 10: SAVING RESULTS")
print("="*80)

# Save results to CSV
results_df = pd.DataFrame(results).T
results_df.to_csv('model_comparison_results_no_aug.csv')
print("\\n✓ Saved model comparison to 'model_comparison_results_no_aug.csv'")

# Save training histories
for model_name, history in histories.items():
    history_df = pd.DataFrame(history.history)
    history_df.to_csv(f'logs/{model_name}_no_aug_training_history.csv', index=False)
    print(f"✓ Saved {model_name} training history")

print("\\n" + "="*80)
print("TRAINING AND EVALUATION COMPLETE! (NO AUGMENTATION)")
print("="*80)
print("\\nOutput files:")
print("  - combined_bluebottle_dataset_no_aug.npz (combined dataset)")
print("  - models/*_no_aug_best.keras (trained models)")
print("  - model_comparison_results_no_aug.csv (performance metrics)")
print("  - logs/*_no_aug_training_history.csv (training histories)")
print("\\nTo use the best model for prediction:")
print(f"  best_model = keras.models.load_model('models/{best_model_name}_no_aug_best.keras')")
print("  predictions = best_model.predict(new_data)")
print("\\nNote: Models trained WITHOUT augmentation using stronger regularization")
print("="*80)


STEP 9: MODEL COMPARISON SUMMARY
\nModel           Accuracy     AUC          Precision    Recall      
-----------------------------------------------------------------
Lightweight     0.7143       0.7888       0.4800       0.8000      
Residual        0.5804       0.8343       0.3836       0.9333      
Multiscale      0.7143       0.7939       0.4815       0.8667      
\n✓ BEST MODEL: RESIDUAL (based on AUC)
STEP 10: SAVING RESULTS
\n✓ Saved model comparison to 'model_comparison_results_no_aug.csv'
✓ Saved lightweight training history
✓ Saved residual training history
✓ Saved multiscale training history
TRAINING AND EVALUATION COMPLETE! (NO AUGMENTATION)
\nOutput files:
  - combined_bluebottle_dataset_no_aug.npz (combined dataset)
  - models/*_no_aug_best.keras (trained models)
  - model_comparison_results_no_aug.csv (performance metrics)
  - logs/*_no_aug_training_history.csv (training histories)
\nTo use the best model for prediction:
  best_model = keras.models.load_model('models/r