In [None]:
from pathlib import Path
import os

# Get current working directory
current_dir = Path(os.getcwd()).parent

# Adjust path based on your notebook location relative to data
npz_path = current_dir / 'data' / '2-Data' / 'GoldCoast' / 'current_wind_20100101_20241231_GoaldCoast.npz'
stings_path = current_dir / 'data'  / '2-Data' / 'GoldCoast' / 'goaldcoast_stings.csv'


In [1]:

# Create a TRUE 3D CNN implementation with temporal sequences
# Using famous architectures: C3D, 3D ResNet, and I3D-inspired

"""
TRUE 3D CNN for Bluebottle Sting Prediction - Gold Coast Marine Data
Using Temporal Sequences with Conv3D Layers
Based on: C3D, 3D ResNet, and I3D architectures
Author: Professional Data Scientist
Date: October 2025

This script:
1. Creates temporal sequences (e.g., last 5-7 days) from environmental data
2. Uses TRUE 3D convolutions (Conv3D) to process spatiotemporal data
3. Implements famous architectures: C3D, 3D ResNet, I3D-inspired
4. NO data augmentation (as requested)
"""

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from datetime import datetime
import os
import warnings
warnings.filterwarnings('ignore')

# Set random seeds
np.random.seed(42)
tf.random.set_seed(42)

print("="*80)
print("TRUE 3D CNN FOR BLUEBOTTLE STING PREDICTION")
print("Using Conv3D with Temporal Sequences")
print("="*80)
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU'))} device(s)")
print("="*80)

# ============================================================================
# CONFIGURATION
# ============================================================================

TEMPORAL_LENGTH = 7  # Number of consecutive days to use as input
BATCH_SIZE = 8  # Smaller batch size for 3D CNNs (memory intensive)
EPOCHS = 150
L2_REG = 0.01  # Strong regularization for small dataset
DROPOUT = 0.5

print(f"\\nConfiguration:")
print(f"  - Temporal length: {TEMPORAL_LENGTH} days")
print(f"  - Batch size: {BATCH_SIZE}")
print(f"  - Epochs: {EPOCHS}")
print(f"  - L2 regularization: {L2_REG}")
print(f"  - Dropout: {DROPOUT}")


TRUE 3D CNN FOR BLUEBOTTLE STING PREDICTION
Using Conv3D with Temporal Sequences
TensorFlow version: 2.10.1
GPU Available: 1 device(s)
\nConfiguration:
  - Temporal length: 7 days
  - Batch size: 8
  - Epochs: 150
  - L2 regularization: 0.01
  - Dropout: 0.5


In [5]:

# ============================================================================
# STEP 1: LOAD AND COMBINE DATASETS
# ============================================================================

print("\\n" + "="*80)
print("STEP 1: LOADING AND COMBINING DATASETS")
print("="*80)

# Load environmental data
print("\\nLoading environmental data from NPZ file...")
env_data = np.load(npz_path)

UVTs_data = env_data['UVTempSalt_UVTs']  # Shape: (5479, 6, 15, 15)
crop_lon_min = env_data['crop_lon_min']
crop_lon_max = env_data['crop_lon_max']
crop_lat_min = env_data['crop_lat_min']
crop_lat_max = env_data['crop_lat_max']

print(f"Environmental data shape: {UVTs_data.shape}")
print(f"  - Days: {UVTs_data.shape[0]}")
print(f"  - Channels: {UVTs_data.shape[1]}")
print(f"  - Spatial grid: {UVTs_data.shape[2]}x{UVTs_data.shape[3]}")

# Create date range for environmental data
start_date = datetime(2010, 1, 1)
env_dates = pd.date_range(start=start_date, periods=UVTs_data.shape[0], freq='D')

# Load sting data
print("\\nLoading sting data from CSV...")
sting_data = pd.read_csv(stings_path)
sting_data['time'] = pd.to_datetime(sting_data['time'], dayfirst=True, errors='coerce')
print(f"Sting data shape: {sting_data.shape}")

# Match dates
env_df = pd.DataFrame({'Date': env_dates, 'env_index': range(len(env_dates))})
sting_data['Date'] = sting_data['time'].dt.date
env_df['Date'] = env_df['Date'].dt.date

merged_data = sting_data.merge(env_df, on='Date', how='inner')
print(f"Matched records: {len(merged_data)}")


STEP 1: LOADING AND COMBINING DATASETS
\nLoading environmental data from NPZ file...
Environmental data shape: (5479, 6, 15, 15)
  - Days: 5479
  - Channels: 6
  - Spatial grid: 15x15
\nLoading sting data from CSV...
Sting data shape: (797, 3)
Matched records: 744


In [6]:

# ============================================================================
# STEP 2: CREATE TEMPORAL SEQUENCES
# ============================================================================

print("\\n" + "="*80)
print("STEP 2: CREATING TEMPORAL SEQUENCES")
print("="*80)

def create_temporal_sequences(data, indices, temporal_length):
    """
    Create temporal sequences for 3D CNN.
    
    Parameters:
    -----------
    data : array (5479, 6, 15, 15)
        Full environmental data
    indices : array
        Indices of days with sting data
    temporal_length : int
        Number of previous days to include
        
    Returns:
    --------
    X_sequences : array (n_samples, temporal_length, 6, 15, 15)
        Temporal sequences
    valid_indices : array
        Indices of valid sequences (where all previous days exist)
    """
    sequences = []
    valid_idx = []
    
    for i, idx in enumerate(indices):
        # Check if we have enough previous days
        if idx >= temporal_length - 1:
            # Extract sequence: [idx-(temporal_length-1), ..., idx]
            sequence = data[idx - (temporal_length - 1):idx + 1]
            sequences.append(sequence)
            valid_idx.append(i)
    
    return np.array(sequences), np.array(valid_idx)

print(f"\\nCreating sequences of {TEMPORAL_LENGTH} consecutive days...")
matched_indices = merged_data['env_index'].values
y_binary = merged_data['stings_Binary'].values
y_sum = merged_data['stings_sum'].values

# Create temporal sequences
X_sequences, valid_idx = create_temporal_sequences(
    UVTs_data, matched_indices, TEMPORAL_LENGTH
)

# Filter labels to match valid sequences
y_binary = y_binary[valid_idx]
y_sum = y_sum[valid_idx]
matched_data_filtered = merged_data.iloc[valid_idx].reset_index(drop=True)

print(f"Original matched samples: {len(merged_data)}")
print(f"Valid sequences (with {TEMPORAL_LENGTH} days): {len(X_sequences)}")
print(f"Removed samples (insufficient history): {len(merged_data) - len(X_sequences)}")
print(f"\\nSequence shape: {X_sequences.shape}")
print(f"  Format: (samples, time_steps, channels, height, width)")


STEP 2: CREATING TEMPORAL SEQUENCES
\nCreating sequences of 7 consecutive days...
Original matched samples: 744
Valid sequences (with 7 days): 743
Removed samples (insufficient history): 1
\nSequence shape: (743, 7, 6, 15, 15)
  Format: (samples, time_steps, channels, height, width)


In [7]:

# ============================================================================
# STEP 3: DATA PREPROCESSING
# ============================================================================

print("\\n" + "="*80)
print("STEP 3: DATA PREPROCESSING")
print("="*80)

# Transpose to TensorFlow format: (samples, time, height, width, channels)
X_transposed = np.transpose(X_sequences, (0, 1, 3, 4, 2))
print(f"\\nTransposed shape: {X_transposed.shape}")
print(f"  Format: (samples, time_steps, height, width, channels)")

# Handle NaN values (land masks)
print(f"\\nHandling NaN values (land masks)...")
nan_count = np.isnan(X_transposed).sum()
print(f"Total NaN values: {nan_count}")
X_processed = np.nan_to_num(X_transposed, nan=0.0)

# Channel-wise normalization
print(f"\\nNormalizing channels across all samples and time steps...")
X_normalized = np.zeros_like(X_processed)
channel_stats = []

for i in range(X_processed.shape[-1]):
    channel_data = X_processed[:, :, :, :, i]
    ocean_mask = channel_data != 0
    
    if ocean_mask.sum() > 0:
        mean_val = channel_data[ocean_mask].mean()
        std_val = channel_data[ocean_mask].std()
    else:
        mean_val = 0
        std_val = 1
    
    X_normalized[:, :, :, :, i] = np.where(
        ocean_mask,
        (channel_data - mean_val) / (std_val + 1e-8),
        0
    )
    
    channel_stats.append({'channel': i, 'mean': mean_val, 'std': std_val})
    print(f"  - Channel {i}: mean={mean_val:.4f}, std={std_val:.4f}")

# Class distribution
print(f"\\nLabel distribution:")
unique, counts = np.unique(y_binary, return_counts=True)
for label, count in zip(unique, counts):
    print(f"  - Class {label}: {count} samples ({count/len(y_binary)*100:.1f}%)")


STEP 3: DATA PREPROCESSING
\nTransposed shape: (743, 7, 15, 15, 6)
  Format: (samples, time_steps, height, width, channels)
\nHandling NaN values (land masks)...
Total NaN values: 790552
\nNormalizing channels across all samples and time steps...
  - Channel 0: mean=-0.0158, std=0.1822
  - Channel 1: mean=-0.4264, std=0.3704
  - Channel 2: mean=24.9869, std=1.7064
  - Channel 3: mean=35.4594, std=0.1573
  - Channel 4: mean=-2.2297, std=3.1888
  - Channel 5: mean=0.2571, std=5.2896
\nLabel distribution:
  - Class 0: 544 samples (73.2%)
  - Class 1: 199 samples (26.8%)


In [11]:

# ============================================================================
# STEP 4: SAVE COMBINED DATASET
# ============================================================================

print("\\n" + "="*80)
print("STEP 4: SAVING COMBINED DATASET")
print("="*80)
processed_data_path = current_dir / 'data' / 'processed' / 'GoldCoast' / 'combined_bluebottle_3dcnn.npz'

np.savez_compressed(
    processed_data_path,
    X_normalized=X_normalized,
    X_raw=X_processed,
    y_binary=y_binary,
    y_sum=y_sum,
    dates=matched_data_filtered['Date'].values,
    temporal_length=TEMPORAL_LENGTH,
    channel_stats=channel_stats
)
print("✓ Saved as 'combined_bluebottle_3dcnn.npz'")


STEP 4: SAVING COMBINED DATASET
✓ Saved as 'combined_bluebottle_3dcnn.npz'


In [12]:

# ============================================================================
# STEP 5: TRAIN-VALIDATION-TEST SPLIT
# ============================================================================

print("\\n" + "="*80)
print("STEP 5: TRAIN-VALIDATION-TEST SPLIT")
print("="*80)

X_temp, X_test, y_temp, y_test = train_test_split(
    X_normalized, y_binary, 
    test_size=0.15, 
    random_state=42, 
    stratify=y_binary
)

X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, 
    test_size=0.176,
    random_state=42, 
    stratify=y_temp
)

print(f"\\nDataset splits:")
print(f"  - Training: {X_train.shape[0]} samples")
print(f"  - Validation: {X_val.shape[0]} samples")
print(f"  - Test: {X_test.shape[0]} samples")

for split_name, y_split in [('Train', y_train), ('Val', y_val), ('Test', y_test)]:
    unique, counts = np.unique(y_split, return_counts=True)
    print(f"  {split_name}:", end='')
    for label, count in zip(unique, counts):
        print(f" Class {label}={count} ({count/len(y_split)*100:.1f}%)", end='')
    print()

# Class weights
class_weights_array = compute_class_weight(
    'balanced', classes=np.unique(y_binary), y=y_binary
)
class_weights = {i: class_weights_array[i] for i in range(len(class_weights_array))}
print(f"\\nClass weights: {class_weights}")


STEP 5: TRAIN-VALIDATION-TEST SPLIT
\nDataset splits:
  - Training: 519 samples
  - Validation: 112 samples
  - Test: 112 samples
  Train: Class 0=380 (73.2%) Class 1=139 (26.8%)
  Val: Class 0=82 (73.2%) Class 1=30 (26.8%)
  Test: Class 0=82 (73.2%) Class 1=30 (26.8%)
\nClass weights: {0: 0.6829044117647058, 1: 1.8668341708542713}


In [13]:

# ============================================================================
# STEP 6: BUILD 3D CNN MODELS - FAMOUS ARCHITECTURES
# ============================================================================

print("\\n" + "="*80)
print("STEP 6: BUILDING TRUE 3D CNN MODELS")
print("="*80)

input_shape = X_train.shape[1:]  # (temporal_length, 15, 15, 6)
print(f"\\nInput shape: {input_shape}")
print(f"  - Temporal: {input_shape[0]} frames")
print(f"  - Spatial: {input_shape[1]}x{input_shape[2]}")
print(f"  - Channels: {input_shape[3]}")

# ----------------------------------------------------------------------------
# MODEL 1: C3D - Classic 3D CNN Architecture
# Based on: "Learning Spatiotemporal Features with 3D CNNs" (Tran et al., 2015)
# ----------------------------------------------------------------------------

def build_c3d_model(input_shape, l2_reg=0.01, dropout=0.5):
    """
    C3D architecture adapted for small spatiotemporal data.
    
    Original C3D uses 3x3x3 kernels throughout.
    Adapted for smaller spatial dimensions (15x15) and strong regularization.
    
    Reference: Tran et al., "Learning Spatiotemporal Features with 3D CNNs", ICCV 2015
    """
    model = models.Sequential([
        # Input
        layers.Input(shape=input_shape),
        
        # Conv block 1
        layers.Conv3D(32, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling3D(pool_size=(1, 2, 2)),
        layers.Dropout(dropout),
        
        # Conv block 2
        layers.Conv3D(64, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling3D(pool_size=(2, 2, 2)),
        layers.Dropout(dropout),
        
        # Conv block 3
        layers.Conv3D(128, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling3D(pool_size=(2, 2, 2)),
        layers.Dropout(dropout),
        
        # Global pooling
        layers.GlobalAveragePooling3D(),
        
        # Dense layers
        layers.Dense(128, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg)),
        layers.Dropout(dropout),
        
        layers.Dense(64, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg)),
        layers.Dropout(dropout),
        
        # Output
        layers.Dense(1, activation='sigmoid')
    ])
    
    return model

model_c3d = build_c3d_model(input_shape, l2_reg=L2_REG, dropout=DROPOUT)
model_c3d.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0003),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc'), 
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("\\n" + "-"*80)
print("MODEL 1: C3D (3D ConvNet)")
print("Based on: Tran et al., ICCV 2015")
print("-"*80)
model_c3d.summary()
print(f"Total parameters: {model_c3d.count_params():,}")


STEP 6: BUILDING TRUE 3D CNN MODELS
\nInput shape: (7, 15, 15, 6)
  - Temporal: 7 frames
  - Spatial: 15x15
  - Channels: 6
\n--------------------------------------------------------------------------------
MODEL 1: C3D (3D ConvNet)
Based on: Tran et al., ICCV 2015
--------------------------------------------------------------------------------
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 7, 15, 15, 32)     5216      
                                                                 
 batch_normalization (BatchN  (None, 7, 15, 15, 32)    128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 7, 15, 15, 32)     0         
                                                                 
 max_pooling3d (MaxPooling3D  (None, 7,

In [14]:

# ----------------------------------------------------------------------------
# MODEL 2: 3D ResNet - Residual 3D CNN
# Based on: "Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet?" (Hara et al., 2018)
# ----------------------------------------------------------------------------

def residual_block_3d(x, filters, l2_reg=0.01, dropout=0.5):
    """3D Residual block with skip connection."""
    shortcut = x
    
    # First conv
    x = layers.Conv3D(filters, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(dropout)(x)
    
    # Second conv
    x = layers.Conv3D(filters, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    
    # Match dimensions if needed
    if shortcut.shape[-1] != filters:
        shortcut = layers.Conv3D(filters, kernel_size=(1, 1, 1), padding='same')(shortcut)
    
    # Add skip connection
    x = layers.Add()([x, shortcut])
    x = layers.Activation('relu')(x)
    
    return x

def build_3d_resnet(input_shape, l2_reg=0.01, dropout=0.5):
    """
    3D ResNet architecture for spatiotemporal learning.
    
    Uses residual connections to enable deeper networks.
    
    Reference: Hara et al., "Can Spatiotemporal 3D CNNs Retrace...", CVPR 2018
    """
    inputs = layers.Input(shape=input_shape)
    
    # Initial conv
    x = layers.Conv3D(32, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    
    # Residual blocks
    x = residual_block_3d(x, 32, l2_reg, dropout)
    x = layers.MaxPooling3D(pool_size=(1, 2, 2))(x)
    
    x = residual_block_3d(x, 64, l2_reg, dropout)
    x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
    
    x = residual_block_3d(x, 128, l2_reg, dropout)
    
    # Global pooling
    x = layers.GlobalAveragePooling3D()(x)
    
    # Dense layers
    x = layers.Dense(128, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.Dropout(dropout)(x)
    
    x = layers.Dense(64, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.Dropout(dropout)(x)
    
    outputs = layers.Dense(1, activation='sigmoid')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs)
    return model

model_3dresnet = build_3d_resnet(input_shape, l2_reg=L2_REG, dropout=DROPOUT)
model_3dresnet.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc'),
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("\\n" + "-"*80)
print("MODEL 2: 3D ResNet")
print("Based on: Hara et al., CVPR 2018")
print("-"*80)
model_3dresnet.summary()
print(f"Total parameters: {model_3dresnet.count_params():,}")


\n--------------------------------------------------------------------------------
MODEL 2: 3D ResNet
Based on: Hara et al., CVPR 2018
--------------------------------------------------------------------------------
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 7, 15, 15,   0           []                               
                                6)]                                                               
                                                                                                  
 conv3d_3 (Conv3D)              (None, 7, 15, 15, 3  5216        ['input_2[0][0]']                
                                2)                                                                
                                                                            

In [15]:

# ----------------------------------------------------------------------------
# MODEL 3: I3D-Inspired - Inflated 3D CNN
# Based on: "Quo Vadis, Action Recognition?" (Carreira & Zisserman, 2017)
# ----------------------------------------------------------------------------

def build_i3d_inspired(input_shape, l2_reg=0.01, dropout=0.5):
    """
    I3D-inspired architecture with mixed 3D convolutions.
    
    Uses different temporal kernel sizes to capture multi-scale temporal patterns.
    
    Reference: Carreira & Zisserman, "Quo Vadis", CVPR 2017
    """
    inputs = layers.Input(shape=input_shape)
    
    # Initial 3D conv with larger temporal kernel
    x = layers.Conv3D(32, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling3D(pool_size=(1, 2, 2))(x)
    x = layers.Dropout(dropout)(x)
    
    # Inception-like block with different temporal scales
    # Branch 1: 1x3x3 (spatial only)
    branch1 = layers.Conv3D(16, kernel_size=(1, 3, 3), padding='same',
                           kernel_regularizer=regularizers.l2(l2_reg))(x)
    branch1 = layers.BatchNormalization()(branch1)
    branch1 = layers.Activation('relu')(branch1)
    
    # Branch 2: 3x3x3 (spatiotemporal)
    branch2 = layers.Conv3D(16, kernel_size=(3, 3, 3), padding='same',
                           kernel_regularizer=regularizers.l2(l2_reg))(x)
    branch2 = layers.BatchNormalization()(branch2)
    branch2 = layers.Activation('relu')(branch2)
    
    # Branch 3: 5x3x3 (longer temporal)
    branch3 = layers.Conv3D(16, kernel_size=(5, 3, 3), padding='same',
                           kernel_regularizer=regularizers.l2(l2_reg))(x)
    branch3 = layers.BatchNormalization()(branch3)
    branch3 = layers.Activation('relu')(branch3)
    
    # Concatenate branches
    x = layers.Concatenate()([branch1, branch2, branch3])
    x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
    x = layers.Dropout(dropout)(x)
    
    # Additional 3D conv blocks
    x = layers.Conv3D(96, kernel_size=(3, 3, 3), padding='same',
                     kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(dropout)(x)
    
    # Global pooling
    x = layers.GlobalAveragePooling3D()(x)
    
    # Dense layers
    x = layers.Dense(128, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.Dropout(dropout)(x)
    
    x = layers.Dense(64, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_reg))(x)
    x = layers.Dropout(dropout)(x)
    
    outputs = layers.Dense(1, activation='sigmoid')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs)
    return model

model_i3d = build_i3d_inspired(input_shape, l2_reg=L2_REG, dropout=DROPOUT)
model_i3d.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0003),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc'),
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

print("\\n" + "-"*80)
print("MODEL 3: I3D-Inspired (Inflated 3D)")
print("Based on: Carreira & Zisserman, CVPR 2017")
print("-"*80)
model_i3d.summary()
print(f"Total parameters: {model_i3d.count_params():,}")


\n--------------------------------------------------------------------------------
MODEL 3: I3D-Inspired (Inflated 3D)
Based on: Carreira & Zisserman, CVPR 2017
--------------------------------------------------------------------------------
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 7, 15, 15,   0           []                               
                                6)]                                                               
                                                                                                  
 conv3d_12 (Conv3D)             (None, 7, 15, 15, 3  5216        ['input_3[0][0]']                
                                2)                                                                
                                                

In [16]:

# ============================================================================
# STEP 7: TRAINING CALLBACKS
# ============================================================================

print("\\n" + "="*80)
print("STEP 7: SETTING UP CALLBACKS")
print("="*80)

os.makedirs('3dmodels', exist_ok=True)
os.makedirs('3dlogs', exist_ok=True)

def get_callbacks(model_name):
    return [
        EarlyStopping(
            monitor='val_auc',
            patience=30,
            mode='max',
            verbose=1,
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=15,
            min_lr=1e-7,
            verbose=1
        ),
        ModelCheckpoint(
            f'models/{model_name}_3dcnn_best.keras',
            monitor='val_auc',
            mode='max',
            save_best_only=True,
            verbose=1
        )
    ]

print("Callbacks ready: EarlyStopping, ReduceLROnPlateau, ModelCheckpoint")

# ============================================================================
# STEP 8: TRAINING MODELS
# ============================================================================

print("\\n" + "="*80)
print("STEP 8: TRAINING TRUE 3D CNN MODELS")
print("="*80)

models_dict = {
    'c3d': model_c3d,
    '3dresnet': model_3dresnet,
    'i3d': model_i3d
}

histories = {}

for model_name, model in models_dict.items():
    print(f"\\n{'='*80}")
    print(f"TRAINING: {model_name.upper()}")
    print(f"{'='*80}")
    
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        class_weight=class_weights,
        callbacks=get_callbacks(model_name),
        verbose=2
    )
    
    histories[model_name] = history
    print(f"\\n✓ {model_name.upper()} training complete")


STEP 7: SETTING UP CALLBACKS
Callbacks ready: EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
STEP 8: TRAINING TRUE 3D CNN MODELS
TRAINING: C3D
Epoch 1/150

Epoch 1: val_auc improved from -inf to 0.73232, saving model to models\c3d_3dcnn_best.keras
65/65 - 6s - loss: 4.6583 - accuracy: 0.5222 - auc: 0.5084 - precision: 0.2700 - recall: 0.4604 - val_loss: 4.0609 - val_accuracy: 0.6429 - val_auc: 0.7323 - val_precision: 0.4138 - val_recall: 0.8000 - lr: 3.0000e-04 - 6s/epoch - 86ms/step
Epoch 2/150

Epoch 2: val_auc did not improve from 0.73232
65/65 - 1s - loss: 4.2142 - accuracy: 0.5530 - auc: 0.5903 - precision: 0.3038 - recall: 0.5180 - val_loss: 3.9708 - val_accuracy: 0.6071 - val_auc: 0.7197 - val_precision: 0.3906 - val_recall: 0.8333 - lr: 3.0000e-04 - 667ms/epoch - 10ms/step
Epoch 3/150

Epoch 3: val_auc improved from 0.73232 to 0.74634, saving model to models\c3d_3dcnn_best.keras
65/65 - 1s - loss: 4.2160 - accuracy: 0.4701 - auc: 0.5041 - precision: 0.2444 - recall: 0.4676 -

In [17]:

# ============================================================================
# STEP 9: EVALUATION
# ============================================================================

print("\\n" + "="*80)
print("STEP 9: MODEL EVALUATION")
print("="*80)

results = {}

for model_name, model in models_dict.items():
    print(f"\\n{'-'*80}")
    print(f"EVALUATING: {model_name.upper()}")
    print(f"{'-'*80}")
    
    y_pred_proba = model.predict(X_test, verbose=0)
    y_pred = (y_pred_proba > 0.5).astype(int).flatten()
    
    test_loss, test_acc, test_auc, test_prec, test_rec = model.evaluate(
        X_test, y_test, verbose=0
    )
    
    print(f"\\nTest Metrics:")
    print(f"  - Loss: {test_loss:.4f}")
    print(f"  - Accuracy: {test_acc:.4f}")
    print(f"  - AUC: {test_auc:.4f}")
    print(f"  - Precision: {test_prec:.4f}")
    print(f"  - Recall: {test_rec:.4f}")
    
    print(f"\\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=['No Stings', 'Stings']))
    
    print(f"\\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    
    results[model_name] = {
        'accuracy': test_acc,
        'auc': test_auc,
        'precision': test_prec,
        'recall': test_rec,
        'loss': test_loss
    }


STEP 9: MODEL EVALUATION
\n--------------------------------------------------------------------------------
EVALUATING: C3D
--------------------------------------------------------------------------------
\nTest Metrics:
  - Loss: 1.6274
  - Accuracy: 0.6696
  - AUC: 0.8016
  - Precision: 0.4444
  - Recall: 0.9333
\nClassification Report:
              precision    recall  f1-score   support

   No Stings       0.96      0.57      0.72        82
      Stings       0.44      0.93      0.60        30

    accuracy                           0.67       112
   macro avg       0.70      0.75      0.66       112
weighted avg       0.82      0.67      0.69       112

\nConfusion Matrix:
[[47 35]
 [ 2 28]]
\n--------------------------------------------------------------------------------
EVALUATING: 3DRESNET
--------------------------------------------------------------------------------
\nTest Metrics:
  - Loss: 1.2174
  - Accuracy: 0.8214
  - AUC: 0.8459
  - Precision: 0.9167
  - Recall: 0.36

In [18]:

# ============================================================================
# STEP 10: COMPARISON AND SAVE
# ============================================================================

print("\\n" + "="*80)
print("STEP 10: MODEL COMPARISON")
print("="*80)

print("\\n{:<15} {:<12} {:<12} {:<12} {:<12}".format(
    'Model', 'Accuracy', 'AUC', 'Precision', 'Recall'
))
print("-" * 65)

for model_name, metrics in results.items():
    print("{:<15} {:<12.4f} {:<12.4f} {:<12.4f} {:<12.4f}".format(
        model_name.upper(),
        metrics['accuracy'],
        metrics['auc'],
        metrics['precision'],
        metrics['recall']
    ))

best_model_name = max(results, key=lambda x: results[x]['auc'])
print(f"\\n✓ BEST MODEL: {best_model_name.upper()}")

# Save results
results_df = pd.DataFrame(results).T
results_df.to_csv('model_comparison_3dcnn.csv')
print("\\n✓ Saved to 'model_comparison_3dcnn.csv'")

for model_name, history in histories.items():
    history_df = pd.DataFrame(history.history)
    history_df.to_csv(f'logs/{model_name}_3dcnn_history.csv', index=False)
    print(f"✓ Saved {model_name} history")

print("\\n" + "="*80)
print("TRUE 3D CNN TRAINING COMPLETE!")
print("="*80)
print(f"\\nUsing temporal sequences of {TEMPORAL_LENGTH} days")
print("Models trained with Conv3D layers on spatiotemporal data")
print(f"\\nBest model: {best_model_name.upper()}")
print(f"Load with: keras.models.load_model('models/{best_model_name}_3dcnn_best.keras')")
print("="*80)





STEP 10: MODEL COMPARISON
\nModel           Accuracy     AUC          Precision    Recall      
-----------------------------------------------------------------
C3D             0.6696       0.8016       0.4444       0.9333      
3DRESNET        0.8214       0.8459       0.9167       0.3667      
I3D             0.7411       0.8541       0.5102       0.8333      
\n✓ BEST MODEL: I3D
\n✓ Saved to 'model_comparison_3dcnn.csv'
✓ Saved c3d history
✓ Saved 3dresnet history
✓ Saved i3d history
TRUE 3D CNN TRAINING COMPLETE!
\nUsing temporal sequences of 7 days
Models trained with Conv3D layers on spatiotemporal data
\nBest model: I3D
Load with: keras.models.load_model('models/i3d_3dcnn_best.keras')
