In [1]:
import numpy as np
import random
import torch
from torch.utils.data import DataLoader, Dataset, Subset

# Seed every RNG this notebook imports (Python, NumPy, PyTorch CPU and CUDA) with the
# same constant so that a fresh "Restart & Run All" starts from the same state.
random.seed(428)
np.random.seed(428)
# Request deterministic kernels and switch off cuDNN autotuning.
# NOTE(review): on CUDA, use_deterministic_algorithms(True) may additionally require the
# CUBLAS_WORKSPACE_CONFIG environment variable — confirm against the PyTorch
# reproducibility notes for the installed version.
torch.use_deterministic_algorithms(True)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.manual_seed(428)
torch.cuda.manual_seed(428)
torch.cuda.manual_seed_all(428)
def seed_worker(worker_id):
    """DataLoader ``worker_init_fn`` that re-seeds NumPy and ``random``.

    Both generators are seeded with ``torch.initial_seed()`` folded into the
    32-bit range. ``worker_id`` is accepted because DataLoader passes it, but
    it is not used.
    """
    worker_seed = torch.initial_seed() % 2**32
    for reseed in (np.random.seed, random.seed):
        reseed(worker_seed)

In [2]:
import os
import subprocess
import importlib  # Import importlib

# Names as they are passed to `pip install`. A distribution name is not always the
# import name: e.g. 'segmentation-models-pytorch' is imported later in this notebook
# as `segmentation_models_pytorch`.
required_packages = ['torch', 'torchvision', 'segmentation-models-pytorch', 'efficientnet_pytorch']

def install_packages(packages):
    """Install each requested distribution with pip unless it is already importable.

    Args:
        packages: Iterable whose items are either a pip requirement string
            (e.g. ``'segmentation-models-pytorch'`` or ``'timm==0.9.7'``) or a
            ``(pip_name, import_name)`` pair for distributions whose module
            name cannot be derived from the pip name.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a non-zero status.
    """
    # Local imports: the surrounding cell only imports os/subprocess/importlib.
    import re
    import sys

    for package in packages:
        if isinstance(package, (tuple, list)):
            pip_name, module_name = package
        else:
            pip_name = package
            # A pip requirement may carry a version specifier and may use '-',
            # neither of which can appear in a module name. Importing the raw
            # string always failed for hyphenated names, which forced a
            # reinstall on every run.
            bare_name = re.split(r'[<>=!~\[;\s]', package, maxsplit=1)[0]
            module_name = bare_name.replace('-', '_')

        try:
            importlib.import_module(module_name)
            print(f"{pip_name} already installed.")
        except ImportError:
            print(f"Installing {pip_name}...")
            # Go through the running interpreter so the package lands in the
            # kernel's environment rather than whatever `pip` is first on PATH.
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', pip_name])
            print(f"{pip_name} installed successfully.")

# Executed when the cell runs: checks/installs every entry of required_packages.
install_packages(required_packages)

torch already installed.
torchvision already installed.
Installing segmentation-models-pytorch...
Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.3.4-py3-none-any.whl.metadata (30 kB)
Collecting efficientnet-pytorch==0.7.1 (from segmentation-models-pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting timm==0.9.7 (from segmentation-models-pytorch)
  Downloading timm-0.9.7-py3-none-any.whl.metadata (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m 

In [3]:
# Kaggle input locations: each CSV is the per-simulation index, each directory holds
# the .dat field files that the CSV's *_filename columns refer to.
train_csv = '/kaggle/input/2024-flame-ai-challenge/dataset/train.csv'
train_data_dir = '/kaggle/input/2024-flame-ai-challenge/dataset/train'
test_csv = '/kaggle/input/2024-flame-ai-challenge/dataset/test.csv'
test_data_dir = '/kaggle/input/2024-flame-ai-challenge/dataset/test'

SEQ_LEN = 5          # number of past timesteps fed to the model
seq_len = SEQ_LEN    # lowercase alias; the training loop passes this one to FireDataset
NUM_STEPS = 20       # number of future timesteps predicted per forward pass
batch_size = 4       # DataLoader batch size for training and validation
#num_epochs = 1

# Early-stopping settings.
# NOTE(review): none of the visible cells read these three names, so changing them
# currently has no effect on training — confirm before relying on them.
use_early_stopping = False  # Set to False to disable early stopping
patience = 5      # Number of epochs to wait for improvement
delta = 0     # Minimum change to qualify as improvement

# Encoders to train in every fold. Only uncommented entries are trained.
# NOTE(review): the visible training loop only has a constructor branch for
# 'residualcnn'; the commented-out names presumably need model classes that are not
# part of this notebook — confirm before enabling any of them.
model_names = [
#    'unet',
#    'deeplabtemporal',
#   'resnet50',
#    'convlstm',
    'residualcnn',
#    'residualcnnsa',
#    'residualdensecnn',
#    'dconvlstmsac',
#    'selfattconvlstm',
#    'bidirectconvlstmunet',
#    #'pinn-convlstm',
#     '3dconvlstm',
#    'convlstm_autoencoder',
#    'stconvlstm_autoencoder',
#    'spatiotemporal_transformer',
#    'attentionunet',
#    'tcnmodel',
#    'hybridcnntransformer',
#    'multiscaleconvlstm',
]

# Epoch count used for any encoder that has no entry in model_epochs below
# (the training loop looks it up with model_epochs.get(name, num_epochs_default)).
num_epochs_default = 1

# Per-encoder epoch budget; keys must match the strings in model_names.
model_epochs = {
#     'unet': 100,
#     'deeplabtemporal': 5,
#      'resnet50': 50,
#      'convlstm': 50,
      'residualcnn': 400,
#     'residualcnnsa': 60,
#      'dconvlstmsac': 30,
#      'selfattconvlstm': 50,
#      'bidirectconvlstmunet': 50,
#      'pinn-convlstm': 30,
#      '3dconvlstm': 30,
#     'convlstm_autoencoder': 30,
#     'stconvlstm_autoencoder': 30,
#     'spatiotemporal_transformer': 30  
#     'attentionunet': 50,
#     'tcnmodel': 50,
#     'hybridcnntransformer': 50,
#     'residualdensecnn': 50,
#     'multiscaleconvlstm': 40,
}

In [4]:
# 1. Import Necessary Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import (
    resnet18, resnet34, resnet50, resnet101, resnet152,
    ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights,
    densenet121, DenseNet121_Weights,
    mobilenet_v2, MobileNet_V2_Weights
)
from efficientnet_pytorch import EfficientNet  # Ensure this is installed
from torch.utils.data import DataLoader, Dataset, Subset
import pandas as pd
import numpy as np
import os
import torchvision.ops as ops  # For DeformConv2d
import segmentation_models_pytorch as smp

# Try importing MobileNetV3 from timm
try:
    import timm
    has_timm = True
except ImportError:
    has_timm = False
    print("timm library not found. MobileNetV3 will not be available. Please install it using `pip install timm`.")

In [5]:
class FireDataset(Dataset):
    """In-memory dataset of fixed-length windows over per-simulation field files.

    Every CSV row describes one simulation: scalar parameters ``u`` and
    ``alpha``, the timestep count ``Nt`` and three float32 binary files
    (``theta``, ``ustar``, ``xi``), each holding ``Nt`` frames of ``grid_shape``.

    Each sample's input has shape ``[seq_len, 5, H, W]`` with the channel order
    ``(ustar, theta, xi, u, alpha)``; the two scalars are broadcast over the grid.

    * Training mode: one sample per start index ``t`` (stride 1). The target is
      the ``multi_step`` ``xi`` frames following the input window, shape
      ``[multi_step, H, W]``.
    * Test mode: a single sample per simulation built from its first
      ``seq_len`` frames, with ``target`` set to ``None``.

    All samples are built eagerly in ``__init__`` and kept in ``self.samples``
    as dicts with keys ``'id'``, ``'input'`` and ``'target'``. Training windows
    are views into one stacked tensor per simulation, so overlapping windows
    share memory and must be treated as read-only.
    """

    def __init__(self, csv_data, data_dir, seq_len=5, multi_step=20, is_train=True, grid_shape=(113, 32)):
        """
        Args:
            csv_data (str or DataFrame): Path to the CSV file or a pandas DataFrame.
            data_dir (str): Directory containing the .dat files.
            seq_len (int): Number of past timesteps to use as input.
            multi_step (int): Number of future timesteps to predict.
            is_train (bool): Flag indicating training or testing mode.
            grid_shape (tuple): ``(height, width)`` of one frame in the .dat files.
        """
        if isinstance(csv_data, str):
            self.data_info = pd.read_csv(csv_data)
        else:
            self.data_info = csv_data  # Accept DataFrame directly

        self.data_dir = data_dir
        self.seq_len = seq_len
        self.multi_step = multi_step
        self.is_train = is_train
        self.grid_shape = tuple(grid_shape)
        self.samples = self._create_samples()

    def _stack_channels(self, ustar, theta, xi, u, alpha):
        """Return a ``[T, 5, H, W]`` float32 tensor in (ustar, theta, xi, u, alpha) order."""
        fields = torch.from_numpy(np.stack([ustar, theta, xi], axis=1))  # [T, 3, H, W]
        scalars = torch.tensor([float(u), float(alpha)], dtype=torch.float32)
        # Broadcast the two per-simulation scalars to full-size constant maps.
        scalar_maps = scalars.view(1, 2, 1, 1).expand(fields.shape[0], 2, *fields.shape[2:])
        return torch.cat([fields, scalar_maps], dim=1)

    def _create_samples(self):
        """Read every simulation listed in ``self.data_info`` and build its samples.

        Simulations with missing files, files whose size does not match
        ``Nt * H * W`` or too few timesteps are reported on stdout and skipped.
        """
        samples = []
        required_timesteps = self.seq_len + self.multi_step

        for _, row in self.data_info.iterrows():
            sim_id = row['id']
            # Cast explicitly: a float-typed Nt column would otherwise make
            # reshape()/range() fail with an uncaught TypeError.
            num_timesteps = int(row['Nt'])

            theta_path = os.path.join(self.data_dir, row['theta_filename'])
            ustar_path = os.path.join(self.data_dir, row['ustar_filename'])
            xi_path = os.path.join(self.data_dir, row['xi_filename'])
            field_paths = (theta_path, ustar_path, xi_path)

            if not all(os.path.exists(path) for path in field_paths):
                print(f"Missing files for ID {sim_id}. Skipping.")
                continue

            # Load .dat files (raw float32, frame-major).
            try:
                theta, ustar, xi = [
                    np.fromfile(path, dtype=np.float32).reshape(num_timesteps, *self.grid_shape)
                    for path in field_paths
                ]
            except ValueError as e:
                print(f"Error reshaping files for ID {sim_id}: {e}")
                continue

            if self.is_train:
                if num_timesteps < required_timesteps:
                    print(f"Not enough timesteps for ID {sim_id}. Required: {required_timesteps}, Available: {num_timesteps}")
                    continue

                # Stack the whole simulation once; each window is then a slice
                # (view) instead of a fresh copy built frame by frame.
                stacked = self._stack_channels(ustar, theta, xi, row['u'], row['alpha'])  # [Nt, 5, H, W]
                xi_all = torch.from_numpy(xi)  # [Nt, H, W]

                for t in range(num_timesteps - required_timesteps + 1):
                    split = t + self.seq_len
                    samples.append({
                        'id': sim_id,
                        'input': stacked[t:split],                       # [seq_len, 5, H, W]
                        'target': xi_all[split:split + self.multi_step]  # [multi_step, H, W]
                    })
            else:
                if num_timesteps < self.seq_len:
                    print(f"Not enough timesteps for ID {sim_id} in test set. Required: {self.seq_len}, Available: {num_timesteps}")
                    continue

                head = slice(0, self.seq_len)
                samples.append({
                    'id': sim_id,
                    'input': self._stack_channels(ustar[head], theta[head], xi[head], row['u'], row['alpha']),
                    'target': None  # No target for test set
                })

        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(input, target)`` in training mode, or ``input`` alone in test mode."""
        sample = self.samples[idx]
        if self.is_train:
            return sample['input'], sample['target']  # [seq_len, 5, H, W], [multi_step, H, W]
        else:
            return sample['input']  # Return only the input for the test set



In [6]:
class ResidualBlock(nn.Module):
    """Single conv -> batch-norm stage wrapped in an identity skip connection.

    Computes ``activation(bn(conv(x)) + x)``. The convolution keeps the channel
    count and uses stride 1, so with the default ``kernel_size=3, padding=1``
    the output has the same shape as the input.
    """

    def __init__(self, channels, kernel_size=3, padding=1, activation=nn.ReLU(inplace=True)):
        super().__init__()
        self.activation = activation
        # bias=False: the batch-norm that follows has its own learnable shift.
        self.conv = nn.Conv2d(
            in_channels=channels,
            out_channels=channels,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(num_features=channels)

    def forward(self, x):
        residual = self.bn(self.conv(x))
        residual += x  # skip connection
        return self.activation(residual)

class ResidualCNN(nn.Module):
    """
    Residual CNN tailored for multi-step forecasting.

    The time and channel axes of the input are merged into one channel axis,
    passed through conv -> batch-norm -> activation and ``num_residual_blocks``
    residual blocks, then projected by a 1x1 convolution to
    ``out_channels * num_steps`` maps that are unfolded back into a time axis.

    Args:
        in_channels (int): ``seq_len * channels_per_timestep`` of the input.
        num_residual_blocks (int): Number of ``ResidualBlock`` stages.
        out_channels (int): Channels predicted per future timestep.
        num_steps (int): Number of future timesteps predicted in one pass.
        kernel_size (int): Kernel size of the initial and residual convolutions.
        padding (int): Padding of the initial and residual convolutions.
        activation (nn.Module): Activation module; the same instance is shared
            by the stem and every residual block.
    """
    def __init__(self, in_channels=25, num_residual_blocks=4, out_channels=1, num_steps=20, kernel_size=3, padding=1, activation=nn.ReLU(inplace=True)):
        super(ResidualCNN, self).__init__()
        self.num_steps = num_steps
        self.activation = activation
        self.initial_conv = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=1, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(in_channels)

        # Residual blocks
        self.residual_blocks = nn.Sequential(*[
            ResidualBlock(in_channels, kernel_size=kernel_size, padding=padding, activation=activation)
            for _ in range(num_residual_blocks)
        ])

        # 1x1 projection to one map per (future timestep, output channel) pair
        self.output_conv = nn.Conv2d(in_channels, out_channels * num_steps, kernel_size=1, stride=1, padding=0, bias=True)

    def forward(self, x):
        """
        Forward pass for ResidualCNN.

        Args:
            x (Tensor): Input tensor of shape [batch_size, seq_len, channels, H, W]
                with ``seq_len * channels == in_channels``.

        Returns:
            Tensor: Output tensor of shape [batch_size, num_steps, out_channels, H, W].
        """
        batch_size, seq_len, channels, height, width = x.size()
        # reshape, not view: view raises when the time and channel axes cannot be
        # merged without a copy (e.g. a channel-sliced or permuted input).
        x = x.reshape(batch_size, seq_len * channels, height, width)  # [B, seq_len*channels, H, W]

        out = self.initial_conv(x)
        out = self.bn(out)
        out = self.activation(out)

        out = self.residual_blocks(out)  # [B, in_channels, H, W]
        out = self.output_conv(out)      # [B, num_steps*out_channels, H, W]

        # Unfold the channel axis into [num_steps, out_channels]
        out = out.reshape(batch_size, self.num_steps, -1, height, width)

        return out


In [7]:
def train_model(
    model, 
    train_loader, 
    val_loader, 
    criterion, 
    optimizer, 
    num_epochs, 
    device, 
    model_save_path,
    patience=None,
    delta=0.0
):
    """
    Trains the model and saves the best model based on validation loss.

    Args:
        model (nn.Module): The model to train.
        train_loader (DataLoader): DataLoader for training data.
        val_loader (DataLoader): DataLoader for validation data.
        criterion (nn.Module): Loss function.
        optimizer (torch.optim.Optimizer): Optimizer.
        num_epochs (int): Number of epochs to train.
        device (torch.device): Device to train on.
        model_save_path (str): Path to save the best model.
        patience (int or None): If given, stop once this many consecutive
            epochs fail to improve the best validation loss by more than
            ``delta``. ``None`` (default) always runs all ``num_epochs``.
        delta (float): Minimum decrease that counts as an improvement for the
            early-stopping counter. It does not affect which checkpoint is saved.

    Returns:
        model (nn.Module): The trained model loaded with the best weights
            (left as-is if no checkpoint was written during this call).
        best_val_loss (float): The best validation loss achieved
            (``inf`` if no epoch improved on it).

    Raises:
        ValueError: If either loader's dataset is empty.
    """
    # Fail before training instead of with ZeroDivisionError after a full epoch.
    if len(train_loader.dataset) == 0 or len(val_loader.dataset) == 0:
        raise ValueError("train_loader and val_loader must both contain at least one sample.")

    best_val_loss = float('inf')
    checkpoint_written = False
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs} - Training")
        epoch_train_loss = _run_epoch(model, train_loader, criterion, device, optimizer=optimizer)
        print(f"Epoch {epoch+1} Training Loss: {epoch_train_loss:.4f}")

        epoch_val_loss = _run_epoch(model, val_loader, criterion, device)
        print(f"Epoch {epoch+1} Validation Loss: {epoch_val_loss:.4f}")

        # Evaluate against the previous best before it is updated below.
        improved_enough = epoch_val_loss < best_val_loss - delta

        # Save the model if validation loss has decreased
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            torch.save(model.state_dict(), model_save_path)
            checkpoint_written = True
            print(f"Validation loss decreased. Saving model to {model_save_path}")
        else:
            print("Validation loss did not improve.")

        if patience is not None:
            epochs_without_improvement = 0 if improved_enough else epochs_without_improvement + 1
            if epochs_without_improvement >= patience:
                print(f"Early stopping after epoch {epoch+1}: no improvement for {patience} epoch(s).")
                break

    # Reload the best weights only if this call wrote them. Otherwise
    # (num_epochs == 0, or the loss never dropped below inf, e.g. NaN) the file
    # is either missing or a stale checkpoint from an earlier run.
    if checkpoint_written:
        # weights_only=True: the file holds a plain state_dict, so the
        # restricted unpickler is sufficient.
        model.load_state_dict(torch.load(model_save_path, map_location=device, weights_only=True))
    else:
        print("No checkpoint was written during this run; returning the model unchanged.")
    print(f"\nTraining complete. Best Validation Loss: {best_val_loss:.4f}")
    return model, best_val_loss


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    With an ``optimizer`` the model is put in train mode and updated after every
    batch; without one it is put in eval mode and gradients are disabled.
    Targets get a channel axis inserted at dim 2 to match the model output.
    """
    is_training = optimizer is not None
    model.train(is_training)
    total_loss = 0.0

    with torch.set_grad_enabled(is_training):
        for inputs, targets in loader:
            inputs = inputs.to(device)        # [batch_size, seq_len, channels, H, W]
            targets = targets.to(device)      # [batch_size, num_steps, H, W]

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)           # [batch_size, num_steps, 1, H, W]
            loss = criterion(outputs, targets.unsqueeze(2))
            if is_training:
                loss.backward()
                optimizer.step()

            # Weight by batch size so a smaller final batch does not skew the mean.
            total_loss += loss.item() * inputs.size(0)

    return total_loss / len(loader.dataset)


In [8]:
from tqdm import tqdm

# 12. Build leave-one-simulation-out folds and the shared training state
df_train = pd.read_csv(train_csv)
simulations = df_train['id'].unique()
num_folds = len(simulations)  # one fold per simulation id

# Fixed-seed shuffle so the fold order is the same on every run
np.random.seed(42)
shuffled_simulations = np.random.permutation(simulations)

# Every fold holds out exactly one simulation for validation
folds = [[sim] for sim in shuffled_simulations]

# State shared by the training and inference cells
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.MSELoss()
saved_model_paths = []   # checkpoint paths, appended in training order
fold_val_losses = {}     # (fold number, encoder name) -> best validation loss

In [9]:
# Leave-one-simulation-out cross-validation: for every fold, train each encoder in
# model_names on the remaining simulations and keep its best-validation checkpoint.
for fold_idx, val_simulations in enumerate(folds):
    print(f'\nFold {fold_idx+1}/{num_folds}')
    
    val_mask = df_train['id'].isin(val_simulations)
    train_mask = ~val_mask
    
    print(f"\nFold {fold_idx + 1} - Validation 'id's:")
    print(df_train.loc[val_mask, 'id'].unique())  # Print unique IDs in validation set

    print(f"\nFold {fold_idx + 1} - Train 'id's:")
    print(df_train.loc[train_mask, 'id'].unique())  # Print unique IDs in training set

    # Create training and validation datasets (both in training mode so targets are built)
    train_subset = FireDataset(df_train[train_mask], train_data_dir, seq_len=seq_len, multi_step=NUM_STEPS, is_train=True)
    val_subset = FireDataset(df_train[val_mask], train_data_dir, seq_len=seq_len, multi_step=NUM_STEPS, is_train=True)

    print(f'Training samples: {len(train_subset)}')
    print(f'Validation samples: {len(val_subset)}')

    # Create DataLoaders
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, worker_init_fn=seed_worker)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, worker_init_fn=seed_worker)

    # For each model
    for encoder_name in model_names:
        print(f'\nTraining model with encoder {encoder_name}')
        if encoder_name == 'residualcnn':
            model = ResidualCNN(
                in_channels=SEQ_LEN * 5,  # 5 channels per timestep
                num_residual_blocks=4,
                out_channels=1,
                num_steps=NUM_STEPS,
                kernel_size=3,
                padding=1,
                activation=nn.ReLU(inplace=True)
            ).to(device)
        else:
            # Without this branch an unrecognised name either hit a NameError or,
            # worse, kept training the previous iteration's already-trained model
            # and saved it under the wrong encoder name.
            raise ValueError(f"No model constructor is defined for encoder '{encoder_name}'.")
        
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

        # Define model save path
        model_save_path = f'model_{encoder_name}_fold{fold_idx+1}.pth'
        
        # Retrieve num_epochs for the current model
        current_num_epochs = model_epochs.get(encoder_name, num_epochs_default)  # Fallback to default num_epochs if not specified
        print(f"Training {encoder_name} for {current_num_epochs}")

        # Train for the full epoch budget; train_model returns the model with its
        # best-validation weights restored.
        model, best_val_loss = train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            criterion=criterion,
            optimizer=optimizer,
            num_epochs=current_num_epochs,
            device=device,
            model_save_path=model_save_path,
        )
        
        fold_val_losses[(fold_idx + 1, encoder_name)] = best_val_loss
        
        # Append the model save path
        saved_model_paths.append(model_save_path)

# Print average score for each fold and across all models
for fold_idx in range(num_folds):
    fold_losses = [loss for (fold, _), loss in fold_val_losses.items() if fold == fold_idx + 1]
    # NaN rather than 0.0 for a fold without results: 0.0 would read as a perfect score.
    avg_fold_loss = np.mean(fold_losses) if fold_losses else float('nan')
    print(f"Fold {fold_idx+1} Average Validation Loss: {avg_fold_loss:.4f}")

# Calculate and print overall average validation loss across all folds and models
overall_avg_loss = np.mean(list(fold_val_losses.values())) if fold_val_losses else float('nan')
print(f"Overall Average Validation Loss: {overall_avg_loss:.4f}")


Fold 1/9

Fold 1 - Validation 'id's:
[808631]

Fold 1 - Train 'id's:
[804025 875935 930086 661713 633229 868570  16525 220212]
Training samples: 1008
Validation samples: 126

Training model with encoder residualcnn
Training residualcnn for 400

Epoch 1/400 - Training
Epoch 1 Training Loss: 0.2592
Epoch 1 Validation Loss: 0.0858
Validation loss decreased. Saving model to model_residualcnn_fold1.pth

Epoch 2/400 - Training
Epoch 2 Training Loss: 0.0523
Epoch 2 Validation Loss: 0.0406
Validation loss decreased. Saving model to model_residualcnn_fold1.pth

Epoch 3/400 - Training
Epoch 3 Training Loss: 0.0315
Epoch 3 Validation Loss: 0.0310
Validation loss decreased. Saving model to model_residualcnn_fold1.pth

Epoch 4/400 - Training
Epoch 4 Training Loss: 0.0266
Epoch 4 Validation Loss: 0.0287
Validation loss decreased. Saving model to model_residualcnn_fold1.pth

Epoch 5/400 - Training
Epoch 5 Training Loss: 0.0244
Epoch 5 Validation Loss: 0.0316
Validation loss did not improve.

Epoch 6

  model.load_state_dict(torch.load(model_save_path))


Training samples: 1008
Validation samples: 126

Training model with encoder residualcnn
Training residualcnn for 400

Epoch 1/400 - Training
Epoch 1 Training Loss: 0.1601
Epoch 1 Validation Loss: 0.0411
Validation loss decreased. Saving model to model_residualcnn_fold2.pth

Epoch 2/400 - Training
Epoch 2 Training Loss: 0.0355
Epoch 2 Validation Loss: 0.0244
Validation loss decreased. Saving model to model_residualcnn_fold2.pth

Epoch 3/400 - Training
Epoch 3 Training Loss: 0.0279
Epoch 3 Validation Loss: 0.0217
Validation loss decreased. Saving model to model_residualcnn_fold2.pth

Epoch 4/400 - Training
Epoch 4 Training Loss: 0.0257
Epoch 4 Validation Loss: 0.0201
Validation loss decreased. Saving model to model_residualcnn_fold2.pth

Epoch 5/400 - Training
Epoch 5 Training Loss: 0.0240
Epoch 5 Validation Loss: 0.0182
Validation loss decreased. Saving model to model_residualcnn_fold2.pth

Epoch 6/400 - Training
Epoch 6 Training Loss: 0.0219
Epoch 6 Validation Loss: 0.0166
Validation l

In [10]:
def predict_ensemble(saved_model_paths, test_loader, device, num_timesteps, models, model_seq_lens, model_output_timesteps=20):
    """
    Generate ensemble predictions by averaging outputs from multiple models.

    For every test sample the models predict ``model_output_timesteps`` frames
    at a time. When ``num_timesteps`` needs more than one chunk the forecast is
    rolled out autoregressively: the last input frame is repeated for each new
    timestep with channel 2 (``xi``) replaced by the averaged prediction.

    Args:
        saved_model_paths: Unused; kept so existing callers keep working.
        test_loader: Iterable of input tensors shaped ``[1, seq_len, C, H, W]``
            (batch size must be 1). ``test_loader.dataset.samples`` must be a
            list of dicts with an ``'id'`` key, in iteration order.
        device (torch.device): Device the inputs are moved to.
        num_timesteps (int): Total number of future frames wanted per sample.
        models (list): Models in eval mode, already on ``device``.
        model_seq_lens (list): Input window length of each model, same order.
        model_output_timesteps (int): Frames each model emits per forward pass.

    Returns:
        (list of np.ndarray, list): One flat float32 array of
        ``num_timesteps * H * W`` values per sample, and the sample ids.

    Raises:
        ValueError: If no models are given or ``num_timesteps`` < 1.
    """
    model_specs = list(zip(models, model_seq_lens))
    if not model_specs:
        raise ValueError("No valid models were processed for averaging.")
    if num_timesteps < 1:
        raise ValueError(f"num_timesteps must be at least 1, got {num_timesteps}.")

    # Keep enough history for the model with the longest window. Truncating to
    # the window of whichever model happened to be last in the list would starve
    # the others on the next rollout step.
    history_len = max(seq_len for _, seq_len in model_specs)

    ids = [sample['id'] for sample in test_loader.dataset.samples]
    full_chunks, remainder = divmod(num_timesteps, model_output_timesteps)
    total_iterations = full_chunks + (1 if remainder > 0 else 0)
    print(f"Total iterations required: {total_iterations}")

    ensemble_predictions = []
    with torch.no_grad():
        for inputs in tqdm(test_loader, desc="Predicting"):
            input_seq = inputs.to(device)        # [1, seq_len, C, H, W]
            height, width = input_seq.shape[-2:]

            # Predictions for this sample, one flat array per chunk
            preds_per_id = []

            for t in range(total_iterations):
                is_last = t == total_iterations - 1
                ensemble_output = torch.zeros(
                    (model_output_timesteps, height, width),
                    dtype=torch.float32,
                    device=input_seq.device,
                )

                for model, seq_len in model_specs:
                    output = model(input_seq[:, -seq_len:])
                    # Raises if the model did not return exactly one sample of
                    # model_output_timesteps single-channel frames.
                    ensemble_output += output.reshape(model_output_timesteps, height, width)

                ensemble_output /= len(model_specs)  # Averaging over ensemble

                if is_last and remainder > 0:
                    ensemble_output = ensemble_output[:remainder]

                preds_per_id.append(ensemble_output.reshape(-1).cpu().numpy())

                if not is_last:
                    # Roll the window forward: copy the last known frame for every
                    # new timestep and overwrite its xi channel with the forecast.
                    new_time_steps = input_seq[:, -1:].repeat(1, ensemble_output.shape[0], 1, 1, 1)
                    new_time_steps[:, :, 2] = ensemble_output.unsqueeze(0)
                    input_seq = torch.cat([input_seq, new_time_steps], dim=1)[:, -history_len:]

            ensemble_predictions.append(np.concatenate(preds_per_id))

    return ensemble_predictions, ids

In [11]:
!pip freeze > requirements.txt

In [12]:
def prepare_submission(predictions, ids, submission_csv, num_timesteps=20):
    """
    Prepares the submission CSV file from predictions and IDs.

    The file has one row per sample: an ``id`` column followed by
    ``pixel_1 .. pixel_N`` with ``N = 113 * 32 * num_timesteps``.

    Args:
        predictions (list): List of flat numpy arrays containing predictions for each test sample.
        ids (list): List of sample IDs corresponding to the predictions.
        submission_csv (str): Path to save the submission CSV.
        num_timesteps (int): Number of future timesteps predicted.

    Returns:
        None

    Raises:
        ValueError: If ``ids`` and ``predictions`` differ in length, or any
            prediction does not have exactly ``113 * 32 * num_timesteps`` values.
        AssertionError: If the assembled table still has an unexpected shape.
    """
    print("Preparing submission file...")
    expected_pixels = 113 * 32 * num_timesteps  # 72,320 for 20 timesteps

    if len(ids) != len(predictions):
        raise ValueError(f"Got {len(ids)} ids for {len(predictions)} predictions.")

    # Check every row up front: pd.DataFrame pads short rows with NaN, so a
    # table-level shape check alone lets ragged predictions through.
    for sample_id, prediction in zip(ids, predictions):
        if len(prediction) != expected_pixels:
            raise ValueError(
                f"Prediction for id {sample_id} has {len(prediction)} values, expected {expected_pixels}."
            )

    submission = pd.DataFrame(predictions)
    submission.insert(0, 'id', ids)
    submission.columns = ['id'] + [f'pixel_{i}' for i in range(1, submission.shape[1])]
    assert submission.shape == (len(predictions), 1 + expected_pixels), f"Expected shape ({len(predictions)}, {1 + expected_pixels}), got {submission.shape}"
    submission.to_csv(submission_csv, index=False)
    print(f'Submission file saved as {submission_csv}')


In [13]:
import os

def parse_model_path(model_path):
    """
    Parses the model path to extract encoder_name, fold and seq_len.

    Assumes the file name format ``model_<encoder_name>_fold<fold>.pth``; the
    encoder name may itself contain underscores.
    Example: 'model_residualcnn_fold1.pth' -> ('residualcnn', 1, 5)

    Args:
        model_path (str): Path or bare file name of a saved checkpoint.

    Returns:
        tuple: ``(encoder_name, fold, seq_len)`` where ``seq_len`` comes from a
        fixed per-encoder table.

    Raises:
        ValueError: If the file name does not match the expected format or the
            encoder has no entry in the sequence-length table.
    """
    import re  # local import: the surrounding cell only imports os

    filename = os.path.basename(model_path)

    # Anchor on the trailing "_fold<N>" instead of splitting on every "_", so
    # encoder names such as "convlstm_autoencoder" are kept in one piece.
    match = re.fullmatch(r'model_(?P<encoder>.+)_fold(?P<fold>\d+)(?:\..*)?', filename)
    if match is None:
        raise ValueError(f"Unexpected model filename format: {filename}")

    encoder_name = match.group('encoder')
    fold = int(match.group('fold'))

    # Define sequence length (seq_len) based on encoder_name using a predefined mapping
    encoder_seq_len_mapping = {
        'residualcnn': 5,
        'residualcnnsa': 5,
        'convlstm': 5,
        'selfattconvlstm': 5,
        '3dconvlstm': 5,
        'dconvlstmsac': 5,
        'bidirectconvlstmunet': 5,
        'attentionunet': 5,
        'deeplabtemporal': 5,
        'unet': 5,
        'residualdensecnn': 5,
        'multiscaleconvlstm': 5,
    }

    if encoder_name not in encoder_seq_len_mapping:
        raise ValueError(f"Unknown encoder_name '{encoder_name}', cannot determine seq_len.")

    seq_len = encoder_seq_len_mapping[encoder_name]

    return encoder_name, fold, seq_len


In [14]:
# Build the test set: one sample per simulation, made of its first SEQ_LEN frames.
test_dataset = FireDataset(
        csv_data=test_csv,
        data_dir=test_data_dir,
        seq_len=SEQ_LEN,
        multi_step=NUM_STEPS,
        is_train=False
    )

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,  # predict_ensemble processes one sample at a time
    shuffle=False,  # order must match test_dataset.samples, which supplies the ids
    num_workers=0,
    pin_memory=True,
    worker_init_fn=seed_worker,
)

models = []
model_seq_lens = []

# Rebuild every trained model and load its best-validation weights
for model_path in saved_model_paths:
    try:
        # Distinct name so the notebook-level `seq_len` setting is not overwritten.
        encoder_name, fold, model_seq_len = parse_model_path(model_path)
    except ValueError as e:
        print(f"Skipping model {model_path}: {e}")
        continue

    print(f"Loading model: {model_path}, Encoder: {encoder_name}, Fold: {fold}, Seq_len: {model_seq_len}")

    # Initialize the model with the correct in_channels based on seq_len
    in_channels = model_seq_len * 5  # 5 channels per time step

    if encoder_name == 'residualcnn':
        # num_steps must match the value used at training time or the saved
        # output layer will not fit.
        model = ResidualCNN(in_channels=in_channels, num_residual_blocks=4, num_steps=NUM_STEPS).to(device)
    else:
        # Without this branch the previous iteration's model would be reused
        # and loaded with another encoder's weights.
        print(f"Skipping model {model_path}: no constructor defined for encoder '{encoder_name}'.")
        continue

    # Load state_dict (weights_only=True: the file holds tensors only)
    model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
    model.eval()
    
    models.append(model)
    model_seq_lens.append(model_seq_len)  # Store the seq_len for this model

# Generate Ensemble Predictions
ensemble_predictions, ids = predict_ensemble(
    saved_model_paths=saved_model_paths,
    test_loader=test_loader,
    device=device,
    num_timesteps=NUM_STEPS,
    models=models,
    model_seq_lens=model_seq_lens
)


# Prepare Submission File
prepare_submission(
    predictions=ensemble_predictions, 
    ids=ids, 
    submission_csv='submission.csv', 
    num_timesteps=NUM_STEPS
)


  model.load_state_dict(torch.load(model_path, map_location=device))


Loading model: model_residualcnn_fold1.pth, Encoder: residualcnn, Fold: 1, Seq_len: 5
Loading model: model_residualcnn_fold2.pth, Encoder: residualcnn, Fold: 2, Seq_len: 5
Loading model: model_residualcnn_fold3.pth, Encoder: residualcnn, Fold: 3, Seq_len: 5
Loading model: model_residualcnn_fold4.pth, Encoder: residualcnn, Fold: 4, Seq_len: 5
Loading model: model_residualcnn_fold5.pth, Encoder: residualcnn, Fold: 5, Seq_len: 5
Loading model: model_residualcnn_fold6.pth, Encoder: residualcnn, Fold: 6, Seq_len: 5
Loading model: model_residualcnn_fold7.pth, Encoder: residualcnn, Fold: 7, Seq_len: 5
Loading model: model_residualcnn_fold8.pth, Encoder: residualcnn, Fold: 8, Seq_len: 5
Loading model: model_residualcnn_fold9.pth, Encoder: residualcnn, Fold: 9, Seq_len: 5
Total iterations required: 1


Predicting: 100%|██████████| 27/27 [00:00<00:00, 73.63it/s]


Preparing submission file...
Submission file saved as submission.csv
