In [None]:
# Install the timm library
# !pip install timm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
import numpy as np
import pandas as pd
import cv2
import os
from PIL import Image
import timm  # Import timm library

# Image preprocessing pipelines: random flips for training, deterministic for validation/inference
_IMAGE_SIZE = (224, 224)  # every image is resized to this fixed size
_IMAGENET_MEAN = [0.485, 0.456, 0.406]  # ImageNet channel statistics used for normalization
_IMAGENET_STD = [0.229, 0.224, 0.225]

train_transform = transforms.Compose(
    [
        transforms.Resize(_IMAGE_SIZE),
        transforms.RandomHorizontalFlip(),  # augmentation: random left/right flip
        transforms.RandomVerticalFlip(),  # augmentation: random up/down flip
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

# Same resize/normalize steps as training, without the random flips
val_transform = transforms.Compose(
    [
        transforms.Resize(_IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

# Custom Dataset Class
class SolarPanelDataset(Dataset):
    """Dataset yielding an RGB image, an integer metadata code and (optionally) count targets.

    Args:
        df: DataFrame with an "ID" column (image file stem) plus "img_origin"
            and "placement" columns; "pan_nbr" and "boil_nbr" are also
            required when ``to_train`` is true.
        transform: Optional callable applied to the loaded PIL image.
        to_train: When true, ``__getitem__`` also returns the target array.
        image_dir: Directory holding ``<ID>.jpg`` files. Defaults to the
            Kaggle competition image folder.
    """

    DEFAULT_IMAGE_DIR = "/kaggle/input/lacuna-solar-survey-challenge/images"
    # Integer code for each known placement category
    PLACEMENT_CODES = {"roof": 0, "openspace": 1, "r_openspace": 2, "S-unknown": 3}

    def __init__(self, df, transform=None, to_train=True, image_dir=None):
        self.df = df
        self.transform = transform
        self.to_train = to_train
        self.image_dir = self.DEFAULT_IMAGE_DIR if image_dir is None else image_dir

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, metadata_code[, target])`` for the row at position ``idx``.

        Raises:
            KeyError: If the row's placement is not in ``PLACEMENT_CODES``.
        """
        row = self.df.iloc[idx]  # look the row up once and reuse it below

        img_path = os.path.join(self.image_dir, row["ID"] + ".jpg")
        # The context manager closes the file handle as soon as the pixels are converted
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)

        # Fold both categorical fields into one integer: tens digit = origin, units digit = placement
        img_origin = 0 if row["img_origin"] == "S" else 1  # "S" -> 0, anything else -> 1
        placement = self.PLACEMENT_CODES[row["placement"]]
        metadata_encoded = img_origin * 10 + placement

        if self.to_train:
            # Target order is [pan_nbr, boil_nbr]
            target = row[["pan_nbr", "boil_nbr"]].values.astype(np.float32)
            return image, metadata_encoded, target
        return image, metadata_encoded

# Advanced Model with ConvNeXt Backbone from timm
class AdvancedModel(nn.Module):
    """Regressor that concatenates ConvNeXt image features with a metadata embedding.

    The final linear layer emits two values per sample.
    """

    def __init__(self):
        super().__init__()
        image_dim, metadata_dim = 1024, 128
        # timm ConvNeXt created with num_classes=0; presumably yields 1024-d
        # features, matching the linear layer's input size below -- TODO confirm
        self.backbone = timm.create_model("convnext_base", pretrained=True, num_classes=0)
        # 14-row lookup table, so metadata codes 0-13 are valid indices
        self.metadata_embedding = nn.Embedding(14, metadata_dim)
        self.fc = nn.Linear(image_dim + metadata_dim, 2)

    def forward(self, images, metadata):
        """Return the two-value prediction for each (image, metadata code) pair."""
        visual = self.backbone(images)
        meta = self.metadata_embedding(metadata)
        fused = torch.cat((visual, meta), dim=1)  # join along the feature axis
        return self.fc(fused)

# Training Function
def train(fold, train_loader, val_loader, epochs, batch_size):
    """Train one cross-validation fold and checkpoint the best epoch.

    Args:
        fold: Zero-based fold index; used in log lines and the checkpoint name.
        train_loader: Loader yielding ``(images, metadata, targets)`` batches.
        val_loader: Loader yielding ``(images, metadata, targets)`` batches.
        epochs: Number of epochs; also the cosine schedule's ``T_max``.
        batch_size: Unused; kept so existing callers keep working.

    Returns:
        The lowest validation MAE seen across epochs. The matching weights
        are saved to ``best_model_fold{fold}.pth``.
    """
    model = AdvancedModel().cuda()
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)
    scaler = GradScaler()  # Correct initialization for older PyTorch versions
    criterion = nn.L1Loss()  # MAE for counting

    best_mae = float("inf")
    for epoch in range(epochs):
        model.train()
        train_total, train_count = 0.0, 0
        for images, metadata, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, metadata, targets = images.cuda(), metadata.cuda(), targets.cuda()
            optimizer.zero_grad()
            with autocast():
                outputs = model(images, metadata)
                loss = criterion(outputs, targets)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # Weight each batch mean by its size so a short final batch is
            # not over-represented in the epoch average.
            n_samples = images.size(0)
            train_total += loss.item() * n_samples
            train_count += n_samples
        scheduler.step()

        # Validation
        model.eval()
        val_total, val_count = 0.0, 0
        with torch.no_grad():
            for images, metadata, targets in val_loader:
                images, metadata, targets = images.cuda(), metadata.cuda(), targets.cuda()
                outputs = model(images, metadata)
                n_samples = images.size(0)
                val_total += criterion(outputs, targets).item() * n_samples
                val_count += n_samples
        # Sample-weighted mean: the MAE over every validation sample
        val_loss = val_total / val_count
        print(f"Fold {fold+1}, Epoch {epoch+1}: Train Loss = {train_total/train_count:.4f}, Val Loss = {val_loss:.4f}")

        if val_loss < best_mae:
            best_mae = val_loss
            torch.save(model.state_dict(), f"best_model_fold{fold}.pth")
    return best_mae

# Inference Function
def predict(test_df, model_paths, batch_size):
    """Average the predictions of several saved models over the test set.

    Args:
        test_df: Test DataFrame passed to ``SolarPanelDataset``.
        model_paths: Paths to saved ``AdvancedModel`` state dicts.
        batch_size: Batch size for the inference loader.

    Returns:
        Array of shape ``(len(test_df), 2)`` holding the mean model output
        per row, in test_df row order.

    Raises:
        ValueError: If ``model_paths`` is empty.
    """
    if len(model_paths) == 0:
        raise ValueError("model_paths must contain at least one checkpoint path")

    test_ds = SolarPanelDataset(test_df, transform=val_transform, to_train=False)
    # shuffle=False keeps batches in row order, which the offset bookkeeping below relies on
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=4)

    predictions = np.zeros((len(test_df), 2))
    for path in model_paths:
        model = AdvancedModel().cuda()
        model.load_state_dict(torch.load(path))
        model.eval()

        offset = 0  # first row of the current batch inside `predictions`
        with torch.no_grad():
            for images, metadata in tqdm(test_loader, desc="Inference"):
                images, metadata = images.cuda(), metadata.cuda()
                outputs = model(images, metadata).cpu().numpy()
                # Add into this batch's rows only; adding a (batch, 2) array to
                # the whole (N, 2) buffer fails to broadcast when N != batch.
                end = offset + len(outputs)
                predictions[offset:end] += outputs
                offset = end
    return predictions / len(model_paths)

# Main Execution
if __name__ == "__main__":
    # Load data
    train_df = pd.read_csv("/kaggle/input/lacuna-solar-survey-challenge/Train.csv")
    test_df = pd.read_csv("/kaggle/input/lacuna-solar-survey-challenge/Test.csv")

    # Cross-validation, stratified on the placement category
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    model_paths = []
    fold_maes = []
    for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df["placement"])):
        print(f"Training fold {fold+1}")
        train_dataset = SolarPanelDataset(train_df.iloc[train_idx], transform=train_transform)
        val_dataset = SolarPanelDataset(train_df.iloc[val_idx], transform=val_transform)
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)
        best_mae = train(fold, train_loader, val_loader, epochs=25, batch_size=16)
        fold_maes.append(best_mae)
        model_paths.append(f"best_model_fold{fold}.pth")
    print(f"Best validation MAE per fold: {fold_maes}, mean = {np.mean(fold_maes):.4f}")

    # Inference
    predictions = predict(test_df, model_paths, batch_size=64)

    # Create submission: two rows per test ID, "<ID>_boil" first and "<ID>_pan" second.
    # The model is trained on targets ordered [pan_nbr, boil_nbr], so swap the
    # columns before flattening; otherwise each value lands under the wrong label.
    submission = pd.DataFrame({
        "ID": np.repeat(test_df["ID"].values, 2),
        "Target": predictions[:, [1, 0]].flatten()
    })
    submission["ID"] += np.where(
        submission.groupby("ID").cumcount() == 0,
        "_boil",
        "_pan"
    )
    submission.to_csv("submission_original.csv", index=False)

    # Integer submission: same rows with targets rounded to whole counts
    int_submission = submission.copy()
    int_submission["Target"] = np.round(int_submission["Target"]).astype(int)
    int_submission.to_csv("submission_integer.csv", index=False)

    print("Submissions saved with shapes:", submission.shape, int_submission.shape)

Training fold 1


model.safetensors:   0%|          | 0.00/354M [00:00<?, ?B/s]

  scaler = GradScaler()  # Correct initialization for older PyTorch versions
  with autocast():
Epoch 1/25: 100%|██████████| 283/283 [06:47<00:00,  1.44s/it]


Fold 1, Epoch 1: Train Loss = 1.0573, Val Loss = 0.9105


Epoch 2/25: 100%|██████████| 283/283 [06:29<00:00,  1.38s/it]


Fold 1, Epoch 2: Train Loss = 0.9125, Val Loss = 0.8293


Epoch 3/25: 100%|██████████| 283/283 [06:29<00:00,  1.38s/it]


Fold 1, Epoch 3: Train Loss = 0.8317, Val Loss = 0.8175


Epoch 4/25: 100%|██████████| 283/283 [06:27<00:00,  1.37s/it]


Fold 1, Epoch 4: Train Loss = 0.7898, Val Loss = 0.7798


Epoch 5/25: 100%|██████████| 283/283 [06:27<00:00,  1.37s/it]


Fold 1, Epoch 5: Train Loss = 0.7361, Val Loss = 0.7635


Epoch 6/25: 100%|██████████| 283/283 [06:26<00:00,  1.37s/it]


Fold 1, Epoch 6: Train Loss = 0.7188, Val Loss = 0.7703


Epoch 7/25: 100%|██████████| 283/283 [06:29<00:00,  1.38s/it]


Fold 1, Epoch 7: Train Loss = 0.7031, Val Loss = 0.7389


Epoch 8/25: 100%|██████████| 283/283 [06:26<00:00,  1.37s/it]


Fold 1, Epoch 8: Train Loss = 0.6540, Val Loss = 0.7257


Epoch 9/25: 100%|██████████| 283/283 [06:31<00:00,  1.38s/it]


Fold 1, Epoch 9: Train Loss = 0.6345, Val Loss = 0.7380


Epoch 10/25: 100%|██████████| 283/283 [06:26<00:00,  1.37s/it]


Fold 1, Epoch 10: Train Loss = 0.6244, Val Loss = 0.7387


Epoch 11/25: 100%|██████████| 283/283 [06:25<00:00,  1.36s/it]


Fold 1, Epoch 11: Train Loss = 0.6023, Val Loss = 0.7279


Epoch 12/25: 100%|██████████| 283/283 [06:28<00:00,  1.37s/it]


Fold 1, Epoch 12: Train Loss = 0.5930, Val Loss = 0.7053


Epoch 13/25: 100%|██████████| 283/283 [06:27<00:00,  1.37s/it]


Fold 1, Epoch 13: Train Loss = 0.5702, Val Loss = 0.7164


Epoch 14/25:   6%|▌         | 17/283 [00:27<08:27,  1.91s/it]