In [1]:
# ==================================================================================
# MM-CTR Challenge - Task 1&2: Complete Pipeline (Embedding + CTR)
# Approach: Generate CLIP embeddings + Train optimized CTR model
# ==================================================================================

import os
import gc
import zipfile
import subprocess
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from PIL import Image
from tqdm.auto import tqdm
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score

# Install dependencies
print("Setting up environment...")
try:
    import clip
except:
    subprocess.run([
        "pip", "install", "-q",
        "ftfy", "regex", 
        "git+https://github.com/openai/CLIP.git"
    ], check=True)
    import clip

try:
    import polars as pl
except:
    subprocess.run(["pip", "install", "-q", "polars"], check=True)
    import polars as pl

print("Environment ready!\n")

# ==================================================================================
# Configuration
# ==================================================================================

class Config:
    """Configuration for Task 1&2 pipeline"""
    
    # Paths
    DATASET_ROOT = Path("/kaggle/input/mm-ctr-competition/MM_CTR_Competition")
    TRAIN_DATA_PATH = DATASET_ROOT / "MicroLens_1M_x1"
    ITEM_METADATA = DATASET_ROOT / "item_feature.parquet"
    IMAGE_ROOT = DATASET_ROOT / "item_images" / "item_images"
    
    # Output
    WORK_DIR = Path("/kaggle/working")
    EMBEDDING_OUTPUT = WORK_DIR / "task1and2_embeddings.parquet"
    CHECKPOINT_DIR = WORK_DIR / "checkpoints_task1and2"
    SUBMISSION_DIR = WORK_DIR / "submission_task1and2"
    
    # Embedding settings (Task 1 component)
    CLIP_MODEL = "ViT-B/32"
    TARGET_DIM = 128
    CLIP_BATCH = 64
    
    # Training settings (Task 2 component)
    TRAIN_BATCH = 2048
    LR = 4e-4
    MAX_EPOCHS = 30
    WEIGHT_DECAY = 1.2e-4
    GRAD_CLIP = 4.5
    PATIENCE = 3
    LABEL_SMOOTH = 0.015
    
    # Model architecture
    EMBED_DIM = 128
    SIDE_EMBED = 14
    HIDDEN_LAYERS = [480, 240]
    DROPOUT = 0.27
    
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    SEED = 2025

cfg = Config()
cfg.CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
cfg.SUBMISSION_DIR.mkdir(parents=True, exist_ok=True)

torch.manual_seed(cfg.SEED)
np.random.seed(cfg.SEED)

print(f"Device: {cfg.DEVICE}")
print(f"Output: {cfg.SUBMISSION_DIR}\n")

# ==================================================================================
# PART 1: Generate Embeddings (Task 1 Component)
# ==================================================================================

class ItemDataset(Dataset):
    def __init__(self, metadata, img_dir, transform):
        self.ids = metadata['item_id'].values
        self.titles = metadata['item_title'].fillna("").astype(str).values
        self.img_dir = Path(img_dir)
        self.transform = transform
    
    def __len__(self):
        return len(self.ids)
    
    def __getitem__(self, idx):
        item_id = self.ids[idx]
        title = self.titles[idx][:77]
        text_tok = clip.tokenize([title], truncate=True)[0]
        
        img_path = self.img_dir / f"{item_id}.jpg"
        if img_path.exists():
            try:
                img = Image.open(img_path).convert("RGB")
                img_tensor = self.transform(img)
            except:
                img_tensor = torch.zeros(3, 224, 224)
        else:
            img_tensor = torch.zeros(3, 224, 224)
        
        return item_id, text_tok, img_tensor


def generate_embeddings():
    """Part 1: Generate CLIP-based embeddings"""
    
    print("="*70)
    print("PART 1: GENERATING MULTIMODAL EMBEDDINGS")
    print("="*70)
    
    if cfg.EMBEDDING_OUTPUT.exists():
        print(f"Found existing: {cfg.EMBEDDING_OUTPUT}")
        print("Skipping generation.\n")
        return
    
    print(f"Loading {cfg.CLIP_MODEL}...")
    model, preprocess = clip.load(cfg.CLIP_MODEL, device=cfg.DEVICE)
    model.eval()
    
    items_df = pd.read_parquet(cfg.ITEM_METADATA)
    print(f"Processing {len(items_df)} items\n")
    
    dataset = ItemDataset(items_df, cfg.IMAGE_ROOT, preprocess)
    loader = DataLoader(dataset, batch_size=cfg.CLIP_BATCH, num_workers=2, shuffle=False)
    
    item_ids, embeddings = [], []
    
    print("Extracting features...")
    with torch.inference_mode():
        for batch_ids, text, imgs in tqdm(loader, desc="Batches"):
            text = text.to(cfg.DEVICE)
            imgs = imgs.to(cfg.DEVICE)
            
            txt_feat = model.encode_text(text)
            img_feat = model.encode_image(imgs)
            
            txt_feat = txt_feat / txt_feat.norm(dim=1, keepdim=True).clamp(min=1e-8)
            img_feat = img_feat / img_feat.norm(dim=1, keepdim=True).clamp(min=1e-8)
            
            fused = (txt_feat + img_feat) * 0.5
            embeddings.append(fused.cpu().numpy())
            item_ids.extend(batch_ids.tolist())
    
    full_emb = np.vstack(embeddings)
    print(f"\nExtracted: {full_emb.shape}")
    
    print(f"PCA: {full_emb.shape[1]} → {cfg.TARGET_DIM}")
    pca = PCA(n_components=cfg.TARGET_DIM, random_state=cfg.SEED)
    reduced = pca.fit_transform(full_emb)
    print(f"Variance: {pca.explained_variance_ratio_.sum():.1%}\n")
    
    output_df = pd.DataFrame({
        'item_id': item_ids,
        'item_emb_d128': [e.tolist() for e in reduced.astype(np.float32)]
    })
    output_df.to_parquet(cfg.EMBEDDING_OUTPUT, index=False)
    print(f"Saved: {cfg.EMBEDDING_OUTPUT}\n")
    
    del model, full_emb, reduced
    torch.cuda.empty_cache()
    gc.collect()

generate_embeddings()

# ==================================================================================
# PART 2: Load Embeddings & Prepare Data
# ==================================================================================

def load_embeddings():
    """Load generated embeddings"""
    
    print("="*70)
    print("PART 2: LOADING EMBEDDINGS FOR TRAINING")
    print("="*70)
    
    emb_df = pl.read_parquet(str(cfg.EMBEDDING_OUTPUT))
    
    ids = emb_df['item_id'].to_list()
    id_map = {item_id: idx + 1 for idx, item_id in enumerate(ids)}
    
    vectors = np.array(emb_df['item_emb_d128'].to_list(), dtype=np.float32)
    padding = np.zeros((1, cfg.EMBED_DIM), dtype=np.float32)
    matrix = np.vstack([padding, vectors])
    
    print(f"Embedding matrix: {matrix.shape}\n")
    
    return torch.from_numpy(matrix), id_map

EMB_WEIGHTS, ID_MAPPING = load_embeddings()


class CTRDataset(Dataset):
    def __init__(self, path, id_map, test=False):
        df = pl.read_parquet(str(path))
        
        map_fn = lambda arr: np.array([id_map.get(x, 0) for x in arr], dtype=np.int32)
        
        self.target = map_fn(df['item_id'].to_numpy())
        hist = np.stack(df['item_seq'].to_numpy())
        self.history = map_fn(hist.flatten()).reshape(hist.shape)
        self.likes = df['likes_level'].to_numpy().astype(np.int32)
        self.views = df['views_level'].to_numpy().astype(np.int32)
        
        if test:
            self.labels = np.zeros(len(df), dtype=np.float32)
            self.ids = df['ID'].to_numpy().astype(np.int32)
        else:
            self.labels = df['label'].to_numpy().astype(np.float32)
            self.ids = None
        
        del df
        gc.collect()
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return (self.history[idx], self.target[idx], 
                self.likes[idx], self.views[idx], self.labels[idx])


# ==================================================================================
# PART 3: CTR Model Architecture
# ==================================================================================

class DiceActivation(nn.Module):
    """Data-dependent activation"""
    def __init__(self, dim):
        super().__init__()
        self.bn = nn.BatchNorm1d(dim, eps=1e-8)
        self.alpha = nn.Parameter(torch.zeros(dim))
    
    def forward(self, x):
        prob = torch.sigmoid(self.bn(x))
        return prob * x + (1 - prob) * self.alpha * x


class AttentionModule(nn.Module):
    """User interest attention"""
    def __init__(self, dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim * 4, 72),
            nn.ReLU(),
            nn.Linear(72, 1)
        )
    
    def forward(self, query, history, mask):
        B, L, D = history.shape
        query_exp = query.expand(-1, L, -1)
        
        features = torch.cat([
            query_exp, history,
            query_exp - history,
            query_exp * history
        ], dim=-1)
        
        scores = self.net(features).squeeze(-1)
        scores = scores.masked_fill(~mask, -1e9)
        weights = torch.softmax(scores, dim=1).unsqueeze(-1)
        
        pooled = (weights * history).sum(dim=1)
        return pooled


class CTRModel(nn.Module):
    """Enhanced CTR prediction model"""
    
    def __init__(self, emb_weights):
        super().__init__()
        n_items, dim = emb_weights.shape
        
        self.item_emb = nn.Embedding(n_items, dim, padding_idx=0)
        self.item_emb.weight.data.copy_(emb_weights)
        self.item_emb.weight.requires_grad = True
        
        self.likes_emb = nn.Embedding(20, cfg.SIDE_EMBED)
        self.views_emb = nn.Embedding(20, cfg.SIDE_EMBED)
        
        self.attention = AttentionModule(dim)
        
        # Prediction tower
        in_dim = dim * 2 + cfg.SIDE_EMBED * 2
        layers = []
        prev = in_dim
        
        for hidden in cfg.HIDDEN_LAYERS:
            layers.extend([
                nn.Linear(prev, hidden),
                DiceActivation(hidden),
                nn.Dropout(cfg.DROPOUT)
            ])
            prev = hidden
        
        layers.append(nn.Linear(prev, 1))
        self.tower = nn.Sequential(*layers)
    
    def forward(self, hist, tgt, likes, views):
        hist_emb = self.item_emb(hist)
        tgt_emb = self.item_emb(tgt).unsqueeze(1)
        
        mask = (hist != 0)
        user_vec = self.attention(tgt_emb, hist_emb, mask)
        
        features = torch.cat([
            tgt_emb.squeeze(1),
            user_vec,
            self.likes_emb(likes),
            self.views_emb(views)
        ], dim=1)
        
        return self.tower(features).squeeze(-1)


# ==================================================================================
# PART 4: Training
# ==================================================================================

def train_model():
    """Part 2: Train CTR model"""
    
    print("="*70)
    print("PART 3: TRAINING CTR MODEL")
    print("="*70)
    
    train_ds = CTRDataset(cfg.TRAIN_DATA_PATH / "train.parquet", ID_MAPPING)
    val_ds = CTRDataset(cfg.TRAIN_DATA_PATH / "valid.parquet", ID_MAPPING)
    
    train_loader = DataLoader(train_ds, batch_size=cfg.TRAIN_BATCH, 
                               shuffle=True, num_workers=2)
    val_loader = DataLoader(val_ds, batch_size=cfg.TRAIN_BATCH * 2, 
                             shuffle=False, num_workers=2)
    
    model = CTRModel(EMB_WEIGHTS).to(cfg.DEVICE)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = AdamW(model.parameters(), lr=cfg.LR, weight_decay=cfg.WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.55, patience=2, min_lr=1e-6
    )
    
    best_auc = 0.0
    no_improve = 0
    best_path = cfg.CHECKPOINT_DIR / "best_model.pt"
    
    print("Training started...\n")
    
    for epoch in range(cfg.MAX_EPOCHS):
        # Train
        model.train()
        total_loss = 0
        
        for hist, tgt, likes, views, labels in tqdm(train_loader, 
                                                     desc=f"Epoch {epoch+1}"):
            hist = hist.to(cfg.DEVICE)
            tgt = tgt.to(cfg.DEVICE)
            likes = likes.to(cfg.DEVICE)
            views = views.to(cfg.DEVICE)
            labels = labels.to(cfg.DEVICE)
            
            # Label smoothing
            labels = labels * (1 - cfg.LABEL_SMOOTH) + cfg.LABEL_SMOOTH / 2
            
            optimizer.zero_grad()
            logits = model(hist, tgt, likes, views)
            loss = criterion(logits, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), cfg.GRAD_CLIP)
            optimizer.step()
            
            total_loss += loss.item()
        
        # Validate
        model.eval()
        preds, labels_all = [], []
        
        with torch.inference_mode():
            for hist, tgt, likes, views, labels in val_loader:
                hist = hist.to(cfg.DEVICE)
                tgt = tgt.to(cfg.DEVICE)
                likes = likes.to(cfg.DEVICE)
                views = views.to(cfg.DEVICE)
                
                logits = model(hist, tgt, likes, views)
                probs = torch.sigmoid(logits)
                preds.extend(probs.cpu().numpy())
                labels_all.extend(labels.numpy())
        
        val_auc = roc_auc_score(labels_all, preds)
        avg_loss = total_loss / len(train_loader)
        lr = optimizer.param_groups[0]['lr']
        
        print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, AUC={val_auc:.4f}, LR={lr:.2e}")
        
        old_lr = lr
        scheduler.step(val_auc)
        new_lr = optimizer.param_groups[0]['lr']
        if new_lr < old_lr:
            print(f"  → LR reduced to {new_lr:.2e}")
        
        if val_auc > best_auc:
            best_auc = val_auc
            torch.save(model.state_dict(), best_path)
            print(f"  ✓ Best model saved (AUC: {val_auc:.4f})")
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= cfg.PATIENCE:
                print(f"\nEarly stop at epoch {epoch+1}")
                break
    
    print(f"\nBest AUC: {best_auc:.4f}\n")
    return best_path

model_path = train_model()

# ==================================================================================
# PART 5: Generate Submission
# ==================================================================================

def create_submission(model_path):
    """Generate final predictions"""
    
    print("="*70)
    print("PART 4: GENERATING SUBMISSION")
    print("="*70)
    
    test_ds = CTRDataset(cfg.TRAIN_DATA_PATH / "test.parquet", ID_MAPPING, test=True)
    test_loader = DataLoader(test_ds, batch_size=cfg.TRAIN_BATCH * 2, shuffle=False)
    
    model = CTRModel(EMB_WEIGHTS).to(cfg.DEVICE)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    
    predictions = []
    
    print("Predicting...")
    with torch.inference_mode():
        for hist, tgt, likes, views, _ in tqdm(test_loader):
            hist = hist.to(cfg.DEVICE)
            tgt = tgt.to(cfg.DEVICE)
            likes = likes.to(cfg.DEVICE)
            views = views.to(cfg.DEVICE)
            
            logits = model(hist, tgt, likes, views)
            probs = torch.sigmoid(logits)
            predictions.extend(probs.cpu().numpy())
    
    # Create submission (Task1&2 format)
    sub_df = pd.DataFrame({
        'ID': test_ds.ids,
        'Task1&2': predictions
    })
    
    print(f"\nSubmission shape: {sub_df.shape}")
    print(f"Columns: {sub_df.columns.tolist()}")
    
    csv_path = cfg.SUBMISSION_DIR / "prediction.csv"
    sub_df.to_csv(csv_path, index=False)
    
    zip_path = cfg.SUBMISSION_DIR / "prediction.zip"
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(csv_path, 'prediction.csv')
    
    print(f"\nSubmission: {zip_path}")
    print(f"\nSample predictions:")
    print(sub_df.head(10))
    print("\n" + "="*70)
    print("TASK 1&2 COMPLETE!")
    print("="*70)
    
    return zip_path

final_submission = create_submission(model_path)

Setting up environment...
Environment ready!

Device: cuda
Output: /kaggle/working/submission_task1and2

PART 1: GENERATING MULTIMODAL EMBEDDINGS
Loading ViT-B/32...
Processing 91717 items

Extracting features...


Batches:   0%|          | 0/1434 [00:00<?, ?it/s]


Extracted: (91717, 512)
PCA: 512 → 128
Variance: 70.3%

Saved: /kaggle/working/task1and2_embeddings.parquet

PART 2: LOADING EMBEDDINGS FOR TRAINING
Embedding matrix: (91718, 128)

PART 3: TRAINING CTR MODEL
Training started...



Epoch 1:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>Exception ignored in: 
Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    
self._shutdown_workers()Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
        self._shutdown_workers()if w.is_alive():
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers

      if w.is_alive():
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive

      File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
assert self._par

Epoch 1: Loss=0.2175, AUC=0.8287, LR=4.00e-04
  ✓ Best model saved (AUC: 0.8287)


Epoch 2:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920><function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()    
self._shutdown_workers()  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers

      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
if w.is_alive():    
if w.is_alive(): 
            ^ ^^^^^^^^^^^^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
^    ^assert self._parent_pid == os.getpid(), 'can only test a child process'

  File "/usr/lib/python

Epoch 2: Loss=0.0875, AUC=0.8901, LR=4.00e-04
  ✓ Best model saved (AUC: 0.8901)


Epoch 3:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers

    Traceback (most recent call last):
if w.is_alive():  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__

      self._shutdown_workers() 
   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
      if w.is_alive(): 
^ ^ ^ ^^ ^ ^ ^ ^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
^    ^^assert self._parent_pid == os.getpid(), 'can only test a child process'^
 ^ ^^ ^ ^ 
   File "/usr/lib/

Epoch 3: Loss=0.0671, AUC=0.9021, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9021)


Epoch 4:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920><function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
self._shutdown_workers()    
if w.is_alive():  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers

     if w.is_alive(): 
           ^ ^^^^^^^^^^^^^^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    
assert self._parent_pid == os.getpid(), 'can only test a child process'  File "/usr/lib/python3

Epoch 4: Loss=0.0582, AUC=0.9117, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9117)


Epoch 5:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920><function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
        self._shutdown_workers()self._shutdown_workers()

  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
        if w.is_alive():if w.is_alive():

             ^ ^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
        assert self.

Epoch 5: Loss=0.0521, AUC=0.9163, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9163)


Epoch 6:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
<function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
Traceback (most recent call last):
      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
        self._shutdown_workers()if w.is_alive():
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers

     if w.is_alive():
            ^ ^^^^^^^^^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    ^assert self._parent_pid == os.getpid(), 'can only test a child process'^
^ ^ ^  
  File "/usr/lib/py

Epoch 6: Loss=0.0490, AUC=0.9222, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9222)


Epoch 7:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920><function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
Traceback (most recent call last):
      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
self._shutdown_workers()    self._shutdown_workers()

  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
        if w.is_alive():
if w.is_alive():
              ^^^^^^^^^^^^^^^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive

    assert self._parent_pid == os.getpid(), 'can only test a child process'  File "/usr/lib/python3

Epoch 7: Loss=0.0474, AUC=0.9226, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9226)


Epoch 8:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
Traceback (most recent call last):
Exception ignored in:   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
<function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
    self._shutdown_workers()Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    if w.is_alive():    
 self._shutdown_workers()   
   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
     if w.is_alive(): 
^ ^ ^ ^ ^ ^  ^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
^    assert self._parent_pid == os.getpid(), 'can only test a child process'^
 ^ ^ ^ ^ ^^  
    File "/usr/l

Epoch 8: Loss=0.0466, AUC=0.9273, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9273)


Epoch 9:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers

    Traceback (most recent call last):
if w.is_alive():  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__

      self._shutdown_workers()
   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
      if w.is_alive(): 
  ^^^  ^  ^^ ^ ^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
^    assert self._parent_pid == os.getpid(), 'can only test a child process'
^ ^ ^^ ^ ^
   File "/usr/lib/p

Epoch 9: Loss=0.0462, AUC=0.9289, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9289)


Epoch 10:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
Exception ignored in: Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__

    Traceback (most recent call last):
self._shutdown_workers()  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__

      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
self._shutdown_workers()    
if w.is_alive():  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers

     if w.is_alive(): 
         ^ ^ ^ ^^^^^^^^^^^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    ^assert self._parent_pid == os.getpid(), 'can only test a child process'

  File "/usr/lib/python

Epoch 12: Loss=0.0453, AUC=0.9343, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9343)


Epoch 13:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
<function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
        self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
self._shutdown_workers()    if w.is_alive():

   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
      if w.is_alive():  
    ^ ^^  ^ ^ ^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
^    ^assert self._parent_pid == os.getpid(), 'can only test a child process'^
^ 
    File "/usr/lib/pyt

Epoch 13: Loss=0.0451, AUC=0.9382, LR=4.00e-04
  ✓ Best model saved (AUC: 0.9382)


Epoch 14:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>
Exception ignored in: Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__

    Traceback (most recent call last):
self._shutdown_workers()  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__

      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
self._shutdown_workers()
      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
if w.is_alive():    if w.is_alive():

             ^ ^^^^^^^^^^^^^^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive

      File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
assert self._par

Epoch 14: Loss=0.0450, AUC=0.9344, LR=4.00e-04


Epoch 15:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920><function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
     if w.is_alive(): 
         ^ ^^ ^ ^^^^^^^^^^^^^^^^^
^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
^^
    assert self._parent_pid == os.getpid(), 'can only test a child process'
  File "/usr/lib/python

Epoch 15: Loss=0.0450, AUC=0.9358, LR=4.00e-04


Epoch 16:   0%|          | 0/1758 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920><function _MultiProcessingDataLoaderIter.__del__ at 0x789c994db920>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
      File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
self._shutdown_workers()    
self._shutdown_workers()  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
if w.is_alive():    
 if w.is_alive():
             ^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
        assert self.

Epoch 16: Loss=0.0449, AUC=0.9378, LR=4.00e-04
  → LR reduced to 2.20e-04

Early stop at epoch 16

Best AUC: 0.9382

PART 4: GENERATING SUBMISSION
Predicting...


  0%|          | 0/93 [00:00<?, ?it/s]


Submission shape: (379142, 2)
Columns: ['ID', 'Task1&2']

Submission: /kaggle/working/submission_task1and2/prediction.zip

Sample predictions:
   ID   Task1&2
0   0  0.649237
1   1  0.995930
2   2  0.837414
3   3  0.008924
4   4  0.006412
5   5  0.320951
6   6  0.998128
7   7  0.992280
8   8  0.010018
9   9  0.108894

TASK 1&2 COMPLETE!
