In [1]:
# STEP 0: Imports (tools we need)
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

# STEP 1: Paths (CHANGE ONLY THESE IF NEEDED)
TRAIN_DIR = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos"
TEST_DIR  = "/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/testing_videos"
   # test frames folder

# STEP 2: Image settings
IMG_SIZE = 128
BATCH_SIZE = 32
EPOCHS = 3   # keep small for fast run

# STEP 3: Image transform
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor()
])

# STEP 4: Dataset loader
class FrameDataset(Dataset):
    def __init__(self, folder):
        self.files = []
        for root, _, filenames in os.walk(folder):
            for f in filenames:
                if f.endswith(".jpg") or f.endswith(".png"):
                    self.files.append(os.path.join(root, f))

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img = Image.open(self.files[idx]).convert("RGB")
        img = transform(img)
        return img, self.files[idx]

# STEP 5: Autoencoder model
class AutoEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(32, 16, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 3, 3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# STEP 6: Load training data
train_dataset = FrameDataset(TRAIN_DIR)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoEncoder().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# STEP 7: Train (learn normal)
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for imgs, _ in tqdm(train_loader):
        imgs = imgs.to(device)
        recon = model(imgs)
        loss = criterion(recon, imgs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")

# STEP 8: Test + anomaly score
test_dataset = FrameDataset(TEST_DIR)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

ids = []
scores = []

model.eval()
with torch.no_grad():
    for img, path in tqdm(test_loader):
        img = img.to(device)
        recon = model(img)
        error = torch.mean((img - recon) ** 2).item()

        # normalize roughly
        score = min(error * 10, 1.0)

        # build Id
        fname = os.path.basename(path[0])
        frame_id = os.path.splitext(fname)[0]
        ids.append(frame_id)
        scores.append(score)

# STEP 9: Save submission
submission = pd.DataFrame({
    "Id": ids,
    "Predicted": scores
})

submission.to_csv("submission.csv", index=False)
submission.head()


100%|██████████| 288/288 [02:44<00:00,  1.75it/s]


Epoch 1, Loss: 0.0138


100%|██████████| 288/288 [01:17<00:00,  3.71it/s]


Epoch 2, Loss: 0.0026


100%|██████████| 288/288 [01:20<00:00,  3.57it/s]


Epoch 3, Loss: 0.0015


100%|██████████| 11706/11706 [03:26<00:00, 56.68it/s]


Unnamed: 0,Id,Predicted
0,frame_00335,0.013602
1,frame_00247,0.013605
2,frame_00218,0.013788
3,frame_00012,0.018477
4,frame_00220,0.015265


In [2]:
import os

for root, dirs, files in os.walk("/kaggle/input"):
    print(root)


/kaggle/input
/kaggle/input/pixel-play-26
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/testing_vol
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_vol
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos/07
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos/10
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T112159Z-3-001/Avenue_Corrupted/Dataset/training_videos/05
/kaggle/input/pixel-play-26/Avenue_Corrupted-20251221T11