In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found anywhere under the Kaggle
# input directory, then print one path per line.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch

# Report whether PyTorch can see a CUDA device (True is expected on a GPU session).
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    # Device properties can only be queried when a CUDA device exists;
    # asking for device 0 on a CPU-only session raises instead of returning.
    print(torch.cuda.get_device_properties(0))
else:
    print("No CUDA device detected; computations will run on the CPU.")

True
_CudaDeviceProperties(name='Tesla P100-PCIE-16GB', major=6, minor=0, total_memory=16269MB, multi_processor_count=56, uuid=bcc61e3e-f44e-4103-a820-d47d74e1604f, L2_cache_size=4MB)


In [3]:
import os
import math
import io
import zipfile
import pandas as pd
import requests
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from transformers import ViTModel

2025-10-12 14:08:43.135724: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760278123.160226     108 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760278123.167710     108 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
# ==========================
# DATASET FOR IMAGE FILES + PRICE
# ==========================
class PriceRegressionDataset(Dataset):
    """
    Dataset for image files in a directory -> price regression.

    The CSV at ``csv_path`` is loaded with pandas. Each item uses the
    ``sample_id``, ``train_image_name`` and ``price`` columns of one row
    (the CSV is also expected to carry ``catalog_content``, which is not
    read here).

    Images are looked up in ``img_dir``. On first access an image is converted
    to RGB and a JPEG copy is written to ``cache_dir`` as ``<sample_id>.jpg``;
    later accesses read that cached copy instead. Because the cached copy is
    JPEG-encoded, pixel values may differ slightly between the first access
    and subsequent ones.

    Args:
        csv_path: Path of the CSV file describing the samples.
        img_dir: Directory containing the files named in ``train_image_name``.
        transform: Optional callable applied to the RGB PIL image. When it is
            falsy, ``transforms.ToTensor()`` is applied instead.
        cache_dir: Directory for the cached JPEG copies (created if missing).
    """
    def __init__(self, csv_path, img_dir, transform=None, cache_dir="image_cache"):
        self.df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.transform = transform
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, target)`` where target is a float32 tensor of shape (1,)."""
        row = self.df.iloc[idx]
        image_name = row["train_image_name"]
        target = torch.tensor([row["price"]], dtype=torch.float32)
        # cache filename by sample_id
        fname = os.path.join(self.cache_dir, f"{row['sample_id']}.jpg")
        if os.path.exists(fname):
            # convert() fully decodes the image, so the file can be closed right away.
            with Image.open(fname) as cached:
                img = cached.convert("RGB")
        else:
            img_path = os.path.join(self.img_dir, image_name)
            with Image.open(img_path) as original:
                img = original.convert("RGB")
            # Write to a per-process temp file and rename it into place so an
            # interrupted save (or another worker process caching the same
            # sample) can never leave a truncated JPEG under the final name.
            tmp_name = f"{fname}.{os.getpid()}.tmp"
            try:
                # The temp name has no .jpg suffix, so the format is explicit.
                img.save(tmp_name, format="JPEG")
                os.replace(tmp_name, fname)
            finally:
                # Only present here if the save or rename failed.
                if os.path.exists(tmp_name):
                    os.remove(tmp_name)
        if self.transform:
            img = self.transform(img)
        else:
            img = transforms.ToTensor()(img)
        return img, target


In [5]:

# ==========================
# VISION TRANSFORMER REGRESSION MODEL
# ==========================
class ViTRegressionModel(nn.Module):
    """
    Pretrained ViT encoder followed by a two-layer MLP regression head.

    Args:
        model_name: Checkpoint identifier passed to ``ViTModel.from_pretrained``.
        num_outputs: Width of the final linear layer.
        freeze_backbone: When True, gradients are disabled for every encoder
            parameter so only the head is trainable.
        dropout_rate: Dropout probability used before each linear layer of the head.
    """
    def __init__(self,
                 model_name='google/vit-base-patch16-224',
                 num_outputs=1,
                 freeze_backbone=False,
                 dropout_rate=0.1):
        super().__init__()
        self.vit = ViTModel.from_pretrained(model_name)
        if freeze_backbone:
            # Switches off requires_grad on all encoder parameters at once.
            self.vit.requires_grad_(False)

        width = self.vit.config.hidden_size
        bottleneck = width // 2
        head_layers = [
            nn.Dropout(dropout_rate),
            nn.Linear(width, bottleneck),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(bottleneck, num_outputs),
        ]
        self.reg_head = nn.Sequential(*head_layers)

    def forward(self, pixel_values):
        """Encode ``pixel_values`` and regress from the token at sequence position 0."""
        encoded = self.vit(pixel_values=pixel_values)
        # Position 0 of the last hidden state is used as the image summary.
        first_token = encoded.last_hidden_state[:, 0]
        return self.reg_head(first_token)

In [6]:

# ==========================
# TRAIN / EVAL FUNCTIONS
# ==========================
def _rmse_from_sums(sq_err_sum, n_samples):
    """Return sqrt(sq_err_sum / n_samples), or NaN when no samples were seen."""
    if n_samples == 0:
        return float("nan")
    return math.sqrt(sq_err_sum / n_samples)


def train(model, loader, val_loader, device, epochs=5, lr=1e-4):
    """
    Fit ``model`` with AdamW + MSE loss and print train/validation RMSE per epoch.

    Args:
        model: Module mapping an image batch to predictions shaped like the targets.
        loader: Iterable of ``(imgs, tgts)`` tensor batches used for optimisation.
        val_loader: Iterable of ``(imgs, tgts)`` batches evaluated after each
            epoch without gradients. May be empty or ``None``; the validation
            RMSE is then reported as NaN instead of raising.
        device: Device the model and every batch are moved to.
        epochs: Number of passes over ``loader``.
        lr: Initial AdamW learning rate (multiplied by 0.8 every 2 epochs).

    Returns:
        A list with one dict per epoch holding ``epoch``, ``train_rmse`` and
        ``val_rmse``.
    """
    model.to(device)
    optim_ = optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    crit = nn.MSELoss()
    sched = optim.lr_scheduler.StepLR(optim_, step_size=2, gamma=0.8)
    history = []
    for ep in range(epochs):
        model.train()
        total, count = 0.0, 0
        for imgs, tgts in loader:
            imgs, tgts = imgs.to(device), tgts.to(device)
            optim_.zero_grad()
            pred = model(imgs)
            loss = crit(pred, tgts)
            loss.backward()
            optim_.step()
            # Weight the batch-mean loss by batch size so a short final batch
            # does not skew the epoch average.
            total += loss.item() * imgs.size(0)
            count += imgs.size(0)
        train_rmse = _rmse_from_sums(total, count)

        model.eval()
        total_val, count_val = 0.0, 0
        with torch.no_grad():
            for imgs, tgts in (val_loader if val_loader is not None else ()):
                imgs, tgts = imgs.to(device), tgts.to(device)
                pred = model(imgs)
                total_val += crit(pred, tgts).item() * imgs.size(0)
                count_val += imgs.size(0)
        # An empty validation split (e.g. a very small dataset) yields NaN
        # rather than a ZeroDivisionError.
        val_rmse = _rmse_from_sums(total_val, count_val)
        print(f"Epoch {ep+1}/{epochs} Train RMSE: {train_rmse:.4f} Val RMSE: {val_rmse:.4f}")
        sched.step()
        history.append({"epoch": ep + 1, "train_rmse": train_rmse, "val_rmse": val_rmse})
    return history

def _drop_singleton_feature_dims(t):
    """Remove size-1 dimensions after the first one, always keeping the batch axis."""
    kept = [t.size(0)] + [s for s in t.shape[1:] if s != 1]
    return t.reshape(kept)


def evaluate(model, loader, device):
    """
    Run ``model`` over ``loader`` without gradients and print the RMSE.

    Args:
        model: Module mapping an image batch to predictions.
        loader: Iterable of ``(imgs, tgt)`` tensor batches.
        device: Device the model and the image batches are moved to.

    Returns:
        ``(preds, tgts)`` as NumPy arrays with one entry per sample along
        axis 0. Size-1 trailing dimensions are removed, but the sample axis is
        kept even when only one sample was evaluated. Both arrays are empty
        when ``loader`` yields no batches.
    """
    model.to(device).eval()
    preds, tgts = [], []
    with torch.no_grad():
        for imgs, tgt in loader:
            imgs = imgs.to(device)
            out = model(imgs).cpu()
            preds.append(out)
            tgts.append(tgt)
    if not preds:
        # torch.cat rejects an empty list; report NaN and return empty arrays.
        print(f"Test RMSE: {float('nan'):.4f}")
        return torch.empty(0).numpy(), torch.empty(0).numpy()
    # A bare .squeeze() would also collapse the sample axis when exactly one
    # sample was evaluated, returning 0-d arrays; keep axis 0 intact.
    preds = _drop_singleton_feature_dims(torch.cat(preds)).numpy()
    tgts = _drop_singleton_feature_dims(torch.cat(tgts)).numpy()
    rmse = math.sqrt(((preds - tgts) ** 2).mean())
    print(f"Test RMSE: {rmse:.4f}")
    return preds, tgts

In [7]:
# # ==========================
# # MAIN SCRIPT
# # ==========================
# if __name__ == "__main__":
#     CSV_PATH = "/kaggle/input/vit-train-csv/VIT_train_subset.csv" # path to your CSV file
#     IMG_DIR = "/kaggle/input/vit-images-train/train_images" # path to your image directory
#     BATCH_SIZE = 16
#     IMG_SIZE = 224
#     # Transforms
#     transform = transforms.Compose([
#         transforms.Resize((IMG_SIZE, IMG_SIZE)),
#         transforms.ToTensor(),
#         transforms.Normalize([0.485,0.456,0.406],
#                              [0.229,0.224,0.225])
#     ])
#     # Dataset & Dataloaders
#     dataset = PriceRegressionDataset(CSV_PATH, IMG_DIR, transform=transform)
#     n = len(dataset)
#     n_train = int(0.7 * n)
#     n_val = int(0.15 * n)
#     n_test = n - n_train - n_val
#     train_ds, val_ds, test_ds = torch.utils.data.random_split(
#         dataset, [n_train, n_val, n_test],
#         generator=torch.Generator().manual_seed(42)
#     )
#     train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
#     val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
#     test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)
#     # Model, training, evaluation
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model = ViTRegressionModel(freeze_backbone=False)
#     train(model, train_loader, val_loader, device, epochs=5, lr=1e-4)
#     evaluate(model, test_loader, device)
#     # Save model
#     torch.save(model.state_dict(), "/kaggle/working/vit_price_regression.pth")
#     print("Saved model to vit_price_regression.pth")
   

In [8]:
# End-to-end run: build the dataset, split 70/15/15, fine-tune the ViT
# regressor, report test RMSE, and save the trained weights.
if __name__ == "__main__":
    CSV_PATH = "/kaggle/input/vit-train-csv/VIT_train_subset.csv" # path to your CSV file
    IMG_DIR = "/kaggle/input/vit-images-train/train_images" # path to your image directory
    BATCH_SIZE = 16
    IMG_SIZE = 224
    SEED = 42
    # Seed the global RNG so the regression-head initialisation, dropout masks
    # and the training shuffle order repeat across runs (previously only the
    # train/val/test split was seeded).
    torch.manual_seed(SEED)
    # Transforms
    # NOTE(review): these mean/std values are the commonly used ImageNet
    # statistics; the pretrained checkpoint's own image-processor config may
    # specify different normalisation — confirm against the checkpoint.
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],
                             [0.229,0.224,0.225])
    ])
    print("Loading dataset...")
    # Dataset & Dataloaders
    dataset = PriceRegressionDataset(CSV_PATH, IMG_DIR, transform=transform)
    print(f"Dataset loaded with {len(dataset)} samples.")
    n = len(dataset)
    n_train = int(0.7 * n)
    n_val = int(0.15 * n)
    # The test split absorbs the rounding remainder so the three sizes sum to n.
    n_test = n - n_train - n_val
    print(f"Splitting dataset: Train={n_train}, Val={n_val}, Test={n_test}")
    train_ds, val_ds, test_ds = torch.utils.data.random_split(
        dataset, [n_train, n_val, n_test],
        generator=torch.Generator().manual_seed(SEED)
    )
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)
    print("Data loaders created.")
    # Model, training, evaluation
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    model = ViTRegressionModel(freeze_backbone=False)
    print("Starting training...")
    train(model, train_loader, val_loader, device, epochs=5, lr=1e-4)
    print("Training completed.")
    print("Evaluating model...")
    evaluate(model, test_loader, device)
    print("Evaluation completed.")
    # Save model
    torch.save(model.state_dict(), "/kaggle/working/vit_price_regression.pth")
    print("Saved model to vit_price_regression.pth")

Loading dataset...
Dataset loaded with 25004 samples.
Splitting dataset: Train=17502, Val=3750, Test=3752
Data loaders created.
Using device: cuda


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...
Epoch 1/5 Train RMSE: 30.7800 Val RMSE: 27.5115
Epoch 2/5 Train RMSE: 29.7328 Val RMSE: 27.0518
Epoch 3/5 Train RMSE: 29.0698 Val RMSE: 26.8701
Epoch 4/5 Train RMSE: 28.3197 Val RMSE: 26.7390
Epoch 5/5 Train RMSE: 26.2783 Val RMSE: 27.5460
Training completed.
Evaluating model...
Test RMSE: 28.4041
Evaluation completed.
Saved model to vit_price_regression.pth
