In [1]:
import albumentations as A
import os
import sys
import math
import time
import argparse
from pathlib import Path
from typing import List, Optional

import numpy as np
import pandas as pd

import cv2
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import timm
from albumentations.pytorch import ToTensorV2

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import KFold

from tqdm.notebook import tqdm

In [2]:
# --------------------------- utils ---------------------------

def seed_everything(seed=42):
    import random
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def median_ape(y_true: np.ndarray, y_pred: np.ndarray, eps: float = 1e-9) -> float:
    # y_true and y_pred must be positive prices
    denom = np.maximum(y_true, eps)
    ape = np.abs(y_pred - y_true) / denom
    return float(np.median(ape))

In [3]:
# --------------------------- Dataset ---------------------------
class CarPricingDataset(Dataset):
    def __init__(
        self,
        df: pd.DataFrame,
        img_dir: str,
        image_transform=None,
        max_images: int = 4,
        is_train: bool = True,
        tab_cols: Optional[List[str]] = None,
    ):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.image_transform = image_transform
        self.max_images = max_images
        self.is_train = is_train
        self.tab_cols = tab_cols or []

    def __len__(self):
        return len(self.df)

    def _load_image(self, file_path):
        # returns HWC uint8
        try:
            img = cv2.imread(file_path)
            if img is None:
                # fallback: create blank image
                return np.zeros((self.image_transform.height, self.image_transform.width, 3), dtype=np.uint8)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            return img
        except Exception:
            # corrupted file
            h = getattr(self.image_transform, 'height', 384)
            w = getattr(self.image_transform, 'width', 384)
            return np.zeros((h, w, 3), dtype=np.uint8)

    def __getitem__(self, idx):
        row = self.df.loc[idx]
        listing_id = row['ID'] if 'ID' in row.index else row['index']

        images = []
        for i in range(self.max_images):
            img_path = os.path.join(self.img_dir, f"{listing_id}_{i}.jpg")
            if os.path.exists(img_path):
                img = self._load_image(img_path)
            else:
                # if image missing, append zero array (will be padded)
                img = None
            images.append(img)

        # if all None, create a single blank image
        if all([im is None for im in images]):
            # create one gray image and duplicate
            h = getattr(self.image_transform, 'height', 384)
            w = getattr(self.image_transform, 'width', 384)
            dummy = np.zeros((h, w, 3), dtype=np.uint8)
            images = [dummy for _ in range(self.max_images)]

        processed = []
        for im in images:
            if im is None:
                # zero image
                h = getattr(self.image_transform, 'height', 384)
                w = getattr(self.image_transform, 'width', 384)
                im = np.zeros((h, w, 3), dtype=np.uint8)
            if self.image_transform:
                augmented = self.image_transform(image=im)
                img_t = augmented['image']  # tensor CHW
            else:
                # simple to tensor
                im = cv2.resize(im, (224, 224))
                img_t = ToTensorV2()(image=im)['image']
            processed.append(img_t)

        # processed: list of tensors CHW
        images_stack = torch.stack(processed)  # (max_images, C, H, W)

        tab = torch.tensor(row[self.tab_cols].values.astype(np.float32)) if len(self.tab_cols) > 0 else torch.zeros(0)

        if self.is_train:
            target = torch.tensor(row['price_TARGET'], dtype=torch.float32)
            return {
                'images': images_stack,
                'tab': tab,
                'target': target
            }
        else:
            return {
                'images': images_stack,
                'tab': tab,
            }

In [4]:
# --------------------------- Model ---------------------------
class PricingModel(nn.Module):
    def __init__(self, encoder: nn.Module, img_emb_dim: int, tab_dim: int, head_hidden: int = 1024, dropout: float = 0.3):
        super().__init__()
        self.encoder = encoder
        self.img_emb_dim = img_emb_dim
        self.tab_dim = tab_dim

        in_dim = img_emb_dim + tab_dim
        self.head = nn.Sequential(
            nn.Linear(in_dim, head_hidden),
            nn.BatchNorm1d(head_hidden),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout(dropout),
            nn.Linear(head_hidden, head_hidden // 4),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout/2),
            nn.Linear(head_hidden // 4, 1),
        )

    def forward(self, images, tab):
        # images: (B, N_imgs, C, H, W)
        b, n, c, h, w = images.shape
        images = images.view(b * n, c, h, w)
        img_feats = self.encoder(images)  # (B * N, D)
        if img_feats.dim() == 4:
            # some encoders return (B, C, 1, 1)
            img_feats = img_feats.reshape(img_feats.shape[0], -1)
        img_feats = img_feats.view(b, n, -1)
        # simple mean pooling across images (you can replace with attention pooling)
        img_emb = img_feats.mean(dim=1)

        if tab.shape[1] == 0:
            x = img_emb
        else:
            x = torch.cat([img_emb, tab], dim=1)
        out = self.head(x)
        return out.squeeze(1)

In [5]:
# --------------------------- training/eval helpers ---------------------------
def train_one_epoch(model, encoder, loader, optimizer, scaler, device):
    model.train()
    running_loss = 0.0
    n = 0
    criterion = nn.MSELoss()
    for batch in tqdm(loader, total=len(loader), desc="Training"):
        images = batch['images'].to(device)  # (B, N, C, H, W)
        tab = batch['tab'].to(device)
        target = batch['target'].to(device)

        # target in log space
        y = torch.log1p(target)

        optimizer.zero_grad()
        with torch.amp.autocast("cuda", enabled=scaler is not None):
            preds = model(images, tab)
            loss = criterion(preds, y)
        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=3.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=3.0)
            optimizer.step()

        running_loss += loss.item() * images.size(0)
        n += images.size(0)
    return running_loss / max(1, n)


def validate(model, loader, device):
    model.eval()
    preds = []
    trues = []
    with torch.no_grad():
        for batch in tqdm(loader, total=len(loader), desc="Validating"):
            images = batch['images'].to(device)
            tab = batch['tab'].to(device)
            target = batch['target'].to(device)
            out = model(images, tab)
            preds.append(torch.expm1(out).cpu().numpy())
            trues.append(target.cpu().numpy())
    preds = np.concatenate(preds)
    trues = np.concatenate(trues)
    score = median_ape(trues, preds)
    return score, preds, trues

In [6]:
# --------------------------- main training pipeline ---------------------------
import albumentations as A
from albumentations.pytorch import ToTensorV2

def build_transforms(img_size=384, is_train=True):
    if is_train:
        return A.Compose([
            A.RandomResizedCrop(size=(img_size, img_size), scale=(0.7, 1.0), ratio=(0.9, 1.1), p=1.0),
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=10, p=0.5),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.02, p=0.5),
            A.GaussianBlur(p=0.1),
            A.Normalize(),
            ToTensorV2()
        ])
    else:
        return A.Compose([
            A.Resize(height=img_size, width=img_size),
            A.Normalize(),
            ToTensorV2()
        ])


import ast
from sklearn.preprocessing import LabelEncoder, StandardScaler

def prepare_tabular(df_train, df_test, cat_cols, num_cols, multiselect_cols):
    df_all = pd.concat([df_train, df_test], axis=0).reset_index(drop=True)

    # ====== Обработка мультивыборных колонок ======
    def count_items(x):
        if isinstance(x, list):
            return len(x)
        elif isinstance(x, np.ndarray):
            return len(x)
        elif pd.isna(x) or x is None:
            return 0
        elif isinstance(x, str):
            # часто такие поля приходят в виде "['ABS','ESP']" или "[None]"
            try:
                parsed = ast.literal_eval(x)
                if isinstance(parsed, (list, tuple, np.ndarray)):
                    return len(parsed)
                else:
                    return 1  # строка с одним значением
            except Exception:
                return 1
        else:
            return 0

    for c in multiselect_cols:
        if c in df_all.columns:
            df_all[c + '_count'] = df_all[c].apply(count_items)
        else:
            df_all[c + '_count'] = 0

    # ====== Кодирование категориальных признаков ======
    encoders = {}
    for c in cat_cols:
        if c in df_all.columns:
            le = LabelEncoder()
            df_all[c] = df_all[c].astype(str).fillna("nan")
            df_all[c] = le.fit_transform(df_all[c])
            encoders[c] = le
        else:
            df_all[c] = 0  # если вдруг колонки нет

    # ====== Масштабирование числовых признаков ======
    scaler = StandardScaler()
    for c in num_cols:
        if c not in df_all.columns:
            df_all[c] = 0
        else:
            df_all[c] = df_all[c].apply(safe_to_float)

    df_all[num_cols] = scaler.fit_transform(df_all[num_cols].fillna(0))


    # ====== Разделение обратно на train/test ======
    train_proc = df_all.iloc[:len(df_train)].reset_index(drop=True)
    test_proc = df_all.iloc[len(df_train):].reset_index(drop=True)

    # итоговые табличные признаки
    tab_cols = cat_cols + num_cols + [c + '_count' for c in multiselect_cols]

    return train_proc, test_proc, tab_cols, encoders, scaler

def safe_to_float(x):
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.integer, np.floating)):
        return float(x)
    if isinstance(x, str):
        # убираем пробелы и плюсы
        x = x.replace(" ", "").replace("+", "")
        # убираем "км", "тыс", если есть
        for junk in ["км", "тыс", "Km", "KM", "km"]:
            x = x.replace(junk, "")
        try:
            return float(x)
        except:
            return np.nan
    return np.nan


In [7]:
seed_everything(927)

In [8]:
os.makedirs('./outputs', exist_ok=True)

In [9]:
train = pd.read_parquet('vseross/F/train_dataset.parquet')
test = pd.read_parquet('vseross/F/test_dataset.parquet')

In [10]:
if 'ID' not in train.columns:
    if 'index' in train.columns:
        train['ID'] = train['index']
    else:
        train = train.reset_index().rename(columns={'index': 'ID'})
if 'ID' not in test.columns:
    if 'index' in test.columns:
        test['ID'] = test['index']
    else:
        test = test.reset_index().rename(columns={'index': 'ID'})

In [11]:
test['price_TARGET'] = np.nan

In [12]:
cat_cols = ['body_type', 'drive_type', 'engine_type', 'color', 'pts', 'steering_wheel',
            'audiosistema', 'diski', 'electropodemniki', 'fary', 'salon', 'upravlenie_klimatom', 'usilitel_rul']
num_cols = ['mileage', 'crashes_count', 'owners_count', 'doors_number', 'latitude', 'longitude']
multiselect_cols = ['aktivnaya_bezopasnost_mult', 'audiosistema_mult', 'shini_i_diski_mult', 'electroprivod_mult',
                    'fary_mult', 'multimedia_navigacia_mult', 'obogrev_mult', 'pamyat_nastroek_mult',
                    'podushki_bezopasnosti_mult', 'pomosh_pri_vozhdenii_mult', 'protivoygonnaya_sistema_mult',
                    'salon_mult', 'upravlenie_klimatom_mult']

In [13]:
train_proc, test_proc, tab_cols, encoders, scaler = prepare_tabular(train, test, cat_cols, num_cols, multiselect_cols)

In [14]:
print('Tabular columns used:', tab_cols)

Tabular columns used: ['body_type', 'drive_type', 'engine_type', 'color', 'pts', 'steering_wheel', 'audiosistema', 'diski', 'electropodemniki', 'fary', 'salon', 'upravlenie_klimatom', 'usilitel_rul', 'mileage', 'crashes_count', 'owners_count', 'doors_number', 'latitude', 'longitude', 'aktivnaya_bezopasnost_mult_count', 'audiosistema_mult_count', 'shini_i_diski_mult_count', 'electroprivod_mult_count', 'fary_mult_count', 'multimedia_navigacia_mult_count', 'obogrev_mult_count', 'pamyat_nastroek_mult_count', 'podushki_bezopasnosti_mult_count', 'pomosh_pri_vozhdenii_mult_count', 'protivoygonnaya_sistema_mult_count', 'salon_mult_count', 'upravlenie_klimatom_mult_count']


In [15]:
model_name = 'swinv2_base_window16_256'
print('Creating encoder', model_name)
encoder = timm.create_model(model_name, pretrained=True, num_classes=0, global_pool='avg')

Creating encoder swinv2_base_window16_256


In [16]:
timm.list_models()

['aimv2_1b_patch14_224',
 'aimv2_1b_patch14_336',
 'aimv2_1b_patch14_448',
 'aimv2_3b_patch14_224',
 'aimv2_3b_patch14_336',
 'aimv2_3b_patch14_448',
 'aimv2_huge_patch14_224',
 'aimv2_huge_patch14_336',
 'aimv2_huge_patch14_448',
 'aimv2_large_patch14_224',
 'aimv2_large_patch14_336',
 'aimv2_large_patch14_448',
 'bat_resnext26ts',
 'beit3_base_patch16_224',
 'beit3_giant_patch14_224',
 'beit3_giant_patch14_336',
 'beit3_large_patch16_224',
 'beit_base_patch16_224',
 'beit_base_patch16_384',
 'beit_large_patch16_224',
 'beit_large_patch16_384',
 'beit_large_patch16_512',
 'beitv2_base_patch16_224',
 'beitv2_large_patch16_224',
 'botnet26t_256',
 'botnet50ts_256',
 'caformer_b36',
 'caformer_m36',
 'caformer_s18',
 'caformer_s36',
 'cait_m36_384',
 'cait_m48_448',
 'cait_s24_224',
 'cait_s24_384',
 'cait_s36_384',
 'cait_xs24_384',
 'cait_xxs24_224',
 'cait_xxs24_384',
 'cait_xxs36_224',
 'cait_xxs36_384',
 'coat_lite_medium',
 'coat_lite_medium_384',
 'coat_lite_mini',
 'coat_lite_sma

In [17]:
img_size = 256

In [18]:
train_transform = build_transforms(img_size=img_size, is_train=True)
valid_transform = build_transforms(img_size=img_size, is_train=False)

  original_init(self, **validated_kwargs)


In [19]:
train_transform.height = img_size
train_transform.width = img_size
valid_transform.height = img_size
valid_transform.width = img_size

In [20]:
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=927)

In [21]:
train_idxes = []
for train_idx, valid_idx in kf.split(train_proc):
    train_idxes.append((train_idx, valid_idx))
    break

In [22]:
train_idx, valid_idx = train_idxes[0]
df_tr = train_proc.loc[train_idx].reset_index(drop=True)
df_val = train_proc.loc[valid_idx].reset_index(drop=True)

In [23]:
train_ds = CarPricingDataset(df_tr, 'vseross/F/train_images', image_transform=train_transform, max_images=1, is_train=True, tab_cols=tab_cols)
val_ds = CarPricingDataset(df_val, 'vseross/F/train_images', image_transform=valid_transform, max_images=1, is_train=True, tab_cols=tab_cols)

In [24]:
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=8, shuffle=False, num_workers=0, pin_memory=True)

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [26]:
def infer_image_emb_dim(encoder, img_size, device):
    # create dummy and infer dimension
    encoder.to(device)
    encoder.eval()
    x = torch.randn(1, 3, img_size, img_size).to(device)
    with torch.no_grad():
        feat = encoder(x)
    if feat.dim() == 4:
        feat = feat.reshape(feat.shape[0], -1)
    return feat.shape[1]

In [27]:
dummy_enc = encoder
dummy_enc.to(device)
dummy_enc.eval()
img_emb_dim = infer_image_emb_dim(dummy_enc, img_size, device)
print('Image embedding dim:', img_emb_dim)

Image embedding dim: 1024


In [28]:
model = PricingModel(encoder=dummy_enc, img_emb_dim=img_emb_dim, tab_dim=len(tab_cols), head_hidden=1024, dropout=0.3)
model = model.to(device);

In [29]:
encoder_params = [p for n, p in model.encoder.named_parameters() if p.requires_grad]
head_params = [p for n, p in model.head.named_parameters() if p.requires_grad]

In [30]:
optimizer = torch.optim.AdamW([
    {'params': encoder_params, 'lr': 2e-5},
    {'params': head_params, 'lr': 2e-4}
], weight_decay=1e-2)

In [31]:
EPOCHS = 15

In [32]:
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [33]:
scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None

  scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None


In [None]:
best_score = 999
best_epoch = -1

for epoch in range(EPOCHS):
    t0 = time.time()
    train_loss = train_one_epoch(model, model.encoder, train_loader, optimizer, scaler, device)
    val_score, val_preds, val_trues = validate(model, val_loader, device)
    scheduler.step()
    elapsed = time.time() - t0
    print(f"Epoch {epoch+1}/{EPOCHS} - train_loss: {train_loss:.5f} - val_medianAPE: {val_score:.5f} - time: {elapsed:.1f}s")

    # save best
    if val_score < best_score:
        best_score = val_score
        best_epoch = epoch + 1
        torch.save(model.state_dict(), 'best_model.pt')
        print('Saved best model')

print('Training finished. Best val medianAPE:', best_score, 'at epoch', best_epoch)

Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 1/15 - train_loss: 2.84727 - val_medianAPE: 0.64364 - time: 869.8s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 2/15 - train_loss: 1.38894 - val_medianAPE: 0.48849 - time: 876.3s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 3/15 - train_loss: 1.11281 - val_medianAPE: 0.47350 - time: 876.7s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 4/15 - train_loss: 0.95597 - val_medianAPE: 0.38854 - time: 875.5s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 5/15 - train_loss: 0.83674 - val_medianAPE: 0.37002 - time: 874.4s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 6/15 - train_loss: 0.76051 - val_medianAPE: 0.35263 - time: 875.2s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 7/15 - train_loss: 0.69300 - val_medianAPE: 0.32533 - time: 872.5s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 8/15 - train_loss: 0.64405 - val_medianAPE: 0.30309 - time: 873.9s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 9/15 - train_loss: 0.59545 - val_medianAPE: 0.28417 - time: 874.2s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 10/15 - train_loss: 0.55871 - val_medianAPE: 0.26900 - time: 874.5s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

Validating:   0%|          | 0/1750 [00:00<?, ?it/s]

Epoch 11/15 - train_loss: 0.52430 - val_medianAPE: 0.25250 - time: 873.3s
Saved best model


Training:   0%|          | 0/3500 [00:00<?, ?it/s]

In [34]:
test_prepared = test_proc.copy()
test_ds = CarPricingDataset(test_prepared, 'vseross/F/test_images', image_transform=valid_transform, max_images=1, is_train=False, tab_cols=tab_cols)
test_loader = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=0, pin_memory=True)

In [44]:
model.load_state_dict(torch.load('best_model.pt', map_location=device))
model.to(device)
model.eval();

In [45]:
preds = []
with torch.no_grad():
    for batch in tqdm(test_loader, total=len(test_loader), desc='Testing'):
        images = batch['images'].to(device)
        tab = batch['tab'].to(device)
        out = model(images, tab)
        preds.append(torch.expm1(out).cpu().numpy())
preds = np.concatenate(preds)

Testing:   0%|          | 0/1563 [00:00<?, ?it/s]

In [46]:
sample = pd.read_csv('vseross/F/sample_submission.csv')
sample = sample.set_index('ID')
sample.loc[test_prepared['ID'].values, 'target'] = preds
sample = sample.reset_index()
sample.to_csv('blablabla228.csv', index=False)

  sample.loc[test_prepared['ID'].values, 'target'] = preds


In [None]:
sample['target'] = sample['target'] * 0.5 + pd.read_csv('vit1.csv').target * 0.5

In [50]:
sample.to_csv('blend_blabla1488t.csv', index=False)