In [1]:
!pip install torch torchvision transformers accelerate peft bitsandbytes open-clip-torch pandas numpy scikit-learn Pillow requests tqdm ftfy regex

!pip install tiktoken

!pip install sentencepiece

Collecting transformers
  Downloading transformers-4.57.0-py3-none-any.whl.metadata (41 kB)
Collecting accelerate
  Downloading accelerate-1.10.1-py3-none-any.whl.metadata (19 kB)
Collecting peft
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting open-clip-torch
  Downloading open_clip_torch-3.2.0-py3-none-any.whl.metadata (32 kB)
Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting regex
  Downloading regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.35.3-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting safetens

In [3]:
import os
import requests
import pandas as pd
import multiprocessing
import time
from functools import partial
from tqdm import tqdm
from requests.exceptions import ConnectionError, Timeout, RequestException

import re
import os
import pandas as pd
import multiprocessing
from time import time as timer
from tqdm import tqdm
import numpy as np
from pathlib import Path
from functools import partial
import requests
import urllib

def download_image(image_link, savefolder):
    if(isinstance(image_link, str)):
        filename = Path(image_link).name
        image_save_path = os.path.join(savefolder, filename)
        if(not os.path.exists(image_save_path)):
            try:
                urllib.request.urlretrieve(image_link, image_save_path)    
            except Exception as ex:
                print('Warning: Not able to download - {}\n{}'.format(image_link, ex))
        else:
            return
    return

def download_images(image_links, download_folder):
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)
    results = []
    download_image_partial = partial(download_image, savefolder=download_folder)
    with multiprocessing.Pool(100) as pool:
        for result in tqdm(pool.imap(download_image_partial, image_links), total=len(image_links)):
            results.append(result)
        pool.close()
        pool.join()


# === CONFIG ===
TRAIN_CSV = "train.csv"
TEST_CSV = "test.csv"
IMG_FOLDER = "images"
IMAGE_COLUMN = "image_link"  # Change if needed (e.g., 'image_url')

# === LOAD DATA ===
print("📥 Loading CSV files...")
train_df = pd.read_csv(TRAIN_CSV)
test_df = pd.read_csv(TEST_CSV)

# === EXTRACT LINKS ===
train_links = train_df[IMAGE_COLUMN].dropna().tolist()
test_links = test_df[IMAGE_COLUMN].dropna().tolist()

# === DOWNLOAD TRAIN IMAGES ===
print(f"🚀 Downloading {len(train_links)} training images...")
download_images(train_links, IMG_FOLDER)

# === DOWNLOAD TEST IMAGES ===
print(f"🚀 Downloading {len(test_links)} test images...")
download_images(test_links, IMG_FOLDER)

print("✅ All downloads completed!")

📥 Loading CSV files...
🚀 Downloading 75000 training images...


 52%|█████▏    | 38882/75000 [00:21<00:19, 1868.95it/s]

HTTP Error 404: Not Found


100%|██████████| 75000/75000 [00:40<00:00, 1864.99it/s]


🚀 Downloading 75000 test images...


 56%|█████▌    | 41896/75000 [00:21<00:16, 2041.44it/s]

HTTP Error 404: Not Found


100%|██████████| 75000/75000 [00:39<00:00, 1904.45it/s]


✅ All downloads completed!


In [4]:
# config.py
from pathlib import Path

class Config:
    # ===== Model Config =====
    text_model_name = "microsoft/deberta-v3-large"
    vision_model_name = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
    fusion_hidden_dim = 2048
    dropout = 0.2

    # ===== PEFT / LoRA =====
    lora_r = 16
    lora_alpha = 32
    lora_dropout = 0.1

    # ===== Quantization =====
    use_8bit = False   # Set to False if using fp32
    use_4bit = False   # Set to False if using fp32
    use_fp16 = False
    use_fp32 = True # Enable fp32 precision


    # ===== Training =====
    batch_size = 32
    learning_rate = 2e-4
    num_epochs = 10
    warmup_ratio = 0.1
    weight_decay = 0.01
    max_grad_norm = 1.0
    accumulation_steps = 32

    # ===== Data =====
    max_text_length = 512
    image_size = 224

    # ===== Paths =====
    base_dir = Path("./")
    train_csv = Path("./train.csv")
    test_csv = Path("./test.csv")
    image_folder = Path("./images")

    # ===== System =====
    num_workers = 256


In [5]:
# dataset.py
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer
from PIL import Image
import open_clip
import pandas as pd
import numpy as np
import re, os, requests
from pathlib import Path
from transformers import DebertaV2Tokenizer

# -----------------------------
# Text Feature Extraction
# -----------------------------
def extract_advanced_features(text: str):
    """Extract structured information and create enhanced text."""
    features = {}

    # Basic extractions
    item_name_match = re.search(r'Item Name:\s*(.*?)(?:\n|$)', text)
    value_match = re.search(r'Value:\s*([0-9.]+)', text)
    unit_match = re.search(r'Unit:\s*(.*?)(?:\n|$)', text)

    features['item_name'] = item_name_match.group(1).strip() if item_name_match else ""
    features['value'] = float(value_match.group(1)) if value_match else 1.0
    features['unit'] = unit_match.group(1).strip() if unit_match else ""

    # Bullet points
    bullet_points = re.findall(r'Bullet Point \d+:\s*(.*?)(?:\n|$)', text)
    features['bullet_points'] = bullet_points
    features['num_bullet_points'] = len(bullet_points)

    # Description
    desc_match = re.search(r'Product Description:\s*(.*?)(?:\nValue:|$)', text, re.DOTALL)
    features['description'] = desc_match.group(1).strip() if desc_match else ""

    # Enhanced text construction
    enhanced_text = f"Product: {features['item_name']}"
    if bullet_points:
        enhanced_text += f". Key features: {' '.join(bullet_points[:3])}"
    if features['description']:
        desc_preview = features['description'][:100] + "..." if len(features['description']) > 100 else features['description']
        enhanced_text += f". Description: {desc_preview}"
    if features['value'] > 0:
        enhanced_text += f". Package contains: {features['value']} {features['unit']}"

    # Additional features
    features['text_length'] = len(text)
    features['has_bullet_points'] = len(bullet_points) > 0
    features['has_description'] = len(features['description']) > 0

    return enhanced_text, features

# -----------------------------
# Image Download with Retry
# -----------------------------
def download_image_with_retry(url, save_path, max_retries=3):
    for attempt in range(max_retries):
        try:
            if os.path.exists(save_path):
                return True
            response = requests.get(url, timeout=10, stream=True)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                return True
        except Exception as e:
            if attempt == max_retries - 1:
                print(f"❌ Failed to download {url}: {e}")
    return False

# -----------------------------
# Dataset Class
# -----------------------------
class AdvancedProductDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, is_training=True):
        self.dataframe = dataframe.reset_index(drop=True)
        self.is_training = is_training

        # Tokenizer
        self.text_tokenizer = DebertaV2Tokenizer.from_pretrained(Config.text_model_name)
        if self.text_tokenizer.pad_token is None:
            self.text_tokenizer.pad_token = self.text_tokenizer.eos_token

        # CLIP Preprocess
        self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms(
            'ViT-H-14', pretrained='laion2b_s32b_b79k'
        )
        self.image_transform = self.clip_preprocess

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        sample_id = row['sample_id']

        # Text features
        enhanced_text, features = extract_advanced_features(row['catalog_content'])
        text_inputs = self.text_tokenizer(
            enhanced_text,
            max_length=Config.max_text_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Image processing
        image_link = row['image_link']
        image_filename = Path(image_link).name
        image_path = Config.image_folder / image_filename
        if not image_path.exists():
            download_image_with_retry(image_link, image_path)
        try:
            image = Image.open(image_path).convert('RGB')
            image = self.image_transform(image)
        except Exception:
            image = torch.zeros(3, Config.image_size, Config.image_size)

        # Numerical features
        numerical_features = torch.tensor([
            features['value'],
            features['num_bullet_points'],
            features['text_length'] / 1000.0,
            1.0 if features['has_bullet_points'] else 0.0,
            1.0 if features['has_description'] else 0.0
        ], dtype=torch.float32)

        sample = {
            'input_ids': text_inputs['input_ids'].squeeze(),
            'attention_mask': text_inputs['attention_mask'].squeeze(),
            'image': image,
            'numerical_features': numerical_features,
            'sample_id': sample_id
        }

        if self.is_training and 'price' in row:
            sample['price'] = torch.tensor(float(row['price']), dtype=torch.float32)
        return sample

In [6]:
# dataset.py
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer
from PIL import Image
import open_clip
import pandas as pd
import numpy as np
import re, os, requests
from pathlib import Path
from transformers import DebertaV2Tokenizer

# -----------------------------
# Text Feature Extraction
# -----------------------------
def extract_advanced_features(text: str):
    """Extract structured information and create enhanced text."""
    features = {}

    # Basic extractions
    item_name_match = re.search(r'Item Name:\s*(.*?)(?:\n|$)', text)
    value_match = re.search(r'Value:\s*([0-9.]+)', text)
    unit_match = re.search(r'Unit:\s*(.*?)(?:\n|$)', text)

    features['item_name'] = item_name_match.group(1).strip() if item_name_match else ""
    features['value'] = float(value_match.group(1)) if value_match else 1.0
    features['unit'] = unit_match.group(1).strip() if unit_match else ""

    # Bullet points
    bullet_points = re.findall(r'Bullet Point \d+:\s*(.*?)(?:\n|$)', text)
    features['bullet_points'] = bullet_points
    features['num_bullet_points'] = len(bullet_points)

    # Description
    desc_match = re.search(r'Product Description:\s*(.*?)(?:\nValue:|$)', text, re.DOTALL)
    features['description'] = desc_match.group(1).strip() if desc_match else ""

    # Enhanced text construction
    enhanced_text = f"Product: {features['item_name']}"
    if bullet_points:
        enhanced_text += f". Key features: {' '.join(bullet_points[:3])}"
    if features['description']:
        desc_preview = features['description'][:100] + "..." if len(features['description']) > 100 else features['description']
        enhanced_text += f". Description: {desc_preview}"
    if features['value'] > 0:
        enhanced_text += f". Package contains: {features['value']} {features['unit']}"

    # Additional features
    features['text_length'] = len(text)
    features['has_bullet_points'] = len(bullet_points) > 0
    features['has_description'] = len(features['description']) > 0

    return enhanced_text, features

# -----------------------------
# Image Download with Retry
# -----------------------------
def download_image_with_retry(url, save_path, max_retries=3):
    for attempt in range(max_retries):
        try:
            if os.path.exists(save_path):
                return True
            response = requests.get(url, timeout=10, stream=True)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                return True
        except Exception as e:
            if attempt == max_retries - 1:
                print(f"❌ Failed to download {url}: {e}")
    return False

# -----------------------------
# Dataset Class
# -----------------------------
class AdvancedProductDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, is_training=True):
        self.dataframe = dataframe.reset_index(drop=True)
        self.is_training = is_training

        # Tokenizer
        self.text_tokenizer = DebertaV2Tokenizer.from_pretrained(Config.text_model_name)
        if self.text_tokenizer.pad_token is None:
            self.text_tokenizer.pad_token = self.text_tokenizer.eos_token

        # CLIP Preprocess
        self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms(
            'ViT-H-14', pretrained='laion2b_s32b_b79k'
        )
        self.image_transform = self.clip_preprocess

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        sample_id = row['sample_id']

        # Text features
        enhanced_text, features = extract_advanced_features(row['catalog_content'])
        text_inputs = self.text_tokenizer(
            enhanced_text,
            max_length=Config.max_text_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Image processing
        image_link = row['image_link']
        image_filename = Path(image_link).name
        image_path = Config.image_folder / image_filename
        if not image_path.exists():
            download_image_with_retry(image_link, image_path)
        try:
            image = Image.open(image_path).convert('RGB')
            image = self.image_transform(image)
        except Exception:
            image = torch.zeros(3, Config.image_size, Config.image_size)

        # Numerical features
        numerical_features = torch.tensor([
            features['value'],
            features['num_bullet_points'],
            features['text_length'] / 1000.0,
            1.0 if features['has_bullet_points'] else 0.0,
            1.0 if features['has_description'] else 0.0
        ], dtype=torch.float32)

        sample = {
            'input_ids': text_inputs['input_ids'].squeeze(),
            'attention_mask': text_inputs['attention_mask'].squeeze(),
            'image': image,
            'numerical_features': numerical_features,
            'sample_id': sample_id
        }

        if self.is_training and 'price' in row:
            sample['price'] = torch.tensor(float(row['price']), dtype=torch.float32)
        return sample

In [7]:

import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoModel, AutoModelForImageClassification
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

# ---- Optimized Model Definition ----
class AdvancedProductPricePredictor(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Text encoder (DeBERTa or other transformer model)
        self.text_encoder = AutoModel.from_pretrained(
            Config.text_model_name,
            torch_dtype=torch.float16 if Config.use_8bit else torch.float32,
        )

        if Config.use_8bit or Config.use_4bit:
            self.text_encoder = prepare_model_for_kbit_training(self.text_encoder)

        # Apply LoRA (Low-Rank Adaptation)
        lora_config = LoraConfig(
            r=Config.lora_r,
            lora_alpha=Config.lora_alpha,
            target_modules=["query_proj", "value_proj", "key_proj", "dense"],
            lora_dropout=Config.lora_dropout,
            bias="none",
            task_type=TaskType.FEATURE_EXTRACTION
        )
        self.text_encoder = get_peft_model(self.text_encoder, lora_config)

        # Vision encoder (CLIP model for image classification)
        self.vision_encoder = AutoModelForImageClassification.from_pretrained(
            Config.vision_model_name,
            torch_dtype=torch.float16 if Config.use_8bit else torch.float32,
        )

        # Extract feature dimension from the vision model output
        vision_feature_dim = self.vision_encoder.vision_model.config.hidden_size

        # Fusion layer to combine text, vision, and numeric features
        self.fusion_layer = nn.Sequential(
            nn.Linear(self.text_encoder.config.hidden_size + vision_feature_dim + 5, Config.fusion_hidden_dim),
            nn.ReLU(),
            nn.Dropout(Config.dropout),
            nn.Linear(Config.fusion_hidden_dim, 1)
        )

    def forward(self, input_ids, attention_mask, image, numerical_features):
        # Text embeddings
        text_emb = self.text_encoder(input_ids, attention_mask=attention_mask).last_hidden_state.mean(dim=1)

        # Vision embeddings (use CLIP's `vision_model` to extract features)
        vision_output = self.vision_encoder.vision_model(image)
        image_emb = vision_output.last_hidden_state.mean(dim=1)  # Assuming last_hidden_state is the output

        # Combine text, image, and numeric features
        combined = torch.cat([text_emb, image_emb, numerical_features], dim=1)

        # Price prediction
        price_pred = self.fusion_layer(combined)
        return price_pred

# ---- Optimizer ----
def get_optimizer(model):
    return optim.AdamW(model.parameters(), lr=Config.learning_rate, weight_decay=Config.weight_decay)



In [8]:
import torch
from torch.utils.data import DataLoader, random_split
import pandas as pd
from tqdm import tqdm
from accelerate import Accelerator
import numpy as np
import os

# ===============================
# SMAPE Metric
# ===============================
def smape(y_true, y_pred):
    """Symmetric Mean Absolute Percentage Error"""
    diff = torch.abs(y_true - y_pred)
    denominator = (torch.abs(y_true) + torch.abs(y_pred)) / 2.0
    return torch.mean(2.0 * diff / (denominator + 1e-8)).item()


# ===============================
# Training Function
# ===============================
def train_model(model, train_loader, val_loader, checkpoint_path='best_model.pth', resume_checkpoint=False):
    accelerator = Accelerator(mixed_precision='no')
    optimizer = get_optimizer(model)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=len(train_loader) * 2, T_mult=2
    )

    model, optimizer, train_loader, val_loader = accelerator.prepare(model, optimizer, train_loader, val_loader)
    loss_fn = torch.nn.HuberLoss()

    best_val_loss = float('inf')
    patience = 0

    # ====== Load checkpoint if exists ======
    if resume_checkpoint and os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        best_val_loss = checkpoint['best_val_loss']
        print("✅ Loaded checkpoint from", checkpoint_path)

    # ====== Training Loop ======
    for epoch in range(Config.num_epochs):
        model.train()
        total_loss = 0.0
        total_smape = 0.0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{Config.num_epochs}"):
            optimizer.zero_grad()

            with torch.cuda.amp.autocast(dtype=torch.float16):
                preds = model(
                    batch['input_ids'], batch['attention_mask'], batch['image'], batch['numerical_features']
                )
                loss = loss_fn(preds.squeeze(), batch['price'])

            accelerator.backward(loss)
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()
            total_smape += smape(batch['price'], preds.squeeze())

        avg_train_loss = total_loss / len(train_loader)
        avg_train_smape = total_smape / len(train_loader)

        # ====== Validation ======
        model.eval()
        val_loss = 0.0
        val_smape = 0.0

        with torch.no_grad():
            for batch in val_loader:
                with torch.cuda.amp.autocast(dtype=torch.float16):
                    preds = model(
                        batch['input_ids'], batch['attention_mask'], batch['image'], batch['numerical_features']
                    )
                    loss = loss_fn(preds.squeeze(), batch['price'])
                    val_loss += loss.item()
                    val_smape += smape(batch['price'], preds.squeeze())

        val_loss /= len(val_loader)
        avg_val_smape = val_smape / len(val_loader)

        print(
            f"Epoch {epoch+1}: "
            f"Train Loss={avg_train_loss:.4f}, Train SMAPE={avg_train_smape:.4f}, "
            f"Val Loss={val_loss:.4f}, Val SMAPE={avg_val_smape:.4f}"
        )

        # ====== Save Best Checkpoint ======
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience = 0
            accelerator.wait_for_everyone()
            unwrapped_model = accelerator.unwrap_model(model)
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': unwrapped_model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_val_loss': best_val_loss,
            }, checkpoint_path)
            print(f"✅ Saved best model at epoch {epoch+1}")
        else:
            patience += 1
            if patience >= 5:
                print("⏹️ Early stopping triggered.")
                break

    return accelerator.unwrap_model(model)


# ===============================
# Main Function
# ===============================
def main():
    # ✅ Load only the training CSV
    full_df = pd.read_csv(Config.train_csv)
    print(f"Loaded {len(full_df)} training samples.")

    # ✅ Split 90% train, 10% validation
    val_size = int(0.1 * len(full_df))
    train_size = len(full_df) - val_size
    train_df, val_df = random_split(full_df, [train_size, val_size])

    # Convert Subsets back to DataFrames
    train_df = full_df.iloc[train_df.indices]
    val_df = full_df.iloc[val_df.indices]

    # ✅ Create datasets
    train_ds = AdvancedProductDataset(train_df, is_training=True)
    val_ds = AdvancedProductDataset(val_df, is_training=True)  # val has price → keep is_training=True

    # ✅ DataLoaders
    train_loader = DataLoader(train_ds, batch_size=Config.batch_size, shuffle=True,
                              num_workers=Config.num_workers, pin_memory=True)
    val_loader = DataLoader(val_ds, batch_size=Config.batch_size, shuffle=False,
                            num_workers=Config.num_workers, pin_memory=True)

    # ✅ Initialize model
    model = AdvancedProductPricePredictor()

    # ✅ Train
    trained_model = train_model(
        model,
        train_loader,
        val_loader,
        checkpoint_path="best_model.pth",
        resume_checkpoint=True
    )

    # ✅ Save final model
    torch.save(trained_model.state_dict(), "final_model.pth")
    print("🎯 Final model saved as final_model.pth")


if __name__ == "__main__":
    main()


Loaded 75000 training samples.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/580 [00:00<?, ?B/s]

open_clip_model.safetensors:   0%|          | 0.00/3.94G [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


pytorch_model.bin:   0%|          | 0.00/874M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/874M [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/3.94G [00:00<?, ?B/s]

Some weights of CLIPForImageClassification were not initialized from the model checkpoint at laion/CLIP-ViT-H-14-laion2B-s32B-b79K and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  with torch.cuda.amp.autocast(dtype=torch.float16):
Epoch 1/10: 100%|██████████| 2110/2110 [19:32<00:00,  1.80it/s]
  with torch.cuda.amp.autocast(dtype=torch.float16):


Epoch 1: Train Loss=12.4310, Train SMAPE=1.1539, Val Loss=10.9667, Val SMAPE=1.0154
✅ Saved best model at epoch 1


Epoch 2/10: 100%|██████████| 2110/2110 [19:28<00:00,  1.81it/s]


Epoch 2: Train Loss=10.0399, Train SMAPE=0.9637, Val Loss=10.2959, Val SMAPE=0.9571
✅ Saved best model at epoch 2


Epoch 3/10: 100%|██████████| 2110/2110 [19:26<00:00,  1.81it/s]


Epoch 3: Train Loss=11.5163, Train SMAPE=1.0846, Val Loss=11.5541, Val SMAPE=1.0797


Epoch 4/10: 100%|██████████| 2110/2110 [19:26<00:00,  1.81it/s]


Epoch 4: Train Loss=9.8360, Train SMAPE=0.9605, Val Loss=10.3184, Val SMAPE=0.9794


Epoch 5/10: 100%|██████████| 2110/2110 [19:28<00:00,  1.81it/s]


Epoch 5: Train Loss=8.7056, Train SMAPE=0.8770, Val Loss=9.6513, Val SMAPE=0.9048
✅ Saved best model at epoch 5


Epoch 6/10: 100%|██████████| 2110/2110 [19:30<00:00,  1.80it/s]


Epoch 6: Train Loss=7.9920, Train SMAPE=0.8248, Val Loss=9.4989, Val SMAPE=0.9010
✅ Saved best model at epoch 6


Epoch 7/10: 100%|██████████| 2110/2110 [19:30<00:00,  1.80it/s]


Epoch 7: Train Loss=9.6425, Train SMAPE=0.9667, Val Loss=10.5129, Val SMAPE=0.9815


Epoch 8/10: 100%|██████████| 2110/2110 [19:28<00:00,  1.81it/s]


Epoch 8: Train Loss=8.9787, Train SMAPE=0.9034, Val Loss=10.6892, Val SMAPE=0.9664


Epoch 9/10:   0%|          | 0/2110 [00:00<?, ?it/s]Exception ignored in: <function _releaseLock at 0x7ca72a7239c0>
Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.12/logging/__init__.py", line 243, in _releaseLock
    def _releaseLock():
    
KeyboardInterrupt: 
Epoch 9/10:   0%|          | 0/2110 [00:31<?, ?it/s]


RuntimeError: DataLoader worker (pid(s) 194470, 194471, 194472, 194473, 194474, 194475, 194476, 194477, 194478, 194479, 194480, 194481, 194502, 194516, 194621, 194733, 194841, 194847, 194848, 194849, 194850, 194851, 194852, 194853, 194854, 194855, 194856, 194857, 194858, 194859, 194860, 194861, 194862, 194863, 194864, 194865, 194866, 194867, 194868, 194869, 194870, 194871, 194872, 194873, 194874, 194875, 194876, 194877, 194878, 194879, 194880, 194881, 194882, 194883, 194885, 194886, 194887, 194888, 194889, 194890, 194891, 194892, 194893, 194894, 194895, 194896, 194897, 194898, 194899, 194900, 194901, 194902, 194903, 194904, 194905, 194931, 194961, 195068, 195179, 195270, 195271, 195272, 195273, 195274, 195275, 195276, 195277, 195278, 195279, 195280, 195281, 195282, 195283, 195284, 195285, 195286, 195287, 195288, 195289, 195290, 195291, 195292, 195293, 195294, 195295, 195296, 195297, 195298, 195299, 195300, 195301, 195302, 195303, 195304, 195305, 195306, 195307, 195308, 195309, 195310, 195311, 195312, 195313, 195314, 195315, 195316, 195317, 195318, 195319, 195320, 195321, 195322, 195323, 195324, 195325, 195326, 195327, 195353, 195433, 195542) exited unexpectedly

In [9]:
import torch
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from tqdm import tqdm

def run_inference(model_path="best_model.pth", output_csv="test_out.csv"):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = AdvancedProductPricePredictor()
    model.load_state_dict(torch.load(model_path, map_location=device)['model_state_dict'])
    model.to(device)
    model.eval()

    # Load the test data
    test_df = pd.read_csv(Config.test_csv)
    test_ds = AdvancedProductDataset(test_df, is_training=False)
    test_loader = DataLoader(test_ds, batch_size=Config.batch_size, shuffle=False, num_workers=4)

    preds, ids = [], []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Predicting prices"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            image = batch['image'].to(device)
            num_feat = batch['numerical_features'].to(device)

            # Perform inference (predict price)
            price = model(input_ids, attention_mask, image, num_feat)
            
            # Convert predictions from tensor to float
            preds.extend(price.squeeze().cpu().numpy())

            # Extract sample IDs (ensure these are the correct IDs for the batch)
            ids.extend(batch['sample_id'].cpu().numpy())  # Use .cpu() if needed

    # Ensure predictions are non-negative (or apply any other logic)
    preds = np.maximum(preds, 0.1)  # Ensures the minimum price is 0.1

    # Create a DataFrame for the output
    output_df = pd.DataFrame({'sample_id': ids, 'price': preds})

    # Save the predictions to a CSV file
    output_df.to_csv(output_csv, index=False)
    print(f"✅ Inference complete. Saved predictions to {output_csv}")

if __name__ == "__main__":
    run_inference()


Some weights of CLIPForImageClassification were not initialized from the model checkpoint at laion/CLIP-ViT-H-14-laion2B-s32B-b79K and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Predicting prices: 100%|██████████| 2344/2344 [25:58<00:00,  1.50it/s]


✅ Inference complete. Saved predictions to test_out.csv
