In [None]:
import torch
from torch import nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [None]:
# Read the train/test tables; the `unique_id` column is parsed as str instead
# of letting pandas infer its dtype.
csv_options = {"dtype": {"unique_id": str}}
df_train = pd.read_csv("rocar_train.csv", **csv_options)
df_test = pd.read_csv("rocar_test.csv", **csv_options)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Preview the first five training rows (5 is the pandas default for head()).
df_train.head(n=5)

In [None]:
# Print the training frame's summary as reported by DataFrame.info().
df_train.info()

In [None]:
# Columns of the CSV tables that are passed to the model as structured inputs.
# The order here is the order of the columns in the extracted arrays.
STRUCTURED_COLUMNS = [
    "km", "putere", "capacitate cilindrica", "anul producției",
    "marca", "model", "combustibil", "tip caroserie",
    "firma", "is_automatic",
]


def _inputs_and_targets(frame):
    """Return ``(structured feature array, price array)`` for one data frame."""
    return frame[STRUCTURED_COLUMNS].to_numpy(), frame["price"].to_numpy()


train_structured_data, train_targets = _inputs_and_targets(df_train)
test_structured_data, test_targets = _inputs_and_targets(df_test)

# Display the four array shapes as a quick sanity check.
tuple(
    array.shape
    for array in (train_structured_data, test_structured_data, train_targets, test_targets)
)

In [None]:
# Load the saved image/text feature arrays, grouped by split.
train_image_features, train_text_features = (
    np.load("train_image_features.npy"),
    np.load("train_text_features.npy"),
)
test_image_features, test_text_features = (
    np.load("test_image_features.npy"),
    np.load("test_text_features.npy"),
)

# Display the number of entries in each array (train image, train text,
# test image, test text).
tuple(
    len(array)
    for array in (train_image_features, train_text_features, test_image_features, test_text_features)
)

In [None]:
class MultimodalDataset(Dataset):
    """Dataset yielding one concatenated feature vector and one target per sample.

    Each item is a ``(features, target)`` pair of float32 tensors, where
    ``features`` is the 1-D concatenation ``[image, text, structured]``.

    Args:
        image_features: Indexable collection of per-sample image features.
        text_features: Indexable collection of per-sample text features.
        structured_data: Indexable collection of per-sample structured rows.
        targets: Indexable collection of per-sample targets; its length is
            the length of the dataset.
        use_full_embedding: When ``False`` (default, the original behaviour)
            only entry ``[0]`` of each sample's image/text features is used.
            When ``True`` the whole per-sample feature array is used.

    Note:
        The notebook calls the default mode the "[CLS] token representation".
        Whether entry ``[0]`` really is a [CLS] token depends on the layout of
        the saved arrays: for per-sample 1-D embeddings it is only the first
        scalar of the embedding -- TODO confirm against the feature extractor.
        The length of ``features`` depends on this flag, so the input size of
        the consuming model must match it.
    """

    def __init__(self, image_features, text_features, structured_data, targets, use_full_embedding=False):
        self.image_features = image_features
        self.text_features = text_features
        self.structured_data = structured_data
        self.targets = targets
        self.use_full_embedding = use_full_embedding

    def __len__(self):
        """Return the number of samples (the number of targets)."""
        return len(self.targets)

    def _embedding(self, sample_features):
        """Return the selected part of one sample's features as a 1-D float32 tensor."""
        selected = sample_features if self.use_full_embedding else sample_features[0]
        # Flatten so scalars, vectors and higher-rank arrays can all be
        # concatenated with the 1-D structured row.
        return torch.tensor(selected, dtype=torch.float32).reshape(-1)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for the sample at position ``idx``."""
        image = self._embedding(self.image_features[idx])
        text = self._embedding(self.text_features[idx])

        structured = torch.tensor(self.structured_data[idx], dtype=torch.float32)
        target = torch.tensor(self.targets[idx], dtype=torch.float32)

        features = torch.cat([image, text, structured])

        return features, target


# Mini-batch size shared by both loaders and by the step counts below.
BATCH_SIZE = 8

train_dataset = MultimodalDataset(train_image_features, train_text_features, train_structured_data, train_targets)
test_dataset = MultimodalDataset(test_image_features, test_text_features, test_structured_data, test_targets)

# Only the training data is shuffled; the evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Number of *full* batches per pass (floor division, so a trailing partial
# batch is not counted).
trainSteps = len(train_loader.dataset) // BATCH_SIZE
testSteps = len(test_loader.dataset) // BATCH_SIZE

# Per-epoch loss curves.
history = {"train_loss": [], "test_loss": []}

In [None]:
class Net(nn.Module):
    """Fully connected regressor: two ReLU hidden layers and a single linear output.

    Args:
        in_features: Length of each input feature vector. The default of 12
            is the size the notebook originally hard-coded.
        hidden1: Width of the first hidden layer (default 512).
        hidden2: Width of the second hidden layer (default 256).

    The layers are exposed as ``fc1``, ``fc2`` and ``fc3``.
    """

    def __init__(self, in_features=12, hidden1=512, hidden2=256):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, 1)

    def forward(self, x):
        """Map a ``(batch, in_features)`` input to a ``(batch, 1)`` prediction."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the output layer: the prediction is unbounded.
        x = self.fc3(x)
        return x


# Build the network and move its parameters to the selected device.
model = Net()
model = model.to(device)

# L1 (absolute error) loss between predictions and targets.
criterion = nn.L1Loss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Multiply the learning rate by 0.5 when the monitored value stops decreasing
# (patience of 5 epochs), never going below 1e-9.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=5,
    min_lr=1e-9,
)

In [None]:
# Train for 200 epochs; after every epoch, evaluate on the test loader and
# record both losses in `history`.
for epoch in range(200):
    # ---- training pass ----
    model.train()
    running_loss = 0.0

    for features, target in train_loader:
        features, target = features.to(device), target.to(device)

        optimizer.zero_grad()
        outputs = model(features)
        # Reshape the targets to a column so they line up with the model output.
        loss = criterion(outputs, target.view(-1, 1))
        loss.backward()
        optimizer.step()

        # Weight each batch loss by its batch size so the epoch value below
        # is averaged over samples rather than over batches.
        running_loss += features.size(0) * loss.item()

    epoch_loss = running_loss / len(train_loader.dataset)
    history["train_loss"].append(epoch_loss)
    print(f"Epoch {epoch + 1} Train Loss: {epoch_loss:.4f}")

    # The scheduler monitors the training loss of the epoch just finished.
    scheduler.step(epoch_loss)

    # ---- evaluation pass (no gradient tracking) ----
    model.eval()
    valid_loss = 0.0
    with torch.no_grad():
        for features, target in test_loader:
            features, target = features.to(device), target.to(device)
            outputs = model(features)
            loss = criterion(outputs, target.view(-1, 1))
            valid_loss += features.size(0) * loss.item()

    valid_loss = valid_loss / len(test_loader.dataset)
    history["test_loss"].append(valid_loss)
    print(f"Epoch {epoch + 1} Validation Loss: {valid_loss:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Overlay the per-epoch training and test loss curves on one set of axes.
for history_key, curve_label in (("train_loss", "train loss"), ("test_loss", "test loss")):
    plt.plot(history[history_key], label=curve_label)

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()