In [1]:
from google.colab import drive
# Mount Google Drive inside the Colab VM; the project folder is read from this mount.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import os
os.chdir("/content/drive/My Drive/CDC_Project")
# Now you can list files in that directory
!ls


 Baseline_models_and_Grad-Cam18.ipynb   preprocessing18.ipynb
 Baseline_models_and_Grad-Cam.ipynb     preprocessing.ipynb
 best_multimodal_model.pt	        __pycache__
 bestprice_multimodal_model.pt	        tabular_model_results.gdoc
'Copy of training_loop18.ipynb'         tabular_preprocessed.joblib
 Mapbox_api.ipynb		        test2.csv
 map_images			       'train(1).csv'
 model_training.ipynb		        train_config.json
 modular.py			        training_loop18.ipynb
 outputs			        training_loop.ipynb


In [3]:
import joblib

# NOTE(review): joblib.load unpickles the file, so only load bundles that were
# produced by a trusted preprocessing run.
bundle = joblib.load("tabular_preprocessed.joblib")

# Unpack the train/validation features, targets and sample ids from the bundle.
X_train, y_train = bundle["X_train_final"], bundle["y_train"]
X_val, y_val = bundle["X_val_final"], bundle["y_val"]
train_ids, val_ids = bundle["train_ids"], bundle["val_ids"]


In [4]:
import json
# Load the run configuration (image directory, batch size, tabular feature count).
# JSON is UTF-8 by specification, so decode explicitly instead of relying on the
# platform's locale default.
with open("train_config.json", encoding="utf-8") as f:
    cfg = json.load(f)


In [5]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from PIL import Image
import os

class MultimodalDataset(Dataset):
    """Dataset pairing one image per sample with its row of tabular features.

    Images are read from ``<image_dir>/<sample_id>.png``.

    Args:
        image_dir: Directory that contains the ``.png`` images.
        tabular_data: Indexable collection; ``tabular_data[idx]`` is the
            feature row of the sample at position ``idx``.
        ids: Indexable collection of sample ids; its length is the dataset length.
        targets: Optional indexable collection of regression targets aligned
            with ``ids``. When ``None``, items carry no target.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, image_dir, tabular_data, ids, targets=None, transform=None):
        self.image_dir = image_dir
        self.tabular_data = tabular_data
        self.ids = ids
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per id)."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Return ``(image, tabular, target)``, or ``(image, tabular)`` without targets."""
        sample_id = self.ids[idx]
        img_path = os.path.join(self.image_dir, f"{sample_id}.png")

        # Close the file handle deterministically instead of leaving it to the
        # garbage collector; convert() returns an independent copy of the
        # pixel data, so the image stays usable after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)

        tabular = torch.tensor(self.tabular_data[idx], dtype=torch.float32)

        if self.targets is not None:
            target = torch.tensor(self.targets[idx], dtype=torch.float32)
            return image, tabular, target

        return image, tabular


class TabularEncoder(nn.Module):
    """MLP that maps a tabular feature vector to a 64-dimensional embedding.

    Layer stack: Linear(input_dim, 128) -> ReLU -> BatchNorm1d(128) ->
    Dropout(0.3) -> Linear(128, 64) -> ReLU.
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_block = [
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3),
        ]
        output_block = [nn.Linear(128, 64), nn.ReLU()]
        # Same layer order (and therefore the same state-dict keys) as a
        # single flat Sequential.
        self.net = nn.Sequential(*hidden_block, *output_block)

    def forward(self, x):
        """Encode a batch of tabular rows."""
        embedding = self.net(x)
        return embedding


class MultimodalRegressor(nn.Module):
    """Regress a scalar from concatenated image features and tabular features.

    Args:
        image_encoder: Module that maps an image batch to a 2-D feature tensor
            of width ``image_feat_dim``.
        tabular_dim: Number of tabular features per sample.
        image_feat_dim: Width of the image encoder output. Defaults to 512,
            the value that was previously hard-coded, so existing callers are
            unaffected; pass another value to use a different backbone.
    """

    def __init__(self, image_encoder, tabular_dim, image_feat_dim=512):
        super().__init__()

        self.image_encoder = image_encoder

        self.regressor = nn.Sequential(
            nn.Linear(image_feat_dim + tabular_dim, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.3),

            nn.Linear(512, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3),

            nn.Linear(128, 1)
        )

    def forward(self, image, tabular):
        """Return predictions of shape ``(batch, 1)``."""
        img_feat = self.image_encoder(image)
        # Fuse the two modalities along the feature axis.
        x = torch.cat([img_feat, tabular], dim=1)
        return self.regressor(x)



In [6]:
from torchvision import transforms

# Shared preprocessing constants. The mean/std values are the usual ImageNet
# channel statistics, which is what a pretrained torchvision backbone expects.
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, light random augmentation, tensor conversion, normalisation.
train_img_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Validation pipeline: the same steps without the random augmentation.
val_img_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])


In [7]:
from torchvision import transforms
from torch.utils.data import DataLoader

# Both splits read their images from the same directory; they differ in the
# ids, feature rows, targets and image transform they use.
image_dir = cfg["train_image_dir"]
batch_size = cfg["batch_size"]

train_dataset = MultimodalDataset(
    image_dir, X_train, train_ids, targets=y_train, transform=train_img_transform
)
val_dataset = MultimodalDataset(
    image_dir, X_val, val_ids, targets=y_val, transform=val_img_transform
)

# Shuffle only the training batches; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [8]:
import torch
import torch.nn as nn
from torchvision import models
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)


In [9]:
# Display the tabular feature count from the config; the model's tabular input
# width is built from this value.
cfg["num_tabular_features"]

23

In [11]:
from torchvision import models
import torch.nn as nn

# `pretrained=True` is deprecated in torchvision (>= 0.13); the weights enum
# selects the same ImageNet checkpoint explicitly and avoids the warning.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Replace the classification head with a pass-through so the encoder returns
# its pooled features instead of class logits.
resnet.fc = nn.Identity()


model = MultimodalRegressor(
    image_encoder=resnet,
    tabular_dim=cfg["num_tabular_features"]
).to(device)


In [None]:
# Stage 1: freeze the whole image encoder so only the regression head is trained.
for frozen_param in model.image_encoder.parameters():
    frozen_param.requires_grad_(False)


In [None]:
# Mean-squared-error objective for the regression target.
criterion = nn.MSELoss()

# Optimise only the parameters that are currently trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=3e-4, weight_decay=1e-5)

# Halve the learning rate once the monitored metric has stopped improving
# for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.5, patience=3
)


In [None]:
# Pull one training batch and print the shape of each component.
sample_batch = next(iter(train_loader))
images, tabular, targets = sample_batch

for label, batch_part in (
    ("Image shape   :", images),
    ("Tabular shape :", tabular),
    ("Target shape  :", targets),
):
    print(label, batch_part.shape)


Image shape   : torch.Size([32, 3, 224, 224])
Tabular shape : torch.Size([32, 23])
Target shape  : torch.Size([32])


In [None]:
# Smoke test: push one batch through the model and check the output shape.
images, tabular, targets = next(iter(train_loader))

images = images.to(device)
tabular = tabular.to(device)

# Switch to eval mode first: in train mode this throw-away forward pass would
# update the BatchNorm running statistics (no_grad does not prevent that) and
# apply Dropout. train_one_epoch() puts the model back into train mode.
model.eval()
with torch.no_grad():
    preds = model(images, tabular)

print(preds.shape)


torch.Size([32, 1])


In [12]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Args:
        model: Module called as ``model(images, tabular)``.
        loader: Iterable of ``(images, tabular, targets)`` batches exposing
            a ``.dataset`` with a length.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batch tensors are moved to.

    Returns:
        Sum of ``batch_loss * batch_size`` divided by ``len(loader.dataset)``.

    Raises:
        ValueError: If predictions and targets hold different numbers of values.
    """
    model.train()
    total_loss = 0.0

    for images, tabular, targets in loader:
        images = images.to(device)
        tabular = tabular.to(device)
        # Flatten both sides to 1-D. squeeze(1) on the outputs alone leaves
        # (B, 1)-shaped targets untouched, and the loss then silently
        # broadcasts (B,) against (B, 1) into a (B, B) error matrix.
        targets = targets.to(device).reshape(-1)

        optimizer.zero_grad()

        outputs = model(images, tabular).reshape(-1)
        if outputs.shape != targets.shape:
            raise ValueError(
                f"Prediction/target size mismatch: {tuple(outputs.shape)} vs {tuple(targets.shape)}"
            )
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch is averaged correctly.
        total_loss += loss.item() * images.size(0)

    return total_loss / len(loader.dataset)


In [13]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Module called as ``model(images, tabular)``.
        loader: Iterable of ``(images, tabular, targets)`` batches exposing
            a ``.dataset`` with a length.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(mean_loss, rmse, r2)``: the batch-size-weighted mean loss,
        the root of ``mean_squared_error`` and ``r2_score`` over all
        collected predictions.

    Raises:
        ValueError: If predictions and targets hold different numbers of values.
    """
    model.eval()

    total_loss = 0.0
    preds = []
    trues = []

    with torch.no_grad():
        for images, tabular, targets in loader:
            images = images.to(device)
            tabular = tabular.to(device)
            # Flatten both sides to 1-D so (B, 1)-shaped targets cannot be
            # broadcast against (B,) predictions inside the loss or metrics.
            targets = targets.to(device).reshape(-1)

            outputs = model(images, tabular).reshape(-1)
            if outputs.shape != targets.shape:
                raise ValueError(
                    f"Prediction/target size mismatch: {tuple(outputs.shape)} vs {tuple(targets.shape)}"
                )
            loss = criterion(outputs, targets)

            # Weight by batch size so a smaller final batch is averaged correctly.
            total_loss += loss.item() * images.size(0)

            preds.append(outputs.cpu().numpy())
            trues.append(targets.cpu().numpy())

    preds = np.concatenate(preds)
    trues = np.concatenate(trues)

    mse = mean_squared_error(trues, preds)
    rmse = np.sqrt(mse)
    val_r2 = r2_score(trues, preds)

    return total_loss / len(loader.dataset), rmse, val_r2

In [None]:
EPOCHS = 5

# Lowest validation RMSE seen so far; starts at infinity so the first epoch
# always writes a checkpoint.
best_rmse = float("inf")

for epoch in range(EPOCHS):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_rmse, val_r2 = validate(model, val_loader, criterion, device)

    # The plateau scheduler is driven by the validation RMSE.
    scheduler.step(val_rmse)

    epoch_summary = (
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train loss: {train_loss:.4f} | "
        f"Val RMSE: {val_rmse:.4f} | "
        f"Val R²: {val_r2:.4f}"
    )
    print(epoch_summary)

    # Checkpoint only when this epoch beats the best RMSE so far.
    if best_rmse > val_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), "best_multimodal_model.pt")


Epoch [1/5] | Train loss: 154.1866 | Val RMSE: 10.9416 | Val R²: -432.8349
Epoch [2/5] | Train loss: 72.0984 | Val RMSE: 5.2077 | Val R²: -97.2784
Epoch [3/5] | Train loss: 11.1339 | Val RMSE: 1.1867 | Val R²: -4.1032
Epoch [4/5] | Train loss: 4.7092 | Val RMSE: 0.7229 | Val R²: -0.8940
Epoch [5/5] | Train loss: 4.1394 | Val RMSE: 0.7486 | Val R²: -1.0307


In [14]:
# Mean-squared-error loss; "mean" is the default reduction, spelled out here.
criterion = nn.MSELoss(reduction="mean")

In [None]:
# Stage 2: make the last residual stage (layer4) of the image encoder trainable.
for layer4_param in model.image_encoder.layer4.parameters():
    layer4_param.requires_grad_(True)


In [None]:
# Rebuild the optimizer so it picks up the newly trainable parameters.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-4,
    weight_decay=1e-5
)

# A ReduceLROnPlateau instance holds a reference to the optimizer it was built
# with. Without rebuilding it, scheduler.step() would keep adjusting the
# discarded optimizer and never change this one's learning rate.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3
)


In [None]:
EPOCHS = 5

# Continue training; best_rmse carries over from the earlier run, so a
# checkpoint is written only when that earlier best is beaten.
for epoch in range(EPOCHS):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_rmse, val_r2 = validate(model, val_loader, criterion, device)

    # The plateau scheduler is driven by the validation RMSE.
    scheduler.step(val_rmse)

    epoch_summary = (
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train loss: {train_loss:.4f} | "
        f"Val RMSE: {val_rmse:.4f} | "
        f"Val R²: {val_r2:.4f}"
    )
    print(epoch_summary)

    if best_rmse > val_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), "best_multimodal_model.pt")


Epoch [1/5] | Train loss: 3.6079 | Val RMSE: 0.5834 | Val R²: -0.2333
Epoch [2/5] | Train loss: 3.4037 | Val RMSE: 0.5011 | Val R²: 0.0900
Epoch [3/5] | Train loss: 3.2972 | Val RMSE: 0.4285 | Val R²: 0.3346
Epoch [4/5] | Train loss: 2.9724 | Val RMSE: 0.4217 | Val R²: 0.3555
Epoch [5/5] | Train loss: 2.9619 | Val RMSE: 0.6140 | Val R²: -0.3660


In [None]:
# Restart Adam with a lower learning rate for further fine-tuning.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=5e-5,
    weight_decay=1e-5
)

# Rebind the plateau scheduler to the new optimizer; a scheduler built for a
# previous optimizer keeps adjusting that old object and has no effect here.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3
)


In [None]:
EPOCHS = 5

# Another round of fine-tuning with the current optimizer; best_rmse still
# holds the best validation RMSE from the earlier rounds.
for epoch in range(EPOCHS):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_rmse, val_r2 = validate(model, val_loader, criterion, device)

    # The plateau scheduler is driven by the validation RMSE.
    scheduler.step(val_rmse)

    epoch_summary = (
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train loss: {train_loss:.4f} | "
        f"Val RMSE: {val_rmse:.4f} | "
        f"Val R²: {val_r2:.4f}"
    )
    print(epoch_summary)

    if best_rmse > val_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), "best_multimodal_model.pt")


Epoch [1/5] | Train loss: 2.9322 | Val RMSE: 0.5410 | Val R²: -0.0605
Epoch [2/5] | Train loss: 2.7045 | Val RMSE: 0.8871 | Val R²: -1.8517
Epoch [3/5] | Train loss: 2.5676 | Val RMSE: 0.4042 | Val R²: 0.4080
Epoch [4/5] | Train loss: 2.4418 | Val RMSE: 0.4390 | Val R²: 0.3018
Epoch [5/5] | Train loss: 2.3865 | Val RMSE: 0.3423 | Val R²: 0.5754


In [None]:
# Restart Adam with a very small learning rate for a final polishing pass.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=5e-7,
    weight_decay=1e-5
)

# Rebind the plateau scheduler to the new optimizer; a scheduler built for a
# previous optimizer keeps adjusting that old object and has no effect here.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3
)


In [None]:
EPOCHS = 10

# Longer low-learning-rate run; checkpoints are still gated on beating best_rmse.
for epoch in range(EPOCHS):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_rmse, val_r2 = validate(model, val_loader, criterion, device)

    # The plateau scheduler is driven by the validation RMSE.
    scheduler.step(val_rmse)

    epoch_summary = (
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train loss: {train_loss:.4f} | "
        f"Val RMSE: {val_rmse:.4f} | "
        f"Val R²: {val_r2:.4f}"
    )
    print(epoch_summary)

    if best_rmse > val_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), "best_multimodal_model.pt")


Epoch [1/5] | Train loss: 2.3545 | Val RMSE: 0.3018 | Val R²: 0.6698
Epoch [2/5] | Train loss: 2.3562 | Val RMSE: 0.2972 | Val R²: 0.6800


In [15]:
# Restore the best checkpoint. The file only contains a state dict, so
# weights_only=True is sufficient and keeps torch.load from executing
# arbitrary pickled code if the file has been tampered with.
model.load_state_dict(
    torch.load("best_multimodal_model.pt", map_location=device, weights_only=True)
)


<All keys matched successfully>

In [16]:

# Set the trainable/frozen split BEFORE building the optimizer. The optimizer
# is filtered on requires_grad, so on a fresh kernel (where the encoder has
# not been frozen yet) building it first would register every encoder
# parameter rather than only layer4 and the regression head.
for param in model.image_encoder.parameters():
    param.requires_grad = False
for param in model.image_encoder.layer4.parameters():
    param.requires_grad = True

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-7,
    weight_decay=1e-5
)

# Plateau scheduler bound to the optimizer created above.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.5,
    patience=3
)

In [17]:

# Leave only the last residual stage (layer4) of the image encoder trainable;
# every other encoder parameter is frozen.
layer4_param_ids = {id(p) for p in model.image_encoder.layer4.parameters()}
for encoder_param in model.image_encoder.parameters():
    encoder_param.requires_grad = id(encoder_param) in layer4_param_ids


In [None]:
EPOCHS = 5

# Measure the baseline on the model as it stands now instead of pasting a
# number from an earlier session. A stale constant can either block every
# checkpoint or let a worse model overwrite the saved one.
best_rmse = validate(model, val_loader, criterion, device)[1]
print(f"Baseline Val RMSE: {best_rmse:.4f}")


for epoch in range(EPOCHS):
    train_loss = train_one_epoch(
        model, train_loader, optimizer, criterion, device
    )

    val_loss, val_rmse, val_r2 = validate(
        model, val_loader, criterion, device
    )

    # The plateau scheduler is driven by the validation RMSE.
    scheduler.step(val_rmse)

    print(
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train loss: {train_loss:.4f} | "
        f"Val RMSE: {val_rmse:.4f} | "
        f"Val R²: {val_r2:.4f}"
    )


    # Overwrite the checkpoint only when the measured baseline is beaten.
    if val_rmse < best_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), "best_multimodal_model.pt")
