In [None]:
import os
import glob
import torch
from utils import *
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

#### Dataset Class

In [None]:
class AirbnbDataset(Dataset):
    """Dataset yielding one (image, price) sample per listing photo.

    The CSV must contain a ``listing`` column and a ``price`` column. For each
    row, every ``*.jpg`` file found in ``<image_root>/<listing>/`` becomes one
    sample paired with that row's price.

    Args:
        csv: Path (or any object accepted by ``pandas.read_csv``) of the
            listings table.
        image_root: Directory holding one sub-directory per listing id.
        transforms: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned.
    """

    def __init__(self, csv, image_root, transforms=None):
        self.listings = pd.read_csv(csv)
        self.image_root = image_root
        self.transforms = transforms
        self.pairs = []

        # Iterate the two columns directly instead of using iterrows():
        # iterrows() upcasts a row that mixes int and float columns, so an
        # integer listing id would come back as a float (e.g. 123 -> 123.0).
        for listing_id, price in zip(self.listings["listing"], self.listings["price"]):
            # Listing ids may be numeric; os.path.join requires a string.
            image_dir = os.path.join(self.image_root, str(listing_id))
            # Sort so that the sample order is reproducible across runs.
            for path in sorted(glob.glob(os.path.join(image_dir, "*.jpg"))):
                self.pairs.append((path, price))

    def __len__(self):
        """Return the number of (image path, price) samples."""
        return len(self.pairs)

    def __getitem__(self, index):
        """Load sample ``index``.

        Returns:
            A tuple ``(image, price)`` where ``image`` is the RGB image (after
            ``transforms`` when provided) and ``price`` is a float32 tensor of
            shape ``(1,)``.
        """
        image_path, price = self.pairs[index]
        # The context manager closes the underlying file once the pixel data
        # has been copied by convert().
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transforms is not None:
            image = self.transforms(image)

        return image, torch.tensor([price], dtype=torch.float32)

#### Model Definition

In [None]:
class AirbnbPricePredictor(nn.Module):
    """ResNet-18 based regressor that emits a single value per input image.

    The network is created through ``models.resnet18(pretrained=True)`` and
    its final fully connected layer is swapped for a one-output linear layer.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Replace the classification head with a single-output regression head.
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, 1)
        # Attribute name is kept as ``model`` so state_dict keys are unchanged.
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        prediction = self.model(x)
        return prediction

#### Train and Validate 

In [None]:
def train(
    model,
    train_loader,
    val_loader,
    device,
    max_epochs=50,
    initial_lr=0.0001,
    early_stop_patience=10,
    checkpoint_dir="cnn_checkpoints",
):
    """Train ``model`` with Adam + MSE loss, validating after every epoch.

    The learning rate is halved by ``ReduceLROnPlateau`` when the validation
    loss stalls for more than 3 epochs. Whenever the validation loss improves,
    the model's ``state_dict`` is saved to
    ``<checkpoint_dir>/<ModelClassName>_<epoch>.pth`` (epoch is 0-based).
    Training stops early once more than ``early_stop_patience`` epochs have
    passed since the best epoch.

    Args:
        model: ``nn.Module`` to optimise; moved to ``device`` in place.
        train_loader: Iterable of ``(images, prices)`` training batches.
        val_loader: Iterable of ``(images, prices)`` validation batches.
        device: Device the model and batches are moved to.
        max_epochs: Upper bound on the number of epochs.
        initial_lr: Initial Adam learning rate.
        early_stop_patience: Number of non-improving epochs tolerated before
            stopping.
        checkpoint_dir: Directory for checkpoints; created if missing.

    Returns:
        None. Progress is reported via ``plot_loss`` (not defined in this
        cell; presumably provided by ``utils``) and ``print``.
    """
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=initial_lr)
    criterion = nn.MSELoss()
    scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=3, factor=0.5)

    # torch.save does not create missing parent directories.
    os.makedirs(checkpoint_dir, exist_ok=True)

    best_val_loss = float("inf")
    best_epoch = 0
    train_history, val_history = [], []

    for epoch in range(max_epochs):
        # Re-enable training mode every epoch: the validation pass below
        # switches the model to eval mode (frozen BatchNorm statistics, no
        # dropout), and it would otherwise stay there for all later epochs.
        model.train()
        train_loss = 0.0
        for images, prices in tqdm(train_loader, desc=f"Epoch {epoch+1}/{max_epochs}"):
            images, prices = images.to(device), prices.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            # Flatten both sides so predictions and targets always align
            # element-wise, including for a final batch of size 1.
            loss = criterion(outputs.reshape(-1), prices.reshape(-1))
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        val_loss = 0.0
        model.eval()
        with torch.no_grad():
            for images, prices in tqdm(val_loader, desc="Validation"):
                images, prices = images.to(device), prices.to(device)
                outputs = model(images)
                loss = criterion(outputs.reshape(-1), prices.reshape(-1))
                val_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        train_history.append(avg_train_loss)
        val_history.append(avg_val_loss)
        scheduler.step(avg_val_loss)

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_epoch = epoch
            torch.save(
                model.state_dict(),
                os.path.join(
                    checkpoint_dir, f"{model.__class__.__name__}_{best_epoch}.pth"
                ),
            )
        elif epoch - best_epoch > early_stop_patience:  # Early stopping
            print("Early stopping triggered.")
            break

        plot_loss(train_history, val_history)
        print(f"Best Epoch: {best_epoch}, Validation Loss: {best_val_loss:.4f}")

In [None]:
def inference(model, listing_id, device, transform=None, image_root="images"):
    """Predict a listing's price by averaging per-image model predictions.

    Every ``*.jpg`` in ``<image_root>/<listing_id>/`` is loaded as RGB,
    transformed, passed through ``model`` as a batch of one, and the
    exponential of the scalar output is collected. The mean of those values
    is returned.

    Args:
        model: ``nn.Module`` producing one scalar per image; moved to
            ``device`` and put in eval mode.
        listing_id: Listing identifier naming the image sub-directory.
        device: Device used for the forward passes.
        transform: Callable mapping a ``PIL.Image`` to a ``(C, H, W)`` tensor.
            Defaults to ``transforms.ToTensor()``.
            NOTE(review): this should match the preprocessing used during
            training, which is not visible here; pass it explicitly.
        image_root: Directory holding one sub-directory per listing id.

    Returns:
        The mean exponentiated prediction as a float, or ``nan`` when the
        listing has no images.
    """
    model.to(device)
    model.eval()

    pattern = os.path.join(image_root, str(listing_id), "*.jpg")
    image_paths = sorted(glob.glob(pattern))
    if not image_paths:
        # Nothing to average; return NaN explicitly instead of dividing by zero.
        return float("nan")

    if transform is None:
        transform = transforms.ToTensor()

    predictions = []
    with torch.no_grad():
        for path in image_paths:
            with Image.open(path) as raw_image:
                image = transform(raw_image.convert("RGB"))
            # The model expects a batch dimension: (C, H, W) -> (1, C, H, W).
            output = model(image.unsqueeze(0).to(device))
            # NOTE(review): exponentiating presumes the model was trained on
            # log-prices; confirm against the training targets.
            predictions.append(torch.exp(output).item())

    return sum(predictions) / len(predictions)