# **Image Model**

In [1]:
# Show which Python interpreter this kernel is running (displayed as the cell output).
import sys
sys.executable

'/opt/anaconda3/bin/python'

In [2]:
import os
import warnings

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms


In [3]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device


device(type='cpu')

In [4]:
# Preprocessing applied to every image before it reaches the ResNet:
# resize to the 224x224 input size, convert to a [0, 1] tensor, then
# standardise each channel with the ImageNet mean / std.
imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
image_transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor(), imagenet_normalize]
)

print("Image preprocessing pipeline ready")

Image preprocessing pipeline ready


In [5]:
class SatelliteImageDataset(Dataset):
    """Dataset yielding one RGB image (optionally transformed) per dataframe row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide an ``image_path`` column. The index is reset so rows are
        addressable by position ``0 .. len(dataframe) - 1``.
    transform : callable, optional
        Applied to the PIL image before it is returned.
    """

    # Size of the black placeholder substituted for images that cannot be read.
    FALLBACK_SIZE = (224, 224)

    def __init__(self, dataframe, transform=None):
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the image for row ``idx``.

        If the file cannot be opened or decoded, a warning is emitted and a
        black placeholder image is used instead, so iteration never aborts.
        """
        img_path = self.df.loc[idx, "image_path"]

        try:
            # The context manager closes the underlying file; convert() returns
            # a fully loaded copy that stays valid after the file is closed.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
        except Exception as exc:
            # Best-effort fallback is kept on purpose, but it is no longer
            # silent: a placeholder would otherwise enter the embeddings
            # without any trace.
            warnings.warn(
                f"Could not read image {str(img_path)!r} ({exc}); "
                "substituting a black placeholder.",
                RuntimeWarning,
                stacklevel=2,
            )
            image = Image.new("RGB", self.FALLBACK_SIZE, color=(0, 0, 0))

        if self.transform:
            image = self.transform(image)

        return image

In [6]:
from pathlib import Path

# Load the image-linked dataset.
# IMPORTANT: ids are cast to float because the image filenames are derived
# from the float representation (e.g. 12345.0.png).
image_df = pd.read_csv("../data/processed/train_with_images.csv").assign(
    id=lambda frame: frame["id"].astype(float)
)


In [7]:
# Compare the row count with the number of distinct ids to reveal repeated ids.
num_rows, num_unique_ids = len(image_df), image_df["id"].nunique()

for label, count in (("Rows:", num_rows), ("Unique IDs:", num_unique_ids)):
    print(label, count)


Rows: 6000
Unique IDs: 5982


In [8]:
# Order rows by date so the de-duplication that follows (keep="last") retains
# the latest record per id. A stable sort keeps rows sharing the same date in
# their original order, so the retained duplicate is well defined.
# NOTE(review): assumes the `date` column sorts chronologically -- confirm its dtype/format.
image_df = image_df.sort_values("date", kind="mergesort")


In [9]:
# Keep one row per id -- the last occurrence in the current row order -- and
# renumber the index from 0.
is_earlier_duplicate = image_df.duplicated(subset="id", keep="last")
image_df = image_df[~is_earlier_duplicate].reset_index(drop=True)


In [10]:
# After de-duplication the two numbers are expected to match.
n_rows = len(image_df)
n_ids = image_df["id"].nunique()
print("Rows:", n_rows)
print("Unique IDs:", n_ids)


Rows: 5982
Unique IDs: 5982


In [11]:
# Build correct image paths
IMAGE_DIR = Path("../data/images")


def _image_path_for(listing_id):
    """Return the PNG path for an id; files are named after the id formatted with one decimal."""
    file_name = f"{float(listing_id):.1f}.png"
    return IMAGE_DIR / file_name


image_df["image_path"] = image_df["id"].apply(_image_path_for)

image_df["image_path"].head(5)

0    ../data/images/5101405331.0.png
1    ../data/images/7305300695.0.png
2    ../data/images/8564860280.0.png
3    ../data/images/2391600950.0.png
4    ../data/images/1999700045.0.png
Name: image_path, dtype: object

In [12]:
# Keep only rows where image actually exists
has_image = image_df["image_path"].apply(lambda path: path.exists())
image_df["image_exists"] = has_image
image_df = image_df.loc[image_df["image_exists"]].reset_index(drop=True)

print("Rows after filtering:", len(image_df))
print("Price distribution after filtering:")
price_summary = image_df["price"].describe()
print(price_summary)

# Same summary computed on the unfiltered source file, shown as the cell
# output for comparison with the filtered one above.
pd.read_csv("../data/processed/train_with_images.csv")["price"].describe()


Rows after filtering: 5982
Price distribution after filtering:
count    5.982000e+03
mean     5.388394e+05
std      3.526562e+05
min      8.100000e+04
25%      3.230000e+05
50%      4.510000e+05
75%      6.400000e+05
max      4.668000e+06
Name: price, dtype: float64


count    6.000000e+03
mean     5.384922e+05
std      3.528704e+05
min      8.100000e+04
25%      3.228750e+05
50%      4.503000e+05
75%      6.400000e+05
max      4.668000e+06
Name: price, dtype: float64

In [13]:
# Create dataset and dataloader.
# shuffle=False keeps batches in dataframe row order, so outputs collected
# from the loader line up positionally with the rows of image_df.
image_dataset = SatelliteImageDataset(image_df, transform=image_transform)

loader_options = dict(batch_size=32, shuffle=False, num_workers=0)
image_loader = DataLoader(image_dataset, **loader_options)

len(image_dataset)


5982

In [14]:
# Load an ImageNet-pretrained ResNet-18.
# The `pretrained=` flag is deprecated since torchvision 0.13 in favour of
# `weights=`; IMAGENET1K_V1 is the checkpoint `pretrained=True` used to load.
# torchvision versions without the weights enum fall back to the legacy flag.
if hasattr(models, "ResNet18_Weights"):
    resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    resnet = models.resnet18(pretrained=True)
resnet.fc = nn.Identity()  # Remove final classification layer
resnet




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [15]:
# Use the network purely as a fixed feature extractor: switch off gradient
# tracking on every parameter in one call.
resnet.requires_grad_(False)

# Move model to device and set to evaluation mode
resnet = resnet.to(device)
resnet.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [16]:
# Extract embeddings for all images
resnet.eval()
all_embeddings = []

with torch.no_grad():  # inference only, no autograd bookkeeping
    for batch in image_loader:
        batch = batch.to(device)
        # One row of features per image in the batch.
        embeddings = torch.flatten(resnet(batch), start_dim=1)
        all_embeddings.append(embeddings.cpu().numpy())


In [17]:
# Join the per-batch 2-D arrays along the row axis into one embedding matrix.
image_embeddings = np.concatenate(all_embeddings, axis=0)
image_embeddings.shape


(5982, 512)

In [18]:
# Save embeddings to disk.
# Ids are attached by row position, i.e. embedding row i is labelled with
# the id of image_df row i.
emb_df = pd.DataFrame(image_embeddings).assign(id=image_df["id"].values)
emb_df.to_csv("../data/processed/image_embeddings.csv", index=False)
print("Image embeddings saved to ../data/processed/image_embeddings.csv")


Image embeddings saved to ../data/processed/image_embeddings.csv


In [19]:
# Load image embeddings with IDs
emb_df = pd.read_csv("../data/processed/image_embeddings.csv")
print(emb_df.shape)

# Separate ID and image features: every column except "id" is a feature.
feature_columns = [column for column in emb_df.columns if column != "id"]
X_img = emb_df[feature_columns].values
img_ids = emb_df["id"].values

for array in (X_img, img_ids):
    print(array.shape)



(5982, 513)
(5982, 512)
(5982,)


In [20]:
# Load the target table and preview its first rows.
targets_path = "../data/processed/image_targets.csv"
targets_df = pd.read_csv(targets_path)

print(targets_df.shape)
targets_df.head(5)


(6000, 2)


Unnamed: 0,id,price
0,9543000000.0,139950
1,3353400000.0,174000
2,2976801000.0,150000
3,7335400000.0,219500
4,7883601000.0,157500


In [21]:
# NOTE: `merged` is kept for inspection only and is NOT used to build `y`.
# An inner merge returns more rows than `emb_df` whenever `targets_df`
# repeats an id (presumably it does: 6000 target rows vs 5982 embeddings --
# verify), so it cannot serve as a row-aligned target by itself.
merged = pd.merge(
    emb_df[["id"]],
    targets_df,
    on="id",
    how="inner"
)

# Look prices up by id rather than by row position, so y[i] always belongs
# to the image whose embedding is row i of emb_df (and therefore of X_img).
if not image_df["id"].is_unique:
    raise ValueError("image_df must hold exactly one row per id before building targets")
price_by_id = image_df.set_index("id")["price"]
aligned_prices = price_by_id.reindex(emb_df["id"])
if aligned_prices.isna().any():
    raise ValueError(
        f"{int(aligned_prices.isna().sum())} embedding ids have no usable price in image_df"
    )

# Log-transform target
y = np.log1p(aligned_prices.to_numpy())

print(len(y))


5982


In [22]:
# The image embeddings are the only features used by this model.
X = X_img
for label, array in (("X shape:", X), ("y shape:", y)):
    print(label, array.shape)


X shape: (5982, 512)
y shape: (5982,)


In [23]:
# Create bins: cut the target into up to 10 quantile bins and keep only the
# integer bin codes (duplicate bin edges are dropped rather than raising).
qcut_options = {"q": 10, "labels": False, "duplicates": "drop"}
price_bins = pd.qcut(y, **qcut_options)


In [24]:
# Balanced Train–Validation split: stratifying on the quantile bins gives
# both parts a similar target distribution.
from sklearn.model_selection import train_test_split

split_options = dict(test_size=0.2, random_state=42, stratify=price_bins)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

X_train.shape, X_val.shape

((4785, 512), (1197, 512))

In [25]:
# y_train / y_val hold log1p(price), so the printed ranges are on the log
# scale; the labels say so to avoid reading them as dollar amounts.
print("Train log-price range:")
print(np.min(y_train), np.max(y_train))

print("\nValidation log-price range:")
print(np.min(y_val), np.max(y_val))

Train price range:
11.302216779257382 15.039286283703994

Validation price range:
11.350418300109132 15.356241486613069


In [26]:
# CNN is used only for feature extraction; XGBoost is used for regression
# as it handles noisy image embeddings more reliably than neural networks.

# Direct price prediction with a neural network is avoided because satellite image
# signals are weak and noisy.

In [27]:
from xgboost import XGBRegressor

# Gradient-boosted trees regress the log-transformed price on the image embeddings.
xgb_params = dict(
    n_estimators=400,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    objective="reg:squarederror",
    random_state=42,
    n_jobs=-1,
)
img_model = XGBRegressor(**xgb_params)

img_model.fit(X_train, y_train)


In [29]:
# Predict on the validation features; the model was fit on log1p(price), so
# these predictions are on that same log scale.
y_val_pred_log = img_model.predict(X_val)


In [30]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Log-scale metrics: RMSE is the square root of the mean squared error.
mse_log = mean_squared_error(y_val, y_val_pred_log)
rmse_log = np.sqrt(mse_log)
r2_log = r2_score(y_val, y_val_pred_log)

print(f"Image-only RMSE (log-price): {rmse_log:.4f}")
print(f"Image-only R2 (log-price): {r2_log:.4f}")


Image-only RMSE (log-price): 0.4579
Image-only R2 (log-price): 0.2318


In [31]:
# Convert log(price) to price: expm1 is the inverse of the log1p applied to the target.
y_val_pred_price, y_val_true_price = (
    np.expm1(log_values) for log_values in (y_val_pred_log, y_val)
)

In [32]:
# Reuse the back-transformed arrays from the previous cell instead of
# recomputing expm1; the old names are kept as aliases for any later use.
y_val_pred = y_val_pred_price
y_val_true = y_val_true_price

# Price-scale metrics (original currency units).
rmse_price = np.sqrt(mean_squared_error(y_val_true, y_val_pred))
r2_price = r2_score(y_val_true, y_val_pred)

print(f"Image-only RMSE (price): ${rmse_price:,.0f}")
print(f"Image-only R2 (price): {r2_price:.4f}")


Image-only RMSE (price): $319,661
Image-only R2 (price): 0.1439
