# Image Embedding Generation (CNN)

This notebook uses a pretrained CNN (ResNet-18 with its final classification
layer removed) to extract fixed-length, 512-dimensional embeddings from
satellite images. The embeddings are saved to disk as `.npy` files and reused
for downstream multimodal regression.

In [1]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import Dataset, DataLoader

from PIL import Image
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

In [2]:
# Use a CUDA GPU when one is visible to PyTorch and fall back to the CPU
# otherwise, so the notebook still runs unchanged on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [3]:
# Locations of the processed tabular splits, relative to this notebook.
train_csv_path = "../data/processed/train_tabular.csv"
test_csv_path = "../data/processed/test_tabular.csv"

# Read both splits with the same default parser settings.
train_df, test_df = map(pd.read_csv, (train_csv_path, test_csv_path))

# Quick sanity check of the (rows, columns) of each split.
print(train_df.shape, test_df.shape)

(16209, 14) (5404, 13)


In [4]:
# Per-channel (R, G, B) statistics handed to Normalize; these are the
# ImageNet values that torchvision documents for its pretrained models.
normalize_mean = [0.485, 0.456, 0.406]
normalize_std = [0.229, 0.224, 0.225]

# Preprocessing steps, applied in order to every image:
#   1. resize to a fixed 224x224 grid,
#   2. convert the PIL image to a tensor,
#   3. standardise each channel with the statistics above.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=normalize_mean, std=normalize_std),
]

image_transforms = transforms.Compose(preprocessing_steps)

In [5]:
class SatelliteImageDataset(Dataset):
    """Dataset that yields one transformed RGB image per dataframe row.

    Each row of ``dataframe`` must provide an ``"image_path"`` column holding
    the location of the image file to load for that row.
    """

    def __init__(self, dataframe, transform):
        """Store the rows and the transform applied to every loaded image.

        Args:
            dataframe: pandas DataFrame with an ``"image_path"`` column.
            transform: callable applied to the RGB ``PIL`` image; its return
                value is what ``__getitem__`` yields.
        """
        # Re-index to 0..n-1 so the integer positions handed out by a
        # DataLoader line up with the ``.loc`` labels used in __getitem__.
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (and therefore images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert to RGB and transform the image for row ``idx``.

        Args:
            idx: integer position of the row in the re-indexed dataframe.

        Returns:
            Whatever ``self.transform`` returns for the RGB image.
        """
        img_path = self.df.loc[idx, "image_path"]
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle as soon as convert() has produced an in-memory
        # copy, instead of leaving it to the garbage collector.
        with Image.open(img_path) as source_image:
            image = source_image.convert("RGB")
        return self.transform(image)

In [6]:
# Number of images sent through the network per forward pass.
BATCH_SIZE = 16

# Both loaders share the same settings. shuffle=False keeps the batches in
# dataframe row order, so row i of the stacked embeddings corresponds to
# row i of the dataframe. num_workers=0 loads images in the main process.
shared_loader_options = dict(batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

train_dataset = SatelliteImageDataset(dataframe=train_df, transform=image_transforms)
test_dataset = SatelliteImageDataset(dataframe=test_df, transform=image_transforms)

train_loader = DataLoader(train_dataset, **shared_loader_options)
test_loader = DataLoader(test_dataset, **shared_loader_options)

In [8]:
from torchvision.models import resnet18, ResNet18_Weights

# Load ResNet-18 with torchvision's default pretrained weights and switch it
# (and therefore all of its child modules) to inference mode.
weights = ResNet18_Weights.DEFAULT
resnet = resnet18(weights=weights)
resnet.eval()

# Keep every top-level child except the last one (the classification head),
# so the network's output is the pooled feature map rather than class logits.
backbone_layers = list(resnet.children())[:-1]
feature_extractor = torch.nn.Sequential(*backbone_layers)
feature_extractor = feature_extractor.to(device)

In [9]:
def extract_embeddings(dataloader, model):
    """Run ``model`` over every batch of ``dataloader`` and stack the results.

    Gradients are disabled for the whole pass. Each model output is flattened
    to one row per sample, moved to the CPU and converted to NumPy; the
    per-batch arrays are then stacked vertically in iteration order.

    Args:
        dataloader: iterable yielding batches as tensors the model accepts.
        model: callable mapping a batch tensor to a tensor whose first
            dimension is the batch dimension.

    Returns:
        A 2-D ``numpy.ndarray`` with one row per sample seen in ``dataloader``.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-level ``device`` variable. A model without parameters gives
    # no device to follow, so its batches are left where they are.
    parameters = getattr(model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    target_device = None if first_param is None else first_param.device

    embeddings = []

    with torch.no_grad():
        for batch in tqdm(dataloader):
            if target_device is not None:
                batch = batch.to(target_device)
            features = model(batch)
            # reshape (unlike view) also handles non-contiguous outputs by
            # copying when necessary; the resulting shape is the same.
            features = features.reshape(features.size(0), -1)
            embeddings.append(features.cpu().numpy())

    if not embeddings:
        # np.vstack would raise a ValueError here anyway; raise the same
        # exception type with a message that names the actual problem.
        raise ValueError(
            "extract_embeddings received an empty dataloader; nothing to stack"
        )

    return np.vstack(embeddings)

In [10]:
# Embed the training split; the printed shape is (rows, embedding width).
train_embeddings = extract_embeddings(
    dataloader=train_loader,
    model=feature_extractor,
)
print(train_embeddings.shape)

100%|██████████| 1014/1014 [14:56<00:00,  1.13it/s]

(16209, 512)





In [11]:
# Embed the test split with the same extractor used for the training split.
test_embeddings = extract_embeddings(
    dataloader=test_loader,
    model=feature_extractor,
)
print(test_embeddings.shape)

100%|██████████| 338/338 [03:31<00:00,  1.60it/s]

(5404, 512)





In [12]:
# Write each embedding matrix to its own .npy file, train split first.
embedding_outputs = {
    "../data/processed/train_image_embeddings.npy": train_embeddings,
    "../data/processed/test_image_embeddings.npy": test_embeddings,
}
for output_path, embedding_matrix in embedding_outputs.items():
    np.save(output_path, embedding_matrix)