# CNN Feature Extraction (Satellite Images)

This notebook builds a feature extractor for satellite images from a pretrained ResNet-18 CNN. The extractor converts each image into a fixed-size, 512-dimensional embedding for use in downstream multimodal regression.

In [2]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import pandas as pd
import os

In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [4]:
# Load the preprocessed tabular splits; each row carries an `image_path`
# column pointing at the satellite image for that sample.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/train_tabular.csv",
        "../data/processed/test_tabular.csv",
    )
)

# Preview the first rows of the training split.
train_df.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_living15,lat,long,log_price,image_path
0,0.677402,0.178963,-0.290276,-0.144952,0.922943,-0.083788,-0.306964,-0.626,-0.557611,-0.473911,-0.900034,0.192759,12.501142,../data/images/train/9117000170_0.png
1,-0.394132,0.505667,-0.521813,-0.311135,0.922943,-0.083788,-0.306964,0.908842,-0.557611,-0.385919,-1.137139,0.192759,12.409018,../data/images/train/6700390210_1.png
2,0.677402,0.505667,-0.389506,-0.160457,0.922943,-0.083788,-0.306964,-0.626,0.29635,-0.165941,-2.098571,-0.706669,12.206078,../data/images/train/7212660540_2.png
3,-1.465666,0.178963,-0.918734,-0.364787,0.922943,-0.083788,-0.306964,-0.626,-0.557611,-1.089851,-0.206791,1.006527,12.772806,../data/images/train/8562780200_3.png
4,-0.394132,-0.147741,-0.874632,-0.038936,-0.918626,-0.083788,-0.306964,-0.626,-0.557611,-0.576568,-1.367738,0.999388,12.354497,../data/images/train/7760400350_4.png


In [5]:
# Preprocessing pipeline applied to every image before it enters the CNN:
#   1. resize to 224x224 pixels,
#   2. convert the PIL image to a tensor,
#   3. normalise each channel with the ImageNet mean / std values documented
#      for torchvision's pretrained models.
image_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [6]:
# Load ResNet-18 with its ImageNet-pretrained weights.
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` enum, so use the enum when this torchvision exposes it and fall
# back to the legacy flag on older releases.
_resnet18_weights = getattr(models, "ResNet18_Weights", None)
if _resnet18_weights is not None:
    # IMAGENET1K_V1 is the checkpoint that `pretrained=True` resolves to.
    resnet = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)
else:
    resnet = models.resnet18(pretrained=True)

resnet = resnet.to(device)
# Eval mode so the BatchNorm layers use their stored running statistics.
# `eval()` returns the module, so the cell still displays the architecture.
resnet.eval()



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\arpit/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:03<00:00, 13.2MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
# Build the embedding network from every child module of the ResNet except the
# last one (the classification head in torchvision's ResNet), so a forward
# pass yields pooled features rather than class logits.
# A freshly constructed Sequential starts in training mode, so put the
# extractor into eval mode explicitly instead of relying on `resnet.eval()`
# having been run in an earlier cell.
feature_extractor = torch.nn.Sequential(
    *list(resnet.children())[:-1]
).to(device).eval()

In [8]:
# Smoke test: push the first training image through the extractor and check
# the size of the resulting embedding.
img_path = train_df.loc[0, "image_path"]

assert os.path.exists(img_path), "Image path does not exist!"

# Force three channels so the input matches the 3-channel normalisation.
img = Image.open(img_path).convert("RGB")

# Preprocess, then add a leading batch dimension and move to the model device.
img_tensor = image_transforms(img)
img_tensor = img_tensor.unsqueeze(0).to(device)

# No gradients are needed for inference; flatten the output to a 1-D vector.
with torch.no_grad():
    features = feature_extractor(img_tensor).view(-1)

features.shape

torch.Size([512])