In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import pandas as pd
import requests
from tqdm import tqdm

TEST_CSV = "/content/drive/MyDrive/Satellite_Imagery_Property_Valuation/data/raw/test2(test(1)).csv"
IMAGE_DIR = "/content/drive/MyDrive/Satellite_Imagery_Property_Valuation/data/images/satellite_images_test"

os.makedirs(IMAGE_DIR, exist_ok=True)

ESRI_URL = (
    "https://services.arcgisonline.com/ArcGIS/rest/services/"
    "World_Imagery/MapServer/export"
)

BBOX_SIZE = 0.002

def fetch_image(lat, lon, pid):
    bbox = f"{lon-BBOX_SIZE},{lat-BBOX_SIZE},{lon+BBOX_SIZE},{lat+BBOX_SIZE}"
    params = {
        "bbox": bbox,
        "bboxSR": 4326,
        "imageSR": 4326,
        "size": "224,224",
        "format": "png",
        "f": "image"
    }

    path = os.path.join(IMAGE_DIR, f"{pid}.png")
    if os.path.exists(path):
        return

    r = requests.get(ESRI_URL, params=params, timeout=20)
    if r.status_code == 200:
        with open(path, "wb") as f:
            f.write(r.content)

# Load test data
test_df = pd.read_csv(TEST_CSV)

for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
    fetch_image(row["lat"], row["long"], row["id"])

100%|██████████| 5404/5404 [39:53<00:00,  2.26it/s]


In [4]:
import torch
import numpy as np
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cnn = models.resnet18(pretrained=True)
cnn.fc = torch.nn.Identity()
cnn = cnn.to(device)
cnn.eval()




Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 160MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [7]:
class SatelliteDataset(Dataset):
    def __init__(self, image_paths):
        self.image_paths = image_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        return transform(img)


In [8]:
test_df["image_path"] = test_df["id"].astype(str).apply(
    lambda x: os.path.join(IMAGE_DIR, f"{x}.png")
)

test_df = test_df[test_df["image_path"].apply(os.path.exists)].reset_index(drop=True)


In [9]:
loader = DataLoader(
    SatelliteDataset(test_df["image_path"].values),
    batch_size=32,
    shuffle=False
)

test_img_features = []

with torch.no_grad():
    for batch in tqdm(loader, desc="Extracting CNN features"):
        batch = batch.to(device)
        feats = cnn(batch)
        test_img_features.append(feats.cpu().numpy())

X_test_img = np.vstack(test_img_features)


Extracting CNN features: 100%|██████████| 169/169 [06:36<00:00,  2.34s/it]


In [50]:
tabular_features = [
    'bedrooms', 'bathrooms', 'sqft_living',
    'floors', 'waterfront', 'view',
    'condition', 'grade',
    'sqft_living15', 'sqft_lot15',
    'lat', 'long'
]

X_test_tab = test_df[tabular_features].values

In [51]:
import joblib

scaler_tab = joblib.load("/content/drive/MyDrive/Satellite_Imagery_Property_Valuation/data/scaler_tab.pkl")
scaler_img = joblib.load("/content/drive/MyDrive/Satellite_Imagery_Property_Valuation/data/scaler_img.pkl")
fusion_model = joblib.load("/content/drive/MyDrive/Satellite_Imagery_Property_Valuation/data/xgb.pkl")

In [52]:
X_test_tab_scaled = scaler_tab.transform(X_test_tab)
X_test_img_scaled = scaler_img.transform(X_test_img)

In [53]:
from sklearn.decomposition import PCA

pca = PCA(n_components=50, random_state=42)
X_test_img_pca = pca.fit_transform(X_test_img_scaled)

print("Explained variance:", pca.explained_variance_ratio_.sum())

Explained variance: 0.6854049


In [54]:
X_test_fusion = np.hstack([
    X_test_tab_scaled,
    X_test_img_pca
])

In [55]:
test_log_preds = fusion_model.predict(X_test_fusion)
test_preds = np.expm1(test_log_preds)

In [56]:
test_preds

array([ 383570.28,  974638.1 , 1021657.56, ...,  277506.88, 1878489.5 ,
        303873.66], dtype=float32)

In [57]:
predicted_file = pd.DataFrame({
    "id": test_df["id"],
    "predicted_price": test_preds
})

predicted_file.to_csv(
    "/content/drive/MyDrive/Satellite_Imagery_Property_Valuation/prediciton.csv",
    index=False
)