In [None]:
# Attach Google Drive to this Colab runtime; the project paths used in this
# notebook live under the mount point below.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


# FEATURE EXTRACTION AND PCA

In [None]:
from PIL import Image
from torchvision import transforms
import os

# Sanity check: run one sample image through the preprocessing pipeline and
# inspect the resulting tensor before doing anything heavier.
IMAGES_PATH = "/content/drive/MyDrive/IITR/Satellite-project/images"

# Preprocessing: resize to 224x224, then convert the PIL image to a tensor.
# NOTE(review): there is no transforms.Normalize step. torchvision documents
# ImageNet mean/std normalisation for its pretrained weights (a pretrained
# ResNet50 is loaded later in this notebook) -- confirm that skipping it is
# intentional before changing anything, since saved features depend on it.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
image_transform = transforms.Compose(preprocessing_steps)

img_path = os.path.join(IMAGES_PATH, "15000.png")
img = Image.open(img_path).convert("RGB")  # force a 3-channel image
img_tensor = image_transform(img)

# Report the tensor layout and its value range.
tensor_min = img_tensor.min().item()
tensor_max = img_tensor.max().item()
print("Tensor shape:", img_tensor.shape)
print("Min/Max:", tensor_min, tensor_max)


Tensor shape: torch.Size([3, 224, 224])
Min/Max: 0.0470588244497776 0.9803921580314636


In [None]:
import torch
import torchvision.models as models


# Pick the compute device: CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Feature extractor: pretrained ResNet50 with its last child module removed,
# wrapped in a Sequential and moved to the selected device.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` -- check the installed version before switching.
backbone_layers = list(models.resnet50(pretrained=True).children())
resnet = torch.nn.Sequential(*backbone_layers[:-1]).to(device)

# Switch to evaluation mode; as the cell's final expression this also
# displays the module structure.
resnet.eval()


Using device: cpu




Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 165MB/s]


Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [None]:

# Run the sample image through the feature extractor once.
sample_path = os.path.join(IMAGES_PATH, "15000.png")
img = Image.open(sample_path).convert("RGB")

# Preprocess, prepend a batch dimension, then move to the model's device.
img_tensor = image_transform(img)
img_tensor = img_tensor.unsqueeze(0)
img_tensor = img_tensor.to(device)

# No gradients are needed for a pure forward pass.
with torch.no_grad():
    features = resnet(img_tensor)

print("Raw feature shape:", features.shape)


Raw feature shape: torch.Size([1, 2048, 1, 1])


In [None]:
# Keep the batch axis and collapse all remaining axes into one feature axis.
batch_size = features.size(0)
features = features.view(batch_size, -1)
print("Flattened shape:", features.shape)


Flattened shape: torch.Size([1, 2048])


In [None]:
import os
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
from tqdm import tqdm


# ---------------------------------------------------------------------------
# Full-dataset CNN feature extraction.
# Reads IMAGES_PATH/<i>.png for i in range(N_IMAGES), runs each image through
# the truncated ResNet50 and stores one 2048-d row per image in SAVE_PATH.
# ---------------------------------------------------------------------------
IMAGES_PATH = "/content/drive/MyDrive/IITR/Satellite-project/images"
SAVE_PATH = "/content/drive/MyDrive/IITR/Satellite-project/image_features/resnet50_features.npy"

os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# image transform
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# load pretrained ResNet50 (feature extractor)
import torchvision.models as models

resnet = models.resnet50(pretrained=True)
resnet = torch.nn.Sequential(*list(resnet.children())[:-1])  # remove classifier
resnet = resnet.to(device)
resnet.eval()

# total images and per-image feature width
N_IMAGES = 16209
FEATURE_DIM = 2048

# storage: rows stay all-zero for images that could not be read
features_all = np.zeros((N_IMAGES, FEATURE_DIM), dtype=np.float32)
bad_images = []  # indices of images that failed to load

# extraction loop
with torch.no_grad():
    for i in tqdm(range(N_IMAGES), desc="Extracting CNN features"):
        img_path = os.path.join(IMAGES_PATH, f"{i}.png")

        # Only decoding/preprocessing is guarded: a single unreadable file
        # must not abort the whole run before anything is saved, while model
        # or device errors should still surface.
        try:
            img = Image.open(img_path).convert("RGB")
            img_tensor = image_transform(img).unsqueeze(0).to(device)
        except Exception as e:
            bad_images.append(i)
            print(f"⚠️ Skipped corrupted image: {i}.png | Error: {e}")
            continue

        # forward pass
        feats = resnet(img_tensor)            # [1, 2048, 1, 1]
        feats = feats.view(-1).cpu().numpy()  # [2048]

        features_all[i] = feats

# save to disk
np.save(SAVE_PATH, features_all)

print("Saved CNN features to:", SAVE_PATH)
print("Final shape:", features_all.shape)
print("Total corrupted images:", len(bad_images))
print("Corrupted image indices:", bad_images)


In [None]:
import os
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
import torchvision.models as models

# ---------------------------------------------------------------------------
# Full-dataset CNN feature extraction with tolerance for unreadable images.
# Reads IMAGES_PATH/<i>.png for i in range(N_IMAGES), runs each image through
# the truncated ResNet50 and writes an (N_IMAGES, FEATURE_DIM) float32 array
# to SAVE_PATH. Images that cannot be loaded keep an all-zero row.
# ---------------------------------------------------------------------------

# PATHS
IMAGES_PATH = "/content/drive/MyDrive/IITR/Satellite-project/images"
SAVE_PATH = "/content/drive/MyDrive/IITR/Satellite-project/image_features/resnet50_features.npy"

os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)

#  DEVICE
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

#  IMAGE TRANSFORM
# NOTE(review): inputs are not mean/std-normalised. torchvision documents
# ImageNet normalisation for its pretrained weights -- confirm intent before
# changing, because the saved features and the PCA fitted on them depend on
# the current preprocessing.
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

#  LOAD RESNET50
resnet = models.resnet50(pretrained=True)
resnet = torch.nn.Sequential(*list(resnet.children())[:-1])  # remove classifier
resnet = resnet.to(device)
resnet.eval()


N_IMAGES = 16209
FEATURE_DIM = 2048

# Zero-initialised, so rows of skipped images are already zero vectors.
features_all = np.zeros((N_IMAGES, FEATURE_DIM), dtype=np.float32)
bad_images = []  # indices of images that failed to load


with torch.no_grad():
    for i in tqdm(range(N_IMAGES), desc="Extracting CNN features"):
        img_path = os.path.join(IMAGES_PATH, f"{i}.png")

        # Guard only the image-dependent steps. Keeping the forward pass
        # outside the try block means a model/GPU failure is raised instead
        # of being reported as a "corrupted image" for every remaining file.
        try:
            img = Image.open(img_path).convert("RGB")
            img_tensor = image_transform(img).unsqueeze(0).to(device)
        except Exception as e:
            bad_images.append(i)
            print(f"⚠️ Skipped corrupted image: {i}.png | Error: {e}")
            continue

        feats = resnet(img_tensor)
        features_all[i] = feats.view(-1).cpu().numpy()

# ================= SAVE =================
np.save(SAVE_PATH, features_all)

print("Saved CNN features to:", SAVE_PATH)
print("Final feature shape:", features_all.shape)
print("Total corrupted images:", len(bad_images))
print("Corrupted image indices:", bad_images)


Using device: cuda


Extracting CNN features:   0%|          | 8/16209 [00:00<03:38, 74.25it/s]

⚠️ Skipped corrupted image: 9.png | Error: cannot identify image file '/content/drive/MyDrive/IITR/Satellite-project/images/9.png'


Extracting CNN features: 100%|██████████| 16209/16209 [3:02:27<00:00,  1.48it/s]

Saved CNN features to: /content/drive/MyDrive/IITR/Satellite-project/image_features/resnet50_features.npy
Final feature shape: (16209, 2048)
Total corrupted images: 1
Corrupted image indices: [9]





In [None]:
import numpy as np
from sklearn.decomposition import PCA

# Input and output locations for the dimensionality-reduction step.
FEATURE_PATH = "/content/drive/MyDrive/IITR/Satellite-project/image_features/resnet50_features.npy"
PCA_SAVE_PATH = "/content/drive/MyDrive/IITR/Satellite-project/image_features/resnet50_pca64.npy"

# Read back the per-image CNN feature matrix from disk.
X_img = np.load(FEATURE_PATH)
print("Original shape:", X_img.shape)

# Fit a 64-component PCA with a fixed seed and project the features.
pca = PCA(n_components=64, random_state=42)
X_img_pca = pca.fit_transform(X_img)
print("PCA shape:", X_img_pca.shape)

# Share of the total variance captured by the retained components.
retained_variance = pca.explained_variance_ratio_.sum()
print("Explained variance ratio:", retained_variance)

# Write the reduced features alongside the raw ones.
np.save(PCA_SAVE_PATH, X_img_pca)
print("Saved PCA features to:", PCA_SAVE_PATH)


Original shape: (16209, 2048)
PCA shape: (16209, 64)
Explained variance ratio: 0.86724424
Saved PCA features to: /content/drive/MyDrive/IITR/Satellite-project/image_features/resnet50_pca64.npy


In [None]:
import os

import joblib

# Persist the fitted PCA object itself, not just the projected features.
# NOTE(review): presumably needed so new images can later be projected with
# the same fitted components -- confirm against the downstream pipeline.
PCA_MODEL_PATH = "/content/drive/MyDrive/IITR/Satellite-project/models/image_pca.joblib"

# joblib.dump does not create missing parent folders, so make sure the
# target directory exists first (same pattern as the other save cells).
os.makedirs(os.path.dirname(PCA_MODEL_PATH), exist_ok=True)
joblib.dump(pca, PCA_MODEL_PATH)

print("Saved PCA model to:", PCA_MODEL_PATH)


Saved PCA model to: /content/drive/MyDrive/IITR/Satellite-project/models/image_pca.joblib


In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Hand-crafted, interpretable per-image features:
#   [green_ratio, blue_ratio, edge_density, brightness]
# computed for IMAGES_PATH/<i>.png, i in range(N_IMAGES), and saved as an
# (N_IMAGES, 4) float32 array. Unreadable images keep an all-zero row.
# ---------------------------------------------------------------------------

# paths
IMAGES_PATH = "/content/drive/MyDrive/IITR/Satellite-project/images"
SAVE_PATH = "/content/drive/MyDrive/IITR/Satellite-project/image_features/interpretable_features.npy"

# make sure the output folder exists before the (long) loop starts
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)

N_IMAGES = 16209


def _interpretable_features(img_bgr):
    """Compute the four interpretable features for one image.

    Args:
        img_bgr: image array in BGR channel order, as returned by cv2.imread.

    Returns:
        [green_ratio, blue_ratio, edge_density, brightness], where the first
        three are the fraction of pixels selected by the green HSV mask, the
        blue HSV mask and the Canny edge map, and brightness is the mean
        grayscale value divided by 255.
    """
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # ---- GREEN & BLUE RATIO (HSV) ----
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)

    # green mask
    green_mask = cv2.inRange(hsv, (35, 40, 40), (85, 255, 255))
    green_ratio = np.count_nonzero(green_mask) / green_mask.size

    # blue mask (water)
    blue_mask = cv2.inRange(hsv, (90, 50, 50), (140, 255, 255))
    blue_ratio = np.count_nonzero(blue_mask) / blue_mask.size

    # ---- EDGE DENSITY ----
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    edge_density = np.count_nonzero(edges) / edges.size

    # ---- BRIGHTNESS ----
    brightness = np.mean(gray) / 255.0

    return [green_ratio, blue_ratio, edge_density, brightness]


# storage: [green, blue, edge_density, brightness]; zero rows mark bad images
interp_features = np.zeros((N_IMAGES, 4), dtype=np.float32)

bad_images = []  # indices of images that could not be read or processed

for i in tqdm(range(N_IMAGES), desc="Extracting interpretable features"):
    img_path = os.path.join(IMAGES_PATH, f"{i}.png")

    # cv2.imread signals failure by returning None rather than raising,
    # so check for it explicitly instead of relying on a later exception.
    img = cv2.imread(img_path)
    if img is None:
        bad_images.append(i)
        print(f"⚠️ Skipped corrupted image: {i}.png | Error: cv2.imread could not read the file")
        continue

    try:
        interp_features[i] = _interpretable_features(img)
    except cv2.error as e:
        # Report OpenCV processing failures instead of swallowing them.
        bad_images.append(i)
        print(f"⚠️ Skipped corrupted image: {i}.png | Error: {e}")

# save
np.save(SAVE_PATH, interp_features)

print("Saved interpretable features to:", SAVE_PATH)
print("Shape:", interp_features.shape)
print("Corrupted images handled:", len(bad_images))
print("Corrupted image indices:", bad_images)
