In [1]:
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
from torchvision import models, transforms
from sklearn.cluster import KMeans
import joblib

# --- 1. Setup ---
print("Setting up model and transformations...")

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Directory that holds the input .png images.
image_dir = "/kaggle/input/satellite-images-to-predict-povertyafrica/nigeria_archive/images"

# Pre-trained ResNet50 (selected through the `weights` parameter), moved to
# `device` and switched to eval mode. Both `.to()` and `.eval()` return the
# module itself, so the calls can be chained.
model = models.resnet50(weights='IMAGENET1K_V1').to(device).eval()

# Preprocessing applied to every image before it is fed to the model:
# array -> PIL image -> 224x224 -> tensor -> per-channel normalisation.
preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# --- 2. Feature Extraction ---
# Runs every .png in `image_dir` through `model` and collects one flattened
# output vector per readable image into `features_array` (one row per image).
#
# NOTE(review): `model` is called as loaded, with its classification head still
# attached, so each vector is the network's final output (1000 values per image
# according to the recorded run) rather than the pooled embedding that precedes
# the head. If embeddings were intended, the head would have to be replaced
# where the model is built -- confirm first, since that changes the width of
# the saved feature matrix.
print("Starting feature extraction from all images... (This will take a while)")

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the feature matrix (and therefore the seeded K-Means fit) reproducible.
file_names = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))

features = []
processed_file_names = []  # processed_file_names[i] is the image behind features[i]
skipped_file_names = []    # files that cv2 could not read

# Inference only: a single no_grad context around the whole loop instead of
# re-entering it for every image.
with torch.no_grad():
    for file_name in tqdm(file_names, desc="Extracting Features"):
        file_path = os.path.join(image_dir, file_name)
        img = cv2.imread(file_path)
        if img is None:
            # cv2.imread returns None when the file cannot be read; record the
            # name instead of dropping it silently.
            skipped_file_names.append(file_name)
            continue

        # OpenCV loads images as BGR; convert to RGB before preprocessing.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # unsqueeze(0) adds the batch dimension the model expects.
        img_transformed = transform(img_rgb).unsqueeze(0).to(device)

        feature = model(img_transformed)
        features.append(feature.cpu().numpy().flatten())
        processed_file_names.append(file_name)

if skipped_file_names:
    print(
        f"Warning: skipped {len(skipped_file_names)} unreadable image(s), "
        f"e.g. {skipped_file_names[:5]}"
    )

# Fail here with a clear message rather than later with an empty array.
if not features:
    raise RuntimeError(f"No readable .png images found in {image_dir!r}; nothing to extract.")

features_array = np.array(features)
print(f"Feature extraction complete. Shape: {features_array.shape}")

# --- 3. K-Means Training ---
print("Training K-Means model...")

# Number of clusters to fit.
k = 3

# Build and fit in one expression: `fit` returns the fitted estimator itself.
# A fixed random_state keeps the initialisation repeatable; n_init=10 runs ten
# initialisations and keeps the best one.
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10).fit(features_array)

print("K-Means training complete.")

# --- 4. Save Artifacts ---
# Persist the feature matrix and the fitted K-Means model under
# /kaggle/working/ so they can be turned into a dataset afterwards.
features_output_path = "/kaggle/working/image_features.npy"
model_output_path = "/kaggle/working/kmeans_model.joblib"

# Save the features array (NumPy .npy format) and the trained model (joblib).
np.save(features_output_path, features_array)
joblib.dump(kmeans, model_output_path)

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\nArtifacts saved successfully!")
print(f"Features saved to: {features_output_path}")
print(f"Trained model saved to: {model_output_path}")
print("\nGo to the 'Output' section in the right panel to create a new dataset from these files.")

Setting up model and transformations...


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 156MB/s]


Starting feature extraction from all images... (This will take a while)


Extracting Features: 100%|██████████| 11535/11535 [25:20<00:00,  7.59it/s]


Feature extraction complete. Shape: (11535, 1000)
Training K-Means model...
K-Means training complete.
\nArtifacts saved successfully!
Features saved to: /kaggle/working/image_features.npy
Trained model saved to: /kaggle/working/kmeans_model.joblib
\nGo to the 'Output' section in the right panel to create a new dataset from these files.
