This script fine-tunes an image encoder with the Circle Loss. Training uses triplets of images: an anchor, a positive, and a negative. The objective minimizes the distance between the anchor and positive embeddings while maximizing the distance between the anchor and negative embeddings; the Circle Loss can be seen as a variant of the triplet loss that re-weights each similarity score. After training for a fixed number of epochs, embeddings are generated for the central regions and saved to a CSV file for further analysis.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.models import convnext_large, ConvNeXt_Large_Weights
from torchvision import transforms
from PIL import Image
import pandas as pd
import geopandas as gpd
import numpy as np
from srai.neighbourhoods import H3Neighbourhood
import random
import wandb
from tqdm import tqdm
import os

In [None]:
# Set the resolution here (9 or 10).
# The value selects the k-ring ranges used for triplet sampling and is
# interpolated into the input, checkpoint and output file names below.
# Any value other than 9 or 10 makes triplet sampling raise ValueError.
RESOLUTION = 10

In [None]:
class BufferedH3TripletDataset(Dataset):
    """Triplet dataset that samples positives and negatives from H3 k-rings.

    For every anchor region a positive is drawn from a close k-ring and a
    negative from a farther k-ring around the anchor. Each item is
    ``((anchor_img, positive_img, negative_img), (anchor_id, positive_id, negative_id))``.

    Args:
        regions_buffered_gdf: Table indexed by region id; its index defines
            the anchors and is handed to ``H3Neighbourhood``.
        image_dir: Directory holding one ``<region_id>.jpg`` per region.
        resolution: H3 resolution selecting the ring ranges (9 or 10).
            Defaults to the module-level ``RESOLUTION``.
    """

    # Inclusive (positive ring range, negative ring range) per supported resolution.
    _RING_RANGES = {
        9: ((1, 2), (3, 4)),
        10: ((1, 4), (5, 8)),
    }

    def __init__(self, regions_buffered_gdf, image_dir, resolution=None):
        self.regions_buffered_gdf = regions_buffered_gdf
        self.image_dir = image_dir
        # Keep two-argument construction working by defaulting to the global setting.
        self.resolution = RESOLUTION if resolution is None else resolution
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        self.neighborhood = H3Neighbourhood(regions_buffered_gdf)

    def __len__(self):
        return len(self.regions_buffered_gdf)

    def __getitem__(self, idx):
        """Return the image triplet and the matching region ids for anchor ``idx``.

        Raises:
            ValueError: If the configured resolution is not 9 or 10.
        """
        anchor_id = self.regions_buffered_gdf.index[idx]

        try:
            positive_range, negative_range = self._RING_RANGES[self.resolution]
        except KeyError:
            raise ValueError("Unsupported resolution") from None

        # Sample k-ring (inside positive - outside negative) hard batch mining
        positive_id = self._sample_from_rings(anchor_id, positive_range)
        if positive_id is None:
            # An anchor paired with itself is a harmless (trivially similar) positive.
            positive_id = anchor_id

        negative_id = self._sample_from_rings(anchor_id, negative_range)
        if negative_id is None:
            # Never use the anchor as its own negative: that would ask the
            # model to push an embedding away from itself.
            negative_id = self._sample_fallback_negative(anchor_id, positive_id)

        images = (
            self.load_image(anchor_id),
            self.load_image(positive_id),
            self.load_image(negative_id),
        )
        return images, (anchor_id, positive_id, negative_id)

    def _sample_from_rings(self, anchor_id, ring_range):
        """Pick a random region from a randomly ordered ring in ``ring_range``.

        Rings are tried in shuffled order so an empty ring (e.g. at the edge
        of the study area) does not immediately force a fallback. Returns
        ``None`` when every ring in the range is empty.
        """
        first_ring, last_ring = ring_range
        rings = list(range(first_ring, last_ring + 1))
        random.shuffle(rings)
        for ring in rings:
            neighbours = self.neighborhood.get_neighbours_at_distance(anchor_id, ring)
            if neighbours:
                return random.choice(list(neighbours))
        return None

    def _sample_fallback_negative(self, anchor_id, positive_id):
        """Pick a random region that is neither the anchor nor the positive.

        Falls back to the anchor only when the dataset holds no other region.
        """
        excluded = {anchor_id, positive_id}
        candidates = [
            region_id
            for region_id in self.regions_buffered_gdf.index
            if region_id not in excluded
        ]
        return random.choice(candidates) if candidates else anchor_id

    def load_image(self, region_id):
        """Load and transform the image of ``region_id``; zeros if the file is absent."""
        image_path = os.path.join(self.image_dir, f"{region_id}.jpg")
        if not os.path.exists(image_path):
            return torch.zeros(3, 224, 224)
        # The context manager releases the file handle deterministically.
        with Image.open(image_path) as image:
            return self.transform(image.convert('RGB'))

class FineTunedConvNeXt(nn.Module):
    """ConvNeXt-Large backbone whose classifier is replaced by an identity.

    With the classification head removed, ``forward`` returns the backbone
    features flattened to one vector per input image.
    """

    def __init__(self):
        super().__init__()
        # Load the pretrained ConvNeXt model
        self.convnext = convnext_large(weights=ConvNeXt_Large_Weights.DEFAULT)
        # Replace the classifier layer with an identity layer to get embeddings
        self.convnext.classifier = nn.Identity()
        # Verify the output size of the model. The probe is purely diagnostic,
        # so skip building an autograd graph for it.
        with torch.no_grad():
            probe_output = self.convnext(torch.randn(1, 3, 224, 224))
        print(f"ConvNeXt output size: {probe_output.shape[1]}")

    def forward(self, x):
        """Return one flat embedding per image in the batch ``x``."""
        # flatten keeps the batch dimension and, unlike view, also accepts
        # non-contiguous backbone outputs.
        return torch.flatten(self.convnext(x), start_dim=1)

class CircleLoss(nn.Module):
    """Circle Loss over positive-pair and negative-pair similarity scores.

    Args:
        m: Relaxation margin; sets the optima (``1 + m`` / ``-m``) and the
            decision thresholds (``1 - m`` / ``m``).
        gamma: Scale factor applied to the weighted logits.
    """

    def __init__(self, m=0.25, gamma=256):
        super().__init__()
        self.m = m
        self.gamma = gamma
        self.soft_plus = nn.Softplus()

    def forward(self, sp, sn):
        """Return the scalar loss for positive scores ``sp`` and negative scores ``sn``."""
        margin = self.m
        scale = self.gamma

        # Adaptive weights are computed on detached scores so gradients flow
        # only through the (score - threshold) terms.
        weight_pos = torch.clamp((1 - sp.detach()) + margin, min=0.)
        weight_neg = torch.clamp(sn.detach() + margin, min=0.)

        threshold_pos = 1 - margin
        threshold_neg = margin

        pos_logits = (threshold_pos - sp) * weight_pos * scale
        neg_logits = weight_neg * (sn - threshold_neg) * scale

        combined = torch.logsumexp(neg_logits, dim=0) + torch.logsumexp(pos_logits, dim=0)
        return self.soft_plus(combined)

In [None]:
def train_model(model, dataloader, optimizer, criterion, device, epochs, checkpoint_dir, resume_epoch=0):
    """Train ``model`` on triplet batches and checkpoint after every epoch.

    Args:
        model: Encoder mapping an image batch to one embedding per image.
        dataloader: Yields ``((anchor, positive, negative), ids)`` batches.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss called as ``criterion(sp, sn)`` with the
            anchor-positive and anchor-negative similarity scores.
        device: Device the image batches are moved to.
        epochs: Total number of epochs (upper bound of the epoch range).
        checkpoint_dir: Directory for per-epoch, best and final checkpoints.
        resume_epoch: Number of epochs already completed; training continues
            from this epoch index.

    Raises:
        ValueError: If training is requested but ``dataloader`` has no batches.
    """
    os.makedirs(checkpoint_dir, exist_ok=True)

    num_batches = len(dataloader)
    if num_batches == 0 and resume_epoch < epochs:
        # Fail up front instead of dividing by zero after an empty epoch.
        raise ValueError("dataloader yields no batches; nothing to train on")

    best_loss = float('inf')
    for epoch in range(resume_epoch, epochs):
        model.train()
        total_loss = 0
        for (anchor_imgs, positive_imgs, negative_imgs), _ in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            anchor_imgs = anchor_imgs.to(device)
            positive_imgs = positive_imgs.to(device)
            negative_imgs = negative_imgs.to(device)

            optimizer.zero_grad()

            # L2-normalise so the dot products below are cosine similarities
            # in [-1, 1]; the Circle Loss margins assume that range.
            anchor_features = nn.functional.normalize(model(anchor_imgs), dim=1)
            positive_features = nn.functional.normalize(model(positive_imgs), dim=1)
            negative_features = nn.functional.normalize(model(negative_imgs), dim=1)

            sp = (anchor_features * positive_features).sum(dim=1)
            sn = (anchor_features * negative_features).sum(dim=1)

            loss = criterion(sp, sn)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            total_loss += batch_loss
            wandb.log({"batch_loss": batch_loss, "learning_rate": optimizer.param_groups[0]['lr']})

        avg_loss = total_loss / num_batches
        print(f"Epoch {epoch+1}/{epochs}, Average Loss: {avg_loss:.4f}")
        wandb.log({"epoch": epoch+1, "average_loss": avg_loss})

        checkpoint = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_loss,
        }
        torch.save(checkpoint, os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch+1}.pth'))
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(checkpoint, os.path.join(checkpoint_dir, f'best_model_{RESOLUTION}.pth'))

    # The final file stores only the model weights, not the optimizer state.
    torch.save(model.state_dict(), os.path.join(checkpoint_dir, f'final_model_{RESOLUTION}.pth'))

In [None]:
def generate_embeddings(model, regions_gdf, image_dir, device, batch_size=64):
    """Embed every region image and report regions whose image is missing.

    Args:
        model: Encoder mapping an image batch to one embedding per image.
        regions_gdf: Table indexed by region id.
        image_dir: Directory holding one ``<region_id>.jpg`` per region.
        device: Device used for inference.
        batch_size: Number of images per forward pass.

    Returns:
        A ``(embeddings_df, missing_regions)`` tuple. ``embeddings_df`` has
        one row per region with an available image, indexed by region id.
        ``missing_regions`` lists the ids whose image was absent; those ids
        get no row, so the caller can fill them in explicitly.
    """
    model.eval()
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    dataset = RegionDataset(regions_gdf, image_dir, transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    embeddings = {}
    missing_regions = []

    with torch.no_grad():
        for images, region_ids in tqdm(dataloader, desc="Generating embeddings"):
            # RegionDataset substitutes an all-zero tensor for a missing image.
            # Detect that on the input: the network's output for a zero image
            # is generally not zero, so testing the features would miss it.
            is_missing = (images.flatten(start_dim=1) == 0).all(dim=1).tolist()

            # Numeric ids are collated into a tensor; convert them back to
            # plain (hashable) Python values for use as dictionary keys.
            if isinstance(region_ids, torch.Tensor):
                region_ids = region_ids.tolist()

            # One device-to-host transfer per batch instead of one per row.
            features = model(images.to(device)).cpu().numpy()

            for feature, region_id, missing in zip(features, region_ids, is_missing):
                if missing:
                    missing_regions.append(region_id)
                else:
                    embeddings[region_id] = feature

    print(f"Number of missing regions: {len(missing_regions)}")
    return pd.DataFrame.from_dict(embeddings, orient='index'), missing_regions

class RegionDataset(Dataset):
    """Dataset yielding ``(image_tensor, region_id)`` for every region in a table.

    Args:
        regions_gdf: Table indexed by region id.
        image_dir: Directory holding one ``<region_id>.jpg`` per region.
        transform: Callable applied to the loaded RGB image.
    """

    def __init__(self, regions_gdf, image_dir, transform):
        self.regions_gdf = regions_gdf
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.regions_gdf)

    def __getitem__(self, idx):
        """Return the transformed image and id of the region at position ``idx``."""
        region_id = self.regions_gdf.index[idx]
        image_path = os.path.join(self.image_dir, f"{region_id}.jpg")
        try:
            rgb_image = Image.open(image_path).convert('RGB')
            sample = self.transform(rgb_image)
        except FileNotFoundError:
            # Return a zero tensor instead of None so batches can still be collated
            sample = torch.zeros(3, 224, 224)
        return sample, region_id

In [None]:
if __name__ == "__main__":
    # Entry point: configure the run, (optionally) resume from the best
    # checkpoint, train, embed all regions, fill in missing regions from
    # their direct neighbours and save the result as CSV.
    wandb.init(project="Urban_Representation_Learning", config={
        "learning_rate": 1e-5,  # Adjusted for Adam optimizer
        "epochs": 2,
        "batch_size": 16,
        "resolution": RESOLUTION,
        "weight_decay": 1e-4,  # Adjusted for Adam optimizer
    })

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    regions_gdf = gpd.read_file(f"selected_regions_{RESOLUTION}.geojson").set_index("region_id")
    regions_buffered_gdf = gpd.read_file(f"selected_regions_buffered_{RESOLUTION}.geojson").set_index("region_id")
    image_dir = fr"D:\tu delft\Afstuderen\aerial_images_{RESOLUTION}"

    dataset = BufferedH3TripletDataset(regions_buffered_gdf, image_dir)
    dataloader = DataLoader(dataset, batch_size=wandb.config.batch_size, shuffle=True)

    model = FineTunedConvNeXt().to(device)
    optimizer = optim.Adam(model.parameters(), lr=wandb.config.learning_rate, weight_decay=wandb.config.weight_decay)
    criterion = CircleLoss()

    checkpoint_dir = fr"D:\tu delft\Afstuderen\Phase 6 Experiments\checkpoints_res{RESOLUTION}"
    os.makedirs(checkpoint_dir, exist_ok=True)
    resume_epoch = 0

    best_model_path = os.path.join(checkpoint_dir, f'best_model_{RESOLUTION}.pth')
    if os.path.exists(best_model_path):
        # map_location lets a checkpoint saved on a GPU be restored on a
        # CPU-only machine (and vice versa).
        checkpoint = torch.load(best_model_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        resume_epoch = checkpoint['epoch']
        print(f"Resuming training from epoch {resume_epoch}")
    else:
        print("Starting training from scratch")

    print("Starting training...")
    train_model(model, dataloader, optimizer, criterion, device, wandb.config.epochs, checkpoint_dir, resume_epoch)

    # After calling generate_embeddings
    embeddings_df, missing_regions = generate_embeddings(model, regions_gdf, image_dir, device)

    print(f"Number of missing regions: {len(missing_regions)}")

    # Discard any row stored for a region reported as missing: such a row is
    # not derived from a real image, must not leak into the neighbour
    # averages, and would hide the region from the "still missing" report.
    embeddings_df = embeddings_df.drop(index=missing_regions, errors="ignore")

    # Local averaging for missing region_ids
    neighborhood = H3Neighbourhood(regions_buffered_gdf)
    for region_id in missing_regions:
        neighbors = neighborhood.get_neighbours_at_distance(region_id, 1)
        neighbor_embeddings = embeddings_df.loc[embeddings_df.index.isin(neighbors)]
        if not neighbor_embeddings.empty:
            # Assigning to a new label appends the averaged row.
            embeddings_df.loc[region_id] = neighbor_embeddings.mean()

    # Save embeddings
    output_dir = r"D:\tu delft\Afstuderen\Phase 6 Experiments\embeddings"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"learned_finetune_circle_h3_res_{RESOLUTION}.csv")
    embeddings_df.to_csv(output_file)
    print(f"Embeddings saved to {output_file}")

    # Print information about remaining missing regions
    remaining_missing = set(missing_regions) - set(embeddings_df.index)
    print(f"Number of regions still missing after local averaging: {len(remaining_missing)}")
    if remaining_missing:
        print("These regions have no neighbors with embeddings:")
        print(remaining_missing)

    model.eval()
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Total number of parameters: {total_params:,}")

    wandb.finish()
    print("All done!")

In [None]:
# Reduce the learned embeddings with PCA and plot an agglomerative clustering.
from Plotting import pca_plot, cluster_agglomerative_plot, cluster_kmeans_plot
import warnings
warnings.filterwarnings("ignore")
from sklearn.decomposition import PCA
# PCA cannot extract more components than min(n_samples, n_features), so cap
# the requested 100 components for small tables.
n_components = min(100, *embeddings_df.shape)
pca = PCA(n_components=n_components)
reduced_values = pca.fit_transform(embeddings_df)
# print variance explained
print(f"Variance explained: {pca.explained_variance_ratio_.sum()*100}%")
# fit_transform returns a bare array; re-attach the region ids as index
embeddings_reduced_df = pd.DataFrame(reduced_values, index=embeddings_df.index)
# NOTE(review): the trailing 10 is presumably the number of clusters - confirm in Plotting.
cluster_agglomerative_plot(embeddings_reduced_df, regions_gdf, 10)

In [None]:
# # After calling generate_embeddings
# embeddings_df, missing_regions = generate_embeddings(model, regions_gdf, image_dir, device)
# 
# print(f"Number of missing regions: {len(missing_regions)}")
# 
# # Local averaging for missing region_ids
# neighborhood = H3Neighbourhood(regions_buffered_gdf)
# for region_id in missing_regions:
#     neighbors = neighborhood.get_neighbours_at_distance(region_id, 1)
#     neighbor_embeddings = embeddings_df.loc[embeddings_df.index.isin(neighbors)]
#     if not neighbor_embeddings.empty:
#         embeddings_df.loc[region_id] = neighbor_embeddings.mean()
# 
# # Save embeddings
# output_dir = r"D:\tu delft\Afstuderen\Phase 6 Experiments\embeddings"
# os.makedirs(output_dir, exist_ok=True)
# output_file = os.path.join(output_dir, f"learned_finetune_circle_h3_res_{RESOLUTION}.csv")
# embeddings_df.to_csv(output_file)
# print(f"Embeddings saved to {output_file}")
# 
# # Print information about remaining missing regions
# remaining_missing = set(missing_regions) - set(embeddings_df.index)
# print(f"Number of regions still missing after local averaging: {len(remaining_missing)}")
# if remaining_missing:
#     print("These regions have no neighbors with embeddings:")
#     print(remaining_missing)
# 
# model.eval()
# total_params = sum(p.numel() for p in model.parameters())
# print(f"Total number of parameters: {total_params:,}")
# 
# wandb.finish()
# print("All done!")

In [None]:
# # plotting embeddings for test
# from Plotting import pca_plot, cluster_agglomerative_plot, cluster_kmeans_plot
# import warnings
# warnings.filterwarnings("ignore")
# from sklearn.decomposition import PCA
# pca = PCA(n_components=100)
# embeddings_reduced_df = pca.fit_transform(embeddings_df)
# # print variance explained
# print(f"Variance explained: {pca.explained_variance_ratio_.sum()*100}%")
# # ensure the index is preserved
# embeddings_reduced_df = pd.DataFrame(embeddings_reduced_df, index=embeddings_df.index)
# cluster_agglomerative_plot(embeddings_reduced_df, regions_gdf, 10)

In [None]:
# # get r squared value of dim reduced embeddings to leefbaarometer scores in region_gdf (regions_gdf['afw'])
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import r2_score
# import numpy as np
# # get the scores
# scores = regions_gdf['afw']
# # get the embeddings
# embeddings = embeddings_df.loc[regions_gdf.index]
# # fit the model
# model = LinearRegression().fit(embeddings, scores)
# # get the r squared value
# r2 = r2_score(scores, model.predict(embeddings))
# print(r2)