## Mount Google Drive (Google Colab only)

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

## Load Imports

In [2]:
import os, sys
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
import pandas as pd
import pickle
import zipfile
import math

## Load Similarity Results

In [None]:
# Pickle files holding the similarity results, one per model
dreamsim_result = './results/dreamsim_results.pkl'
dino_result = './results/dino_results.pkl'
openclip_result = './results/openclip_results.pkl'


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Loading the pickle files
dreamsim_results = _load_pickle(dreamsim_result)
dino_results = _load_pickle(dino_result)
openclip_results = _load_pickle(openclip_result)

# Verifying the loaded data
print("DreamSim Results:", type(dreamsim_results))
print("DINO Results:", type(dino_results))
print("OpenCLIP Results:", type(openclip_results))

## Load Images

In [4]:
# Locations of the NOAA and GSV image data, relative to the working directory
noaa_path = './data/NOAA_FINAL_BUILDING'
gsv_path = './data/GSV_FINAL_BUILDING'

In [None]:
# # Define the extraction directory
# noaa_data_dir = '/content/noaa_original_images'
# gsv_data_dir = '/content/gsv_original_images'

# # Unzip the file
# with zipfile.ZipFile(noaa_path, 'r') as zip_ref:
#     zip_ref.extractall(noaa_data_dir)
# print(f'Files extracted to: {noaa_data_dir}')

# with zipfile.ZipFile(gsv_path, 'r') as zip_ref:
#     zip_ref.extractall(gsv_data_dir)
# print(f'Files extracted to: {gsv_data_dir}')

In [5]:
# Directories that the later cells walk, clean up, and read images from
noaa_data_dir = './data/NOAA_FINAL_BUILDING'
gsv_data_dir = './data/GSV_FINAL_BUILDING'

In [None]:
# Count all files in each data directory.
# os.walk yields one (root, dirs, files) tuple per directory, so the counts
# are summed across the whole walk; reading `files` after the loop would only
# reflect the last directory visited (or be undefined if nothing was walked).
noaa_file_count = sum(len(files) for _, _, files in os.walk(noaa_data_dir))
print(f'Total files: {noaa_file_count}') # 6555

gsv_file_count = sum(len(files) for _, _, files in os.walk(gsv_data_dir))
print(f'Total files: {gsv_file_count}') # 81

In [None]:
# File-name suffixes (compared case-insensitively) that count as images
valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')


def remove_non_image_files(directory):
    """
    Delete every file under ``directory`` whose name lacks an image extension.

    The directory tree is walked recursively. A file is kept when its
    lower-cased name ends with one of ``valid_extensions``; any other file is
    removed from disk. The number of removed files is printed.

    Args:
        directory (str): Root of the directory tree to clean up.
    """
    removed_files = 0
    for current_dir, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            # Image files are left alone
            if filename.lower().endswith(valid_extensions):
                continue
            os.remove(os.path.join(current_dir, filename))
            removed_files += 1
    print(f'{removed_files} non-image files removed from {directory}')

# Remove non-image files from both directories
remove_non_image_files(noaa_data_dir)
remove_non_image_files(gsv_data_dir)

# Count remaining files.
# os.walk yields one (root, dirs, files) tuple per directory, so the counts
# are summed over the whole walk. Reading `files` after an empty-bodied loop
# would only report the last directory visited, or a stale/undefined value
# when the walk yields nothing.
for data_dir in (noaa_data_dir, gsv_data_dir):
    remaining = sum(len(files) for _, _, files in os.walk(data_dir))
    print(f'Total image files in {data_dir}: {remaining}')

## Sanity Check

In [None]:
# Show the number of entries in the DreamSim results, then the full object
print(len(dreamsim_results))
print(dreamsim_results)

In [None]:
# Show the number of entries in the DINO results, then the full object
print(len(dino_results))
print(dino_results)

In [None]:
# Show the number of entries in the OpenCLIP results, then the full object
print(len(openclip_results))
print(openclip_results)

## Visualization

In [11]:
def visualize_results(results, noaa_dir, gsv_dir, title="Results Visualization", *, top_k=5):
    """
    Visualize query images and their top similar images in a grid format.

    The grid has one header row followed by one row per entry in ``results``.
    Column 0 shows the query image; columns 1..top_k show its similar images
    in the order they are listed. Cells whose image file is missing show
    "Not Found"; cells with no corresponding neighbor are left blank.

    Args:
        results (dict): A dictionary where keys are tuples (query_name, num_filtered),
                        and values are lists of tuples (similar_image_name, similarity_score).
        noaa_dir (str): Path to the root directory containing NOAA images.
        gsv_dir (str): Path to the root directory containing GSV images.
        title (str): Title for the visualization (optional).
        top_k (int): Maximum number of similar images shown per query
                     (keyword-only, defaults to 5).

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    # One column for the query image plus one per similar image
    n_cols = top_k + 1
    n_rows = len(results) + 1  # extra first row holds the column labels

    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # row or column, so axes[row, col] indexing below always works. The height
    # is floored at one row's worth so an empty `results` still gives a valid
    # figure size.
    fig, axes = plt.subplots(
        n_rows, n_cols,
        figsize=(25, max(len(results), 1) * 3),
        squeeze=False,
    )

    # Hide ticks and frames everywhere up front so cells that receive no image
    # (fewer than top_k neighbors) stay blank instead of showing empty axes.
    for ax in axes.flat:
        ax.axis('off')

    # Define labels for the first row
    labels = ["Query"] + [f"Similar {i}" for i in range(1, top_k + 1)]
    for col, label in enumerate(labels):
        axes[0, col].text(0.5, 0.5, label, ha='center', va='center', fontsize=12, fontweight='bold')

    # Process each NOAA image and its results
    for row_idx, ((noaa_name, num_filtered), top_neighbors) in enumerate(results.items(), start=1):
        # Query image, captioned with its name and number of filtered images
        _show_image_cell(
            axes[row_idx, 0],
            os.path.join(noaa_dir, noaa_name),
            f"{noaa_name}\nFiltered: {num_filtered}",
        )

        # Top similar images, captioned with name and similarity score
        for col_idx, (gsv_name, similarity) in enumerate(top_neighbors[:top_k], start=1):
            _show_image_cell(
                axes[row_idx, col_idx],
                os.path.join(gsv_dir, gsv_name),
                f"{gsv_name}\nSim: {similarity:.2f}",
            )

    # Adjust layout and spacing
    fig.suptitle(title, fontsize=16, fontweight='bold')
    fig.tight_layout()
    fig.subplots_adjust(wspace=0.2, hspace=0.2)
    plt.show()


def _show_image_cell(ax, image_path, caption):
    """Draw the image at ``image_path`` on ``ax`` with ``caption`` below it, or a "Not Found" placeholder."""
    # isfile (not exists) so a path that resolves to a directory is treated
    # as missing rather than being handed to Image.open.
    if not os.path.isfile(image_path):
        ax.text(0.5, 0.5, "Not Found", ha='center', va='center', fontsize=8)
        return

    # The context manager closes the underlying file once the pixels have been
    # handed to matplotlib, so long result lists do not leak file handles.
    with Image.open(image_path) as img:
        ax.imshow(img)
    ax.text(0.5, -0.1, caption, ha='center', va='top', transform=ax.transAxes, fontsize=9)

## Visualize DreamSim

In [12]:
# Re-assign the image directories passed to visualize_results below
# (same values as set earlier in this file)
noaa_data_dir = './data/NOAA_FINAL_BUILDING'
gsv_data_dir = './data/GSV_FINAL_BUILDING'

In [None]:
# visualize_results(
#     results=dreamsim_results,
#     noaa_dir='/content/noaa_original_images/NOAA_final/',
#     gsv_dir='/content/gsv_original_images/GSV_final/',
#     title="DreamSim Results Visualization"
# )

# Plot each DreamSim query image alongside its top similar images
visualize_results(
    results=dreamsim_results,
    noaa_dir=noaa_data_dir,
    gsv_dir=gsv_data_dir,
    title="DreamSim Results Visualization"
)

## Visualize DINO

In [None]:
# Plot each DINO query image alongside its top similar images
visualize_results(
    results=dino_results,
    noaa_dir=noaa_data_dir,
    gsv_dir=gsv_data_dir,
    title="DINO Results Visualization"
)

## Visualize OpenCLIP

In [None]:
# Plot each OpenCLIP query image alongside its top similar images
visualize_results(
    results=openclip_results,
    noaa_dir=noaa_data_dir,
    gsv_dir=gsv_data_dir,
    title="OpenClip Results Visualization"
)