# DINOV2 similarity metric demo

**There are some necessary setup steps to complete before running this notebook.**
1. If you're using VS Code, change **Jupyter: Notebook File Root** to **${workspaceFolder}**.
2. Download a pretrained backbone from the official DINOv2 repo: https://github.com/facebookresearch/dinov2. You're looking for "dinov2_vitb14_reg4_pretrain.pth".
3. Put the necessary data inside the **data** folder.

In [2]:
import warnings
warnings.filterwarnings("ignore")

import torch
import cv2
import pandas as pd
import matplotlib.pyplot as plt

from utilities import *
from modules import DinoV2

## Data

**Quick overview**:
 - MISSING 6A in WEB and AI
 - MISSING 12B in AI
 - MISSING 22B in WEB and AI
 - MISSING 23B in AI
 - MISSING 26 in WEB and AI (!) - because of this we need to skip group 26 for now

In [None]:
# Load every group, then rebuild the table without the row at position 25.
# NOTE(review): position 25 is presumably group 26 (the group with no WEB/AI
# data) - confirm against the ordering returned by get_groups().
groups = get_groups()
rows_before_gap = groups.iloc[:25]
rows_after_gap = groups[26:]  # integer slice on the frame: rows from position 26 onwards
final_data = pd.concat([rows_before_gap, rows_after_gap])
final_data

Let's check if everything loaded properly.

In [None]:
def _read_rgb(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    cv2.imread returns pixels in BGR order, while matplotlib's imshow
    interprets a 3-channel array as RGB, so the channels are converted
    before plotting. Returns None when OpenCV cannot read the file
    (cv2.imread reports that by returning None instead of raising).
    """
    image = cv2.imread(path)
    if image is None:
        return None
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


# Show one AI image, one WEB image and one submission per group as a quick
# visual check that the paths in final_data resolve to readable files.
for i in range(len(final_data)):
    row = final_data.iloc[i]
    print(f'Sample photos from group {row.group_code}')

    # Index 1 (the second entry of each list) is used as the sample.
    samples = (
        ("AI", row.ai_images[1]),
        ("WEB", row.web_images[1]),
        ("Submission", row.final_submissions[1]),
    )

    fig, axes = plt.subplots(1, 3, figsize=(20, 10))
    for ax, (title, path) in zip(axes, samples):
        image = _read_rgb(path)
        if image is None:
            # Keep the panel so the figure layout stays the same, but make
            # the unreadable file obvious instead of failing inside imshow.
            ax.text(0.5, 0.5, f"could not read\n{path}", ha="center", va="center")
            ax.set_axis_off()
        else:
            ax.imshow(image)
        ax.set_title(title)
    plt.show()
    # One figure is created per group; close it so they do not accumulate.
    plt.close(fig)

## DINOV2

In [None]:
# Run the model on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

dino = DinoV2(device=DEVICE)

# Hand unused cached blocks of PyTorch's CUDA allocator back to the driver
# after the model has been constructed.
torch.cuda.empty_cache()

### Small demo of the attention maps

In [None]:
# Single image used for the attention demo.
file = 'data/ai/1B_15_4.png'

# Gradients are not needed for visualisation, so autograd is switched off.
with torch.no_grad():
    attention_outputs = dino.return_attention_map(file)
    # The helper returns a pair; only the second element is displayed here.
    attention_mask, attention_map = attention_outputs
    display(attention_map)

## Let's see if the similarity scores make any sense.

In [None]:
group = 19  # positional row of final_data (used with .iloc below), not a group code
SIMILARITY_THRESHOLD = 0.5  # the best match must exceed this before a source is named

with torch.inference_mode():
    torch.cuda.empty_cache()

    row = final_data.iloc[group]
    ai = row.ai_images
    web = row.web_images
    final = row.final_submissions

    # Every similarity that is actually computed, per source. Means, minima
    # and maxima are derived from these lists afterwards, so the averages
    # are always divided by the number of comparisons really performed.
    scores = {"AI": [], "WEB": []}
    max_similarity, pic1, pic2 = -1, "", ""
    inspiration = "INCONCLUSIVE"

    print(f'Calculating similarity for group {row.group_code}')

    for final_photo in final:
        torch.cuda.empty_cache()

        # The same procedure is applied to both inspiration sources.
        for label, candidates in (("AI", ai), ("WEB", web)):
            print(f"{label} PHOTOS")
            for candidate in candidates:
                similarity = dino.calculate_similarity(final_photo, candidate)
                print(f"Similarity: {similarity}")
                scores[label].append(similarity)

                dino.draw_attention(final_photo, candidate)
                dino.draw_lines(final_photo, candidate, label)

                # Track the single best pair over both sources; a source is
                # only named when that best score clears the threshold.
                if similarity > max_similarity:
                    max_similarity = similarity
                    pic1, pic2 = final_photo, candidate
                    if similarity > SIMILARITY_THRESHOLD:
                        inspiration = label
            torch.cuda.empty_cache()

        # Demo only: stop after the first final photo to keep the output short.
        break

    ai_scores, web_scores = scores["AI"], scores["WEB"]

    # Average over the comparisons that were made. Dividing by
    # len(final) * len(ai) would understate the mean because of the early
    # break above, and would divide by zero for a group with no images.
    ai_total = sum(ai_scores) / len(ai_scores) if ai_scores else float('nan')
    web_total = sum(web_scores) / len(web_scores) if web_scores else float('nan')

    # The defaults reproduce the placeholder values shown when a source has
    # no images at all.
    max_ai, min_ai = max(ai_scores, default=-1), min(ai_scores, default=float('inf'))
    max_web, min_web = max(web_scores, default=-1), min(web_scores, default=float('inf'))

    # The image entries are file paths, so deleting them frees no GPU memory;
    # the unconditional `del` of loop variables (which raises NameError when
    # a list is empty) is therefore dropped.
    torch.cuda.empty_cache()

    print(f'\tSimilarity scores - AI: {ai_total:.3f}\tWEB: {web_total:.3f}')
    print(f'\tAI similarity - MAX: {max_ai} | MIN: {min_ai}')
    print(f'\tWEB similarity - MAX: {max_web} | MIN: {min_web}')
    print(f'\tAccording to DINO, this group was mostly inspired by {inspiration}.')

    # Redraw the best-matching pair, if any comparison was made at all.
    if len(pic1) != 0 and len(pic2) != 0:
        dino.draw_attention(pic1, pic2)
        dino.draw_lines(pic1, pic2, inspiration)

## Let's see it in action!

In [None]:
SIMILARITY_THRESHOLD = 0.5  # the best match must exceed this before a source is named

# One record per (final photo, inspiration photo) comparison. Records are
# collected in a plain list and turned into a DataFrame once at the end;
# growing a DataFrame with pd.concat inside the loop copies all previous
# rows on every iteration.
records = []

with torch.inference_mode():
    for i in range(len(final_data)):
        torch.cuda.empty_cache()

        row = final_data.iloc[i]
        ai = row.ai_images
        web = row.web_images
        final = row.final_submissions

        # Running sum and number of comparisons for each source.
        totals = {"AI": 0, "WEB": 0}
        counts = {"AI": 0, "WEB": 0}
        max_similarity, pic1, pic2 = -1, "", ""
        inspiration = "INCONCLUSIVE"

        print(f'Calculating similarity for group {row.group_code}')

        for final_photo in final:
            torch.cuda.empty_cache()

            # The same procedure is applied to both inspiration sources.
            for label, candidates in (("AI", ai), ("WEB", web)):
                for candidate in candidates:
                    similarity = dino.calculate_similarity(final_photo, candidate)
                    records.append({
                        'final_photo': final_photo,
                        'inspiration': candidate,
                        'similarity': similarity,
                    })

                    totals[label] += similarity
                    counts[label] += 1

                    # Track the single best pair over both sources; a source
                    # is only named when that score clears the threshold.
                    if similarity > max_similarity:
                        max_similarity = similarity
                        pic1, pic2 = final_photo, candidate
                        if similarity > SIMILARITY_THRESHOLD:
                            inspiration = label
                torch.cuda.empty_cache()

        # Mean similarity per source; NaN (instead of ZeroDivisionError) when
        # a group has no images for that source.
        ai_total = totals["AI"] / counts["AI"] if counts["AI"] else float('nan')
        web_total = totals["WEB"] / counts["WEB"] if counts["WEB"] else float('nan')

        # The image entries are file paths, so deleting them frees no GPU
        # memory; the unconditional `del` of loop variables (which raises
        # NameError when a list is empty) is therefore dropped.
        torch.cuda.empty_cache()

        print(f'\tSimilarity scores - AI: {ai_total:.3f}\tWEB: {web_total:.3f}')
        print(f'\tAccording to DINO, this group was mostly inspired by {inspiration}.')

        # Draw the best-matching pair of the group, if any comparison was made.
        if len(pic1) != 0 and len(pic2) != 0:
            dino.draw_attention(pic1, pic2)
            dino.draw_lines(pic1, pic2, inspiration)

# Explicit columns keep the CSV header stable even if no comparison was made.
similarity_results = pd.DataFrame(records, columns=['final_photo', 'inspiration', 'similarity'])
similarity_results.to_csv('similarity_results.csv', index=False)