# Testing out embedding + clustering

## Import dataset

In [24]:
from roboflow import Roboflow
import os
import umap

In [32]:
# Roboflow dataset coordinates: workspace slug, project slug and dataset version.
robo_workspace, robo_project, robo_version = "cyclinghelper", "pro-cyclist-teams", 1
# The API key is looked up in the environment so it never lands in the notebook;
# the value is None when ROBOFLOW_API_KEY is not set.
robo_api_key = os.environ.get("ROBOFLOW_API_KEY")

In [39]:
# Create the Roboflow client with the API key held in `robo_api_key`.
rf = Roboflow(api_key=robo_api_key)
# Walk workspace -> project -> version using the identifiers configured earlier.
project = rf.workspace(robo_workspace).project(robo_project)
version = project.version(robo_version)
# Download the version in the "folder" export format.
# NOTE(review): judging by the recorded output this extracts into
# `pro-cyclist-teams-1/` in the working directory — confirm before relying on the path.
dataset = version.download("folder")

loading Roboflow workspace...
loading Roboflow project...
Exporting format folder in progress : 85.0%
Version export complete for folder format


Python(27774) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Downloading Dataset Version Zip in pro-cyclist-teams-1 to folder:: 100%|██████████| 589/589 [00:00<00:00, 1885.36it/s]





Extracting Dataset Version Zip to pro-cyclist-teams-1 in folder:: 100%|██████████| 174/174 [00:00<00:00, 6075.77it/s]


## CLIP Embeddings and Visualization

In [14]:
# Adapted from this notebook - https://colab.research.google.com/drive/1EJFpca6IG8dPCZ2-WwEX5GTDetp1Pe7f?usp=sharing#scrollTo=0OwO0H8UMIA7

import torch
import clip
from PIL import Image
import cv2
import base64
import glob
import numpy as np
import os
import umap
import time

model_name='ViT-B/32'

# Pick the torch device the CLIP model and every input batch will live on.
# `device` must be defined here: both `clip.load` below and
# `get_openai_clip_embedding` read it, and on a fresh kernel nothing else sets it.
if torch.backends.mps.is_available():
    # Apple-silicon GPU backend; allocate a tiny tensor as a smoke test.
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
    device = mps_device
else:
    print ("MPS device not found.")
    # Fall back to CUDA when a GPU is present, otherwise run on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained CLIP weights plus the matching image preprocessing transform.
openai_clip_model, openai__preprocess = clip.load(model_name, device=device)

def get_openai_clip_embedding(imgs):
  """Embed a list of images with the loaded CLIP model.

  Each image is run through `openai__preprocess`, the results are stacked into
  one batch on `device`, encoded with `openai_clip_model.encode_image`, and
  every row is divided by its norm along the last dimension.

  Note: only the embedding of the FIRST image in `imgs` is returned, so
  callers pass a single-element list.
  """
  with torch.no_grad():
    batch = torch.stack([openai__preprocess(picture) for picture in imgs])
    batch = batch.to(device)
    encoded = openai_clip_model.encode_image(batch)
    # In-place normalisation so each row has unit length.
    encoded /= encoded.norm(dim=-1, keepdim=True)
  return encoded[0]

tensor([1.], device='mps:0')


In [2]:
# Sanity check: embed one crop and look at the shape of the resulting vector.
sample_image = Image.open('crops/Gent Wevelgem 2024_image_3.jpg')
e = get_openai_clip_embedding([sample_image])
e.cpu().numpy().shape

(512,)

In [11]:
# Create embeddings and align with labels and images
def image_to_data_uri(image_path):
    """Return the file at `image_path` encoded as a base64 ``data:`` URI.

    The MIME type is guessed from the file extension so that non-JPEG images
    (e.g. ``.png``) are labelled correctly; when the extension is missing or is
    not an image type, ``image/jpeg`` is used, matching the previous behaviour.

    Args:
        image_path: Path of the image file to embed (str or str-like).

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # local import: only this helper needs it

    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    return "data:" + mime_type + ";base64," + encoded_image

# Root folder of the split to embed; each sub-folder name is used as the class label.
SOURCE_DIR = 'pro-cyclist-teams-1/test/'

labels = []
openclip_embeddings = []   # not filled in this cell; name kept for other cells
openai_clip_embeddings = []
train = []                 # not filled in this cell; name kept for other cells
images = []
image_paths = []

class_ids = sorted(os.listdir(SOURCE_DIR))

for class_id in class_ids:
    source_subdir = os.path.join(SOURCE_DIR, class_id)
    # Sort the matches: glob order is filesystem-dependent, and a stable order
    # keeps labels/embeddings/paths reproducible between runs.
    for image_path in sorted(glob.glob(source_subdir+'/*.jpg')):
        try:
            # Grayscale copy kept in `images`; cv2.imread returns None (no
            # exception) when it cannot decode the file.
            image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
            # Context manager closes the file handle once the embedding is computed.
            with Image.open(image_path) as img:
                openai_clip_emb = get_openai_clip_embedding([img])
            embedding = openai_clip_emb.cpu().numpy()
        except Exception as exc:
            # Best effort: skip unreadable images but say why. `Exception`
            # (not a bare except) lets Ctrl-C / kernel interrupt through.
            print(f"can't process {image_path}: {exc!r}")
            continue
        # Append to all lists together so they stay index-aligned.
        openai_clip_embeddings.append(embedding)
        labels.append(class_id)
        images.append(image)
        image_paths.append(str(image_path))

if not openai_clip_embeddings:
    # An empty result makes the later projection step fail with an obscure error.
    print(f"warning: no images were embedded from {SOURCE_DIR}")

# class associated with image
labels = np.array(labels)
# features extracted from image
openai_clip_embeddings = np.array(openai_clip_embeddings)


# local image path
image_paths = np.array(image_paths)
# cached images; keys are plain `str` (not numpy string scalars) so the mapping
# serialises cleanly when it is embedded in the HTML page
image_data_uris = {str(path): image_to_data_uri(path) for path in image_paths}

In [23]:
import plotly.graph_objects as go
import plotly.express as px

from typing import Dict
from pathlib import Path
from IPython.display import display, HTML


def display_projections(
    labels: np.ndarray,
    projections: np.ndarray,
    image_paths: np.ndarray,
    image_data_uris: Dict[str, str],
    show_legend: bool = False,
    show_markers_with_text: bool = True
) -> str:
    """Build an HTML page with a 3D scatter of `projections` and a click-to-preview image box.

    One Plotly `Scatter3d` trace is created per unique label. Each point carries
    its image path as `customdata`; clicking a point looks that path up in
    `image_data_uris` and shows the image in a fixed box in the top-left corner.

    Args:
        labels: Label of every point; compared element-wise against each unique label.
        projections: 2-D array whose columns 0, 1 and 2 are used as x, y and z.
        image_paths: Image path of every point, index-aligned with `labels`.
        image_data_uris: Mapping from image path to a `data:` URI for that image.
        show_legend: Whether the figure legend is shown.
        show_markers_with_text: If True, points are drawn as 'markers+text'
            (label next to each marker); otherwise as 'markers' only.

    Returns:
        The complete HTML document as a string. Nothing is displayed here; the
        caller decides whether to render it or write it to disk.
    """
    import json  # local import: only needed to serialise the URI mapping for JS

    # Create a separate trace for each unique label
    unique_labels = np.unique(labels)
    traces = []
    for unique_label in unique_labels:
        mask = labels == unique_label
        customdata_masked = image_paths[mask]
        trace = go.Scatter3d(
            x=projections[mask][:, 0],
            y=projections[mask][:, 1],
            z=projections[mask][:, 2],
            mode='markers+text' if show_markers_with_text else 'markers',
            text=labels[mask],
            customdata=customdata_masked,
            name=str(unique_label),
            marker=dict(size=8),
            hovertemplate="<b>class: %{text}</b><br>path: %{customdata}<extra></extra>"
        )
        traces.append(trace)

    # Create the 3D scatter plot
    fig = go.Figure(data=traces)
    fig.update_layout(
        scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'),
        width=1000,
        height=1000,
        showlegend=show_legend
    )

    # Convert the chart to an HTML div string and add an ID to the div
    plotly_div = fig.to_html(full_html=False, include_plotlyjs=False, div_id="scatter-plot-3d")

    # Serialise the path -> data-URI mapping as a real JavaScript object literal.
    # Interpolating the Python dict repr is not safe: with NumPy >= 2 string
    # scalar keys render as `np.str_('...')`, which is a JS syntax error, and
    # quotes/backslashes in paths are not escaped for JS. `</` is escaped so a
    # path can never terminate the surrounding <script> element.
    image_data_uris_js = json.dumps(
        {str(path): uri for path, uri in image_data_uris.items()}
    ).replace("</", "<\\/")

    # JavaScript that shows the clicked point's image in the preview box
    javascript_code = f"""
    <script>
        function displayImage(imagePath) {{
            var imageElement = document.getElementById('image-display');
            var placeholderText = document.getElementById('placeholder-text');
            var imageDataURIs = {image_data_uris_js};
            imageElement.src = imageDataURIs[imagePath];
            imageElement.style.display = 'block';
            placeholderText.style.display = 'none';
        }}

        // Get the Plotly chart element by its ID
        var chartElement = document.getElementById('scatter-plot-3d');

        // Add a click event listener to the chart element
        chartElement.on('plotly_click', function(data) {{
            var customdata = data.points[0].customdata;
            displayImage(customdata);
        }});
    </script>
    """

    # Create an HTML template including the chart div and JavaScript code.
    # NOTE(review): the `plotly-latest` CDN alias is reportedly frozen at an old
    # plotly.js release — consider pinning an explicit version; confirm first.
    html_template = f"""
    <!DOCTYPE html>
    <html>
        <head>
            <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
            <style>
                #image-container {{
                    position: fixed;
                    top: 0;
                    left: 0;
                    width: 200px;
                    height: 200px;
                    padding: 5px;
                    border: 1px solid #ccc;
                    background-color: white;
                    z-index: 1000;
                    box-sizing: border-box;
                    display: flex;
                    align-items: center;
                    justify-content: center;
                    text-align: center;
                }}
                #image-display {{
                    width: 100%;
                    height: 100%;
                    object-fit: contain;
                }}
            </style>
        </head>
        <body>
            {plotly_div}
            <div id="image-container">
                <img id="image-display" src="" alt="Selected image" style="display: none;" />
                <p id="placeholder-text">Click on a data entry to display an image</p>
            </div>
            {javascript_code}
        </body>
    </html>
    """

    # Hand the finished HTML document back to the caller
    return html_template

In [9]:
# Echo the embedding array to check that it was populated.
# NOTE(review): this prints the whole array; `.shape` would be a lighter check
# once the array is non-empty.
openai_clip_embeddings

array([], dtype=float64)

In [26]:
# Project the CLIP embeddings down to 3 dimensions with UMAP and time the fit.
# UMAP is stochastic; fixing `random_state` makes the layout reproducible
# across Restart & Run All (at the cost of UMAP's parallel code path).
start = time.time()
projections = umap.UMAP(n_components=3, random_state=42).fit_transform(openai_clip_embeddings)
end = time.time()
print(f"generating projections with UMAP took: {(end-start):.2f} sec")

# Build the interactive HTML page; the function returns the markup as a string.
html_template = display_projections(
    labels=labels,
    projections=projections,
    image_paths=image_paths,
    image_data_uris=image_data_uris
)

generating projections with UMAP took: 1.16 sec


In [27]:
# Specify the filename
filename = 'test_template.html'

# Write the HTML template to disk. The encoding is set explicitly so the output
# does not depend on the platform's locale default (image paths embedded in the
# page may contain non-ASCII characters).
with open(filename, 'w', encoding='utf-8') as html_file:
    html_file.write(html_template)

print(f"HTML template saved as {filename}")

HTML template saved as test_template.html


# Old

In [None]:
# NOTE(review): `import YOLO` binds a module named YOLO, yet `YOLO(model)` below
# calls it like a class. Presumably `from ultralytics import YOLO` was intended —
# confirm before reviving this cell.
import YOLO
# Test out the trained model on video snippets
model = 'models/best.pt'
video = 'video_snippets/Gent Wevelgem 2024_snippet_2.mp4'

# NOTE(review): `model` is rebound here from the weights path to the loaded model object.
model = YOLO(model)
# Run prediction on the video snippet with save=True and conf=0.8.
model.predict(video, save=True,conf=0.8)