# Intelligently select training data (ISD) that reflects upcoming raw batches
1. Create embeddings for the upcoming batch of data (1000 images) using `ResNet50`
2. Run `top_k` on all embeddings, storing matches in `selected_imgs`
3. Iterate over `selected_imgs` and add to training stage
4. Train new model and view performance on upcoming data

### Notes
Because we rely solely on ResNet's embeddings to select data, it will be important in the future to check that the selected data contains a fair representation of classes.

For this notebook, there will be two models trained to compare the performance:
1. __Control__: trained on a random subset of our data equal in length to the other model
2. **ISD**: trained on only the `selected_imgs` from our embeddings


#### Import dependencies

In [None]:
import torch
import os
import torchvision.transforms as transforms
from torchvision import models 
from PIL import Image
import numpy as np
from dotenv import load_dotenv

#### Connect to Pinecone

In [None]:
# Connect to the Pinecone index that is queried for nearest neighbours.
# NOTE(review): `ServerlessSpec` and the bare `pinecone` module are not used in
# the visible cells — confirm before removing either import.
from pinecone import Pinecone, ServerlessSpec
import pinecone
# Load variables from a .env file so PC_API_KEY is available via os.getenv.
load_dotenv()
# os.getenv returns None when PC_API_KEY is not set.
pc = Pinecone(api_key=os.getenv("PC_API_KEY"))
index_name = 'rlr-embeddings'
# Handle to the index; the query cells call index.query(...) on it.
index = pc.Index(index_name)

##### View ResNet architecture
Take note of the final layer. We will remove this output layer so that the layer before it acts as our embeddings layer: <br>
`(fc): Linear(in_features=2048, out_features=1000, bias=True) `

In [None]:
# Load torchvision's ResNet50 with pretrained weights.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision releases in
# favour of the `weights=` argument — confirm against the installed version.
model = models.resnet50(pretrained=True)
# eval() puts the module in inference mode and returns it, so as the last
# expression of the cell it also displays the architecture.
model.eval()

# the final `fc` layer has 2048 in_features, the dimensions of our embeddings

##### Removing the final layer with `torch`

In [None]:
# Rebuild the network from every child module except the last one, so a forward
# pass stops before the final classification layer.
model = torch.nn.Sequential(*list(model.children())[:-1])
# Keep the truncated model in inference mode.
model.eval()

#### Define our transformation function

In [None]:
# Per-channel (RGB) statistics used to normalize the tensor after ToTensor.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline applied to every image before it is embedded.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),  # ResNet 50 expects 224x224 inputs
        transforms.ToTensor(),  # PIL image / NumPy array -> tensor
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)

##### Define upcoming batch (1000 or length of remaining frames)

In [None]:
# Directory the upcoming batch of frames is drawn from.
raw_img_dir = "../data/images/frames"

# os.listdir returns entries in arbitrary order; sort them so the upcoming
# batch is the same set of frames on every run.
raw_imgs = sorted(os.listdir(raw_img_dir))

# Use at most 1000 frames, or every remaining frame if there are fewer.
num_imgs = min(len(raw_imgs), 1000)

print(f"Raw images: {len(raw_imgs)}, Number of images: {num_imgs}")


### Embeddings
#### Iterate over our images:
1. Transform
2. Create Embedding
3. Add to list of embeddings

In [None]:
# Build one L2-normalized embedding per image in the upcoming batch.
embeddings = []

for img_name in raw_imgs[:num_imgs]:

    image_path = os.path.join(raw_img_dir, img_name)

    # Open inside a context manager so the file handle is closed promptly
    # rather than whenever the image object is garbage collected. convert()
    # returns a separate, loaded image that stays usable after the close.
    with Image.open(image_path) as raw_img:
        img = raw_img.convert('RGB')

    # Apply the preprocessing pipeline and add a leading batch dimension.
    img_tensor = transform(img).unsqueeze(0)

    # no_grad skips gradient tracking to reduce memory usage / increase speed.
    with torch.no_grad():
        # Drop the two trailing dimensions of the model output.
        embedding = model(img_tensor).squeeze(-1).squeeze(-1)

    embedding = embedding / torch.norm(embedding, p=2)  # L2 normalization
    embedding = embedding.numpy().tolist()[0]           # row 0 as a 1-dimensional list

    embeddings.append(embedding)

### Query Pinecone
For each embedding, find the top **50** nearest neighbors and store each `match['id']` (the name of the matching image).<br>
Use a set to automatically eliminate duplicates

In [None]:
# Gather the ids of the 50 nearest neighbours of every batch embedding.
# A set is used so an id returned for several embeddings is stored only once.
selected_imgs = set()
all_scores = []
for embedding in embeddings:
    query_response = index.query(vector=embedding, top_k=50, include_metadata=True)
    matches = query_response['matches']
    scores = [m['score'] for m in matches]
    knns = [m['id'] for m in matches]
    all_scores.extend(scores)
    selected_imgs.update(knns)

In [None]:
# Summarize the per-image neighbour search: number of unique ids, the first
# score, and the lowest score seen.
print(len(selected_imgs))
if all_scores:
    print(all_scores[0])
    # Take the true minimum. Seeding a running minimum with 1 would cap the
    # reported value at 1 whenever every score is larger than that.
    min_score = min(all_scores)
    print(min_score)
else:
    # Nothing was returned, so there is no first score or minimum to report
    # (indexing all_scores[0] here would raise IndexError).
    min_score = None
    print("No matches returned; no scores to report.")

##### Calculate the average embedding to retrieve the top 10000 NN
Naturally, there will be a lot of overlap between the nearest neighbors of the images in the upcoming dataset, so we need a strategy for expanding the set. This method increases the number of training examples while still including niche examples. (Notice how the total number of training examples exceeds 10000: the extra examples come from per-image nearest neighbors that are not among the 10000 nearest neighbors of the average embedding.)

In [None]:
# Element-wise mean across all batch embeddings, converted to a plain Python
# list before it is used as a query vector.
avg_embedding = np.asarray(embeddings).mean(axis=0).tolist()
print(avg_embedding)

In [None]:
# Query once with the averaged embedding to pull in a large neighbourhood.
query_response = index.query(
    vector=avg_embedding,
    top_k=10000,
    include_metadata=False,
)
matches = query_response['matches']
scores = [m['score'] for m in matches]
knns = [m['id'] for m in matches]

# Report the lowest score among the returned matches. min() gives the true
# minimum (a running minimum seeded with 1 would cap it at 1); None signals
# that no matches came back.
min_score = min(scores) if scores else None
print(min_score)

# Merge these ids into the set collected from the per-image queries.
selected_imgs.update(knns)
print(len(selected_imgs))

##### Organize new data

In [None]:
import shutil

# Move every selected image from the processed folder into the temp folder.
orig_dir = "../data/images/processed"
new_dir = "../data/images/temp"

# shutil.move fails if the destination directory does not exist yet.
os.makedirs(new_dir, exist_ok=True)

missing = []
for img in selected_imgs:
    src = os.path.join(orig_dir, img)
    # An id without a matching file (for example, one already moved by an
    # earlier run of this cell) is recorded and skipped instead of aborting
    # the whole move part-way through.
    if not os.path.isfile(src):
        missing.append(img)
        continue
    shutil.move(src, os.path.join(new_dir, img))

if missing:
    print(f"Skipped {len(missing)} selected images not found in {orig_dir}")


In [None]:
import os
import sys

# Put the project's utils folder on the import path before importing from it.
sys.path.append(os.path.abspath('../utils'))

import data_management

# Destination folders for labels and images, one pair per split.
label_train_dir = '../data/model_data/labels/train'
label_val_dir = '../data/model_data/labels/validation'

img_train_dir = '../data/model_data/images/train'
img_val_dir = '../data/model_data/images/validation'

# Split the images in the temp folder (with the formatted labels) into the
# train / validation folders; the last argument is 0.8.
data_management.train_val_split(
    "../data/images/temp",
    "../data/labels/formatted",
    img_train_dir,
    label_train_dir,
    img_val_dir,
    label_val_dir,
    0.8,
)

### Train new model on ISD

In [None]:
from datetime import datetime

from ultralytics import YOLO

# The run is named after the current timestamp.
model_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# NOTE: this rebinds `model`, which the embedding cells use for the ResNet
# feature extractor; rerun those cells before embedding again.
model = YOLO("yolov8n.yaml")
results = model.train(
    data="../SLD.yaml",
    epochs=30,
    imgsz=768,
    device=0,
    project="../runs",
    name=model_name,
)


In [None]:
import video_processing

In [None]:
# Paths passed to video_processing.predict_video: the model weights, the input
# video, and the output video.
# NOTE(review): predict_video is defined in the project's video_processing
# module and is not visible here — presumably it writes an annotated copy of
# the video to `video_out`; confirm against that module.
model_path = "../models/current_assistant/2025-03-27_17-25-00.pt"
video_path = "../data/videos/processed/20250222_154541M.mp4"
video_out = "../images/result_videos/testing_ISD_compare.mp4"
video_processing.predict_video(video_path, video_out, model_path)

#### Compare performance of recently trained model vs ISD

In [None]:
import cv2
import screeninfo

def _show_on_monitor(window_name, frame, monitor):
    """Show `frame` full-screen in `window_name`, positioned on `monitor`."""
    # namedWindow does nothing when the window already exists, so calling it
    # for every frame is safe.
    cv2.namedWindow(window_name, cv2.WND_PROP_FULLSCREEN)
    cv2.moveWindow(window_name, monitor.x, monitor.y)

    # Stretch the frame to the monitor's resolution before displaying it.
    fitted = cv2.resize(frame, (monitor.width, monitor.height))
    cv2.imshow(window_name, fitted)
    cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)


def display_side_by_side(video_path_1, video_path_2):
    """Step through two videos frame by frame, shown next to each other.

    Each pair of frames is scaled to a common height, labelled with its video
    path, concatenated horizontally and shown full-screen on the second
    monitor (or the first one if only one is detected). Any key advances to
    the next pair of frames; ESC stops early. When playback ends, the last
    combined frame stays on screen until another key is pressed.

    Args:
        video_path_1: Path of the video shown on the left.
        video_path_2: Path of the video shown on the right.

    Returns:
        None. Prints an error message and returns early if either video cannot
        be opened, no monitor is detected, or no frame can be read.
    """
    window_name = "Final Frame"
    font = cv2.FONT_HERSHEY_SIMPLEX

    cap1 = cv2.VideoCapture(video_path_1)
    cap2 = cv2.VideoCapture(video_path_2)

    # try/finally guarantees the captures and windows are released on every
    # exit path, including the early returns and unexpected exceptions.
    try:
        if not cap1.isOpened() or not cap2.isOpened():
            print("Error: Could not open one of the videos.")
            return

        monitors = screeninfo.get_monitors()
        if not monitors:
            print("Error: No monitors detected.")
            return

        # Prefer the second monitor; fall back to the first one.
        if len(monitors) > 1:
            second_monitor = monitors[1]
        else:
            print("Only one monitor detected, displaying on the primary monitor.")
            second_monitor = monitors[0]

        # Stays None until at least one pair of frames has been combined.
        combined_frame = None

        while True:
            ret1, frame1 = cap1.read()
            ret2, frame2 = cap2.read()

            # Stop as soon as either video runs out of frames.
            if not ret1 or not ret2:
                break

            # Scale both frames to the smaller height, keeping aspect ratio,
            # so they can be concatenated horizontally.
            height = min(frame1.shape[0], frame2.shape[0])
            frame1_resized = cv2.resize(frame1, (int(frame1.shape[1] * height / frame1.shape[0]), height))
            frame2_resized = cv2.resize(frame2, (int(frame2.shape[1] * height / frame2.shape[0]), height))

            # Label each frame with the path of the video it came from.
            frame1_labeled = cv2.putText(frame1_resized, video_path_1, (10, 30), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
            frame2_labeled = cv2.putText(frame2_resized, video_path_2, (10, 30), font, 1, (255, 255, 255), 2, cv2.LINE_AA)

            combined_frame = cv2.hconcat([frame1_labeled, frame2_labeled])
            _show_on_monitor(window_name, combined_frame, second_monitor)

            # Block until a key is pressed; ESC (27) ends playback early.
            key = cv2.waitKey(0) & 0xFF
            if key == 27:
                break

        # If the very first read failed there is no frame to hold on screen;
        # without this guard combined_frame would be used while unset.
        if combined_frame is None:
            print("Error: No frames could be read from the videos.")
            return

        # Keep the last combined frame visible until any key is pressed.
        _show_on_monitor(window_name, combined_frame, second_monitor)
        cv2.waitKey(0)
    finally:
        cap1.release()
        cap2.release()
        cv2.destroyAllWindows()

# Example usage: step through the two result videos side by side. The call
# blocks on key presses until the videos end or ESC is pressed.
video_path_1 = '../images/result_videos/testing_ISD.mp4'
video_path_2 = '../images/result_videos/testing_ISD_compare.mp4'
display_side_by_side(video_path_1, video_path_2)

