In [9]:
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

clip_embedding_cache = {}

def _load_embedding(clip_embedding_path):
    filename, index = clip_embedding_path.split("#")
    index = int(index)

    # Load Numpy array from file or cache
    if filename not in clip_embedding_cache:
        arr = np.load(f'/opt/viva/output/embeddings/{filename}')
        clip_embedding_cache[filename] = arr
    else:
        arr = clip_embedding_cache[filename]
        
    # Extract row vector
    row = arr[index]

    return row

# Step 1: Load the Parquet File
# Only the columns needed for the plot are read.
required_columns = ['local_id', 'score', 'clip_embedding', 'label']
df = pd.read_parquet('/opt/viva/analysis/results/results_variable,ucf101,clc@0.1,drumming-otherstuff', columns=required_columns)

# Keep a single row per `local_id` (one per frame, per the original comment):
# the row with the highest score.
idx = df.groupby('local_id')['score'].idxmax()
filtered_df = df.loc[idx]

# Step 2: Extract Embeddings and Labels
# Iterate over the *filtered* rows. The previous version computed filtered_df
# but then looped over the full df, so the per-frame filter had no effect and
# every row was embedded.
all_embeddings = []
for clip_embedding_path in filtered_df['clip_embedding']:
    # References look like "<path>#<row>". _load_embedding in this cell expects
    # a bare file name, so directory components are stripped first.
    parts = clip_embedding_path.split('#')
    actual_file_name = parts[0].split('/')[-1]
    embedding_index = int(parts[-1])
    all_embeddings.append(_load_embedding(f'{actual_file_name}#{embedding_index}'))

all_embeddings = np.array(all_embeddings)
# Labels come from the same filtered rows so they stay aligned with the embeddings.
labels = filtered_df['label'].values

# Step 3: Reduce Dimensionality using t-SNE
# A fixed random_state makes the layout reproducible across runs.
tsne_embeddings = TSNE(n_components=2, random_state=0).fit_transform(all_embeddings)

# Step 4: Plot the Reduced Embeddings
plt.figure(figsize=(10,8))
scatter = plt.scatter(tsne_embeddings[:, 0], tsne_embeddings[:, 1], c=labels, cmap='jet')
plt.colorbar(scatter)
plt.title('t-SNE Visualization of Embeddings')
plt.show()



KeyboardInterrupt: 

In [6]:
import torch
import clip
import numpy as np
import os

# 1. Load the model
# Run on the GPU when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, _ = clip.load('ViT-B/32', device=device)

# Category names that get a text embedding each.
reference_labels = [
    "Human-Object Interaction",
    "Body-Motion Only",
    "Human-Human Interaction",
    "Playing Musical Instruments",
    "Sports",
]

# 2. Tokenize and encode the reference_labels
texts = clip.tokenize(reference_labels).to(device)

# Inference only, so gradient tracking is switched off while encoding.
with torch.no_grad():
    text_features = model.encode_text(texts)
text_embeddings = text_features.cpu().numpy()

# 3. Write the embeddings to the specified path
embeddings_dir = '/opt/viva/tmp/embeddings'
os.makedirs(embeddings_dir, exist_ok=True)

# One .npy file per label, named after the lower-cased label with spaces
# replaced by underscores.
for position, label in enumerate(reference_labels):
    embedding = text_embeddings[position]
    filename = "_".join(label.lower().split(" ")) + ".npy"
    filepath = os.path.join(embeddings_dir, filename)
    np.save(filepath, embedding)

print("Embeddings saved!")


Embeddings saved!


In [16]:
from shutil import copy2
import os
import numpy as np
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

clip_embedding_cache = {}
  
# Update _load_embedding to ensure it returns float64 data
def _load_embedding(clip_embedding_path):
    filename, index = clip_embedding_path.split("#")
    index = int(index)
    embedding_file_name = filename.split("/")[-1]

    # Load Numpy array from file or cache
    if filename not in clip_embedding_cache:
        arr = np.load(f'/opt/viva/output/embeddings/{embedding_file_name}')
        clip_embedding_cache[f'/opt/viva/output/embeddings/{embedding_file_name}'] = arr
    else:
        arr = clip_embedding_cache[f'/opt/viva/output/embeddings/{embedding_file_name}']
        
    # Extract row vector
    row = arr[index]

    return row

# Load all embeddings
# Every file in `directory` whose name starts with 'ucf101' is loaded and the
# arrays are concatenated along axis 0 into one matrix.
directory = '/opt/viva/output/embeddings'
# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# all_embeddings stable from run to run.
embedding_files = sorted(f for f in os.listdir(directory) if f.startswith('ucf101'))
if not embedding_files:
    # np.concatenate would fail on an empty list with a less helpful message.
    raise ValueError(f"no 'ucf101*' embedding files found in {directory}")
embeddings = [np.load(os.path.join(directory, file)) for file in embedding_files]
all_embeddings = np.concatenate(embeddings, axis=0)
all_embeddings = all_embeddings.astype(np.float64)

# Perform t-SNE dimensionality reduction
# random_state is fixed so the projection is reproducible.
tsne = TSNE(n_components=3, random_state=0)
projected_embeddings = tsne.fit_transform(all_embeddings)

# Cluster the data into, e.g., 5 clusters using K-means
# Clustering runs on the 3-D projection; random_state is fixed so cluster ids
# are reproducible.
n_clusters = 5
kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(projected_embeddings)
labels = kmeans.labels_

# Calculate the mean embedding for each cluster
# Means are taken over the original (unprojected) vectors of each cluster's members.
mean_embeddings = np.array([
    np.mean(all_embeddings[labels == i], axis=0)
    for i in range(n_clusters)
])

# Load reference embeddings
# One saved vector per category name; the file name is the lower-cased label
# with spaces replaced by underscores, plus ".npy".
reference_labels = [
    "Human-Object Interaction",
    "Body-Motion Only",
    "Human-Human Interaction",
    "Playing Musical Instruments",
    "Sports",
]

reference_embeddings_path = '/opt/viva/tmp/embeddings'
reference_filenames = [f'{label.lower().replace(" ", "_")}.npy' for label in reference_labels]
reference_embeddings = np.vstack([
    np.load(os.path.join(reference_embeddings_path, filename))
    for filename in reference_filenames
])

# Compute similarity to reference embeddings
# Row r, column c: cosine similarity between cluster r's mean and reference c.
similarities = cosine_similarity(mean_embeddings, reference_embeddings)

# Assign each embedding to the reference label with the highest similarity for its cluster
best_reference_per_cluster = [np.argmax(cluster_row) for cluster_row in similarities]
embedding_assignments = [best_reference_per_cluster[cluster_id] for cluster_id in labels]

category_assignments = [reference_labels[i] for i in embedding_assignments]

# Fixed colour per category for plotting.
category_colors = {
    "Human-Object Interaction": "red",
    "Body-Motion Only": "blue",
    "Human-Human Interaction": "green",
    "Playing Musical Instruments": "yellow",
    "Sports": "purple",
}

point_colors = list(map(category_colors.__getitem__, category_assignments))
    
  # Create the 3D scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=projected_embeddings[:, 0],
#     y=projected_embeddings[:, 1],
#     z=projected_embeddings[:, 2],
#     mode='markers',
#     marker=dict(
#         size=5,
#         color=point_colors,  # Set color to an array/list of desired values
#         opacity=0.8
#     ),
#     text=category_assignments  # This will show the category name when hovering over a point
# )])

# # Update the layout and show the figure
# fig.update_layout(
#     margin=dict(l=0, r=0, b=0, t=0),
#     scene=dict(
#         xaxis_title="X Axis",
#         yaxis_title="Y Axis",
#         zaxis_title="Z Axis"
#     ),
#     legend_title_text="Reference Labels"
# )

# fig.show()

# Load the results table and keep one row per frame URI.
results_df = pd.read_parquet('/opt/viva/analysis/results/results_variable,ucf101,clc@0.1,drumming-otherstuff')
results_df = results_df.drop_duplicates(subset=['frameuri'])

# Redundant with the import at the top of this cell; kept so the block runs on its own.
from shutil import copy2

# 1. Cluster the data in original embedding space
# random_state is fixed so cluster ids (and therefore which frames get copied)
# are reproducible.
kmeans_high_dim = KMeans(n_clusters=n_clusters, random_state=0).fit(all_embeddings)

# 2. Calculate mean embeddings for each cluster in this space
mean_embeddings_high_dim = np.array([
    np.mean(all_embeddings[kmeans_high_dim.labels_ == i], axis=0)
    for i in range(n_clusters)
])

# Compute similarity to reference embeddings for the high-dimensional cluster centers
similarities_high_dim = cosine_similarity(mean_embeddings_high_dim, reference_embeddings)

# Resolve each cluster to its best-matching reference label once, instead of
# repeating the argmax for every row.
cluster_categories = [reference_labels[j] for j in np.argmax(similarities_high_dim, axis=1)]

# Directory to save the sports images
save_dir = '/opt/viva/tmp/ucf101'
os.makedirs(save_dir, exist_ok=True)

if not results_df.empty:
    # Gather every row's embedding and cast to float64, the dtype the model was
    # fit on (all_embeddings is float64), then predict all clusters in one call
    # rather than one predict() per row.
    frame_embeddings = np.stack(
        [_load_embedding(path) for path in results_df['clip_embedding']]
    ).astype(np.float64)
    frame_clusters = kmeans_high_dim.predict(frame_embeddings)

    # Copy the frame image of every row whose cluster maps to "Sports".
    for image_path, cluster_id in zip(results_df['frameuri'], frame_clusters):
        if cluster_categories[cluster_id] == "Sports":
            destination_path = os.path.join(save_dir, f"sports_{os.path.basename(image_path)}")
            copy2(image_path, destination_path)




In [17]:
import os
import numpy as np
from PIL import Image
import cv2

# Define source and destination directories
source_dir = '/opt/viva/tmp/ucf101_frames_npy'
destination_dir = '/opt/viva/tmp/ucf101_sport_images'

# Create the destination directory if it doesn't exist
os.makedirs(destination_dir, exist_ok=True)

# Iterate through each file in the source directory
for filename in os.listdir(source_dir):
    if not filename.endswith('.npy'):
        continue

    # Load the numpy array
    frame_array = np.load(os.path.join(source_dir, filename))

    # Swap channels and build the image from the *converted* array. The
    # previous version computed the conversion but then saved the unconverted
    # array, so the channel swap never reached the output file.
    # NOTE(review): assumes the stored frames are BGR, as the original
    # COLOR_BGR2RGB call implies -- confirm against whatever wrote the .npy files.
    frame = cv2.cvtColor(frame_array.astype('uint8'), cv2.COLOR_BGR2RGB)
    image = Image.fromarray(frame)

    # Save under the same base name with a .jpg extension,
    # e.g. 'frame_001.npy' -> 'frame_001.jpg'.
    image_name = os.path.splitext(filename)[0] + '.jpg'
    image_path = os.path.join(destination_dir, image_name)
    image.save(image_path)

print("Conversion completed!")


Conversion completed!
