In [None]:
import torch
import pandas as pd
import sys
sys.path.append('/mnt/raid/C1_ML_Analysis/source/famli-ultra-sim/')
sys.path.append('/mnt/raid/C1_ML_Analysis/source/famli-ultra-sim/dl')
from dl.loaders import ultrasound_dataset
from torch.utils.data import DataLoader
import  numpy as np
import pickle
import os
import plotly.express as px

In [None]:
# df = pd.read_parquet('/mnt/raid/C1_ML_Analysis/CSV_files/extract_frames_Dataset_C_masked_resampled_256_spc075_wscores_meta_noflyto_100K_train.parquet')
# df

# Root of the analysis storage; the run directory below is relative to it.
mount_point = '/mnt/raid/C1_ML_Analysis'

# Output directory of the classification run whose predictions are analysed.
out_dir = "test_output/classification/extract_frames_Dataset_C_masked_resampled_256_spc075_wscores_meta_noflyto_100K_train/epoch=9-val_loss=0.27"

# Full path of the prediction table, then load it.
prediction_fn = os.path.join(
    mount_point,
    out_dir,
    'extract_frames_Dataset_C_masked_resampled_256_spc075_wscores_meta_noflyto_100K_train_prediction.parquet',
)
df = pd.read_parquet(prediction_fn)


In [None]:
# Storage root handed to the dataset (note the trailing slash).
mount_point = '/mnt/raid/C1_ML_Analysis/'

from dl.transforms import ultrasound_transforms

# Transform object passed to the dataset (the eval variant, judging by its name).
eval_transform = ultrasound_transforms.DinoUSEvalTransforms()

# Dataset driven by the prediction dataframe loaded earlier.
val_ds = ultrasound_dataset.USDatasetV2(
    df,
    mount_point=mount_point,
    transform=eval_transform,
)

In [None]:
# Unshuffled loader: batches follow the dataset order.
us_dl = DataLoader(
    val_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8,
    persistent_workers=True,
    pin_memory=True,
)

In [None]:
# Cache file holding the DINOv2 outputs for everything served by `us_dl`.
feat_fn = '/mnt/raid/C1_ML_Analysis/dino_v2_features/extract_frames_Dataset_C_masked_resampled_256_spc075_wscores_meta_noflyto_100K_train_features.pickle'

# The (expensive) extraction only runs when the cache file is missing.
if not os.path.exists(feat_fn):
    dinov2_vitl14_reg = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14_reg')
    dinov2_vitl14_reg.eval()
    dinov2_vitl14_reg.cuda()

    features_arr = []
    with torch.no_grad():
        for batch in us_dl:
            # assumes each batch is a plain image tensor (no labels) — TODO confirm
            batch_features = dinov2_vitl14_reg(batch.cuda())
            features_arr.append(batch_features.cpu().numpy())

    features_arr = np.concatenate(features_arr, axis=0)

    # Create the target folder first so the finished pass is not lost to a
    # missing directory at write time.
    os.makedirs(os.path.dirname(feat_fn), exist_ok=True)
    # The context manager flushes and closes the file even if pickling fails.
    with open(feat_fn, 'wb') as feat_file:
        pickle.dump(features_arr, feat_file)

In [None]:
# Load the cached feature array and flatten it to one 1024-wide row per item.
# NOTE: pickle executes arbitrary code on load — only read caches written by this notebook.
with open(feat_fn, 'rb') as feat_file:
    features = pickle.load(feat_file).reshape(-1, 1024)

In [None]:
import inspect

from sklearn.manifold import TSNE

# Newer scikit-learn releases renamed TSNE's `n_iter` argument to `max_iter`
# and later dropped the old name; pick whichever keyword is accepted here.
tsne_iter_kw = 'max_iter' if 'max_iter' in inspect.signature(TSNE).parameters else 'n_iter'

# Apply t-SNE for dimensionality reduction to 2D
tsne = TSNE(n_components=2, perplexity=500, random_state=42, **{tsne_iter_kw: 300})
tsne_results = tsne.fit_transform(features)

In [None]:
import random

color_map = {}

# Dedicated, seeded generator: re-running this cell (or the whole notebook)
# reproduces the same palette and leaves the global `random` state untouched.
COLOR_SEED = 42
_color_rng = random.Random(COLOR_SEED)


def generate_random_color():
    """Return one pseudo-random color as a lowercase ``#rrggbb`` hex string."""
    return "#{:06x}".format(_color_rng.randint(0, 0xFFFFFF))


def generate_random_colors(num_colors):
    """Return a list of ``num_colors`` hex color strings (empty for 0)."""
    return [generate_random_color() for _ in range(num_colors)]


# Draw one random color per distinct predicted class and pair them up.
unique_categories = df['pred_class'].unique()
random_colors = generate_random_colors(len(unique_categories))
color_map = dict(zip(unique_categories, random_colors))

In [None]:
# Attach the 2-D t-SNE coordinates to the dataframe (row order must match `features`).
df['tsne_0'] = tsne_results[:, 0]
df['tsne_1'] = tsne_results[:, 1]

# Plot a bounded, reproducible subsample; never request more rows than exist.
tsne_sample = df.sample(n=min(5000, len(df)), random_state=42)
# Cast the class to str so plotly treats it as categorical (one trace per class).
tsne_sample = tsne_sample.assign(pred_class=tsne_sample['pred_class'].astype(str))

# Colors are bound to class values through color_discrete_map, so every point
# gets the color of its own class. Overwriting marker.color on all traces with
# a list built from the full dataframe does not line up with the sampled rows.
fig = px.scatter(
    tsne_sample,
    x='tsne_0',
    y='tsne_1',
    color='pred_class',
    color_discrete_map={str(cat): hex_color for cat, hex_color in color_map.items()},
)
fig

In [None]:
from sklearn.decomposition import PCA

# Project the feature matrix onto its first two principal components.
pca = PCA(n_components=2)
reduced_features = pca.fit_transform(features)

# Store one dataframe column per component.
df['pca_0'] = reduced_features[:, 0]
df['pca_1'] = reduced_features[:, 1]

# Scatter of a 1000-row random subsample, colored by the `score` column.
fig = px.scatter(
    df.sample(n=1000),
    x='pca_0',
    y='pca_1',
    color='score',
)
# fig.update_traces(marker=dict(color=[color_map[cat] for cat in df['pred_class']]))
fig

In [None]:
# Pickled features and matching table from the contrastive-learning run.
feat_fn_sn = '/mnt/raid/C1_ML_Analysis/test_output/contrastive_learning/extract_frames_blind_sweeps_c1_30082022_wscores_simscorew_rr_rs_w16/epoch=125-val_loss=102.47/extract_frames_blind_sweeps_c1_30082022_wscores_test_perplexity_300_sample.pickle'
# NOTE: pickle executes arbitrary code on load — only open files from trusted runs.
# The context manager closes the handle as soon as the load finishes.
with open(feat_fn_sn, 'rb') as feat_sn_file:
    feat_sn = pickle.load(feat_sn_file)
df_sn = pd.read_parquet('/mnt/raid/C1_ML_Analysis/test_output/contrastive_learning/extract_frames_blind_sweeps_c1_30082022_wscores_simscorew_rr_rs_w16/epoch=125-val_loss=102.47/extract_frames_blind_sweeps_c1_30082022_wscores_test_perplexity_300_sample.parquet')
