In [65]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import ast
import plotly.express as px
from sklearn.preprocessing import LabelEncoder, StandardScaler
from umap import UMAP
from sklearn.manifold import TSNE

In [87]:
# Load the embedded dataset and discard the two bookkeeping columns
# ('Unnamed: 0' and 'sample') in a single chained expression.
df = (
    pd.read_csv('full_embedded_data.csv')
    .drop(columns=['Unnamed: 0', 'sample'])
)

In [88]:
# The CSV stores array-like columns as text; turn them back into objects.
def _parse_vector(text):
    """Evaluate the stored literal and wrap the result in a NumPy array."""
    return np.array(ast.literal_eval(text))


def _parse_multi_label(text):
    """Evaluate the stored literal after turning every space into a comma."""
    return ast.literal_eval(text.replace(' ', ','))


def _has_single_label(flags):
    """True when the entries of ``flags`` sum to exactly one."""
    return sum(flags) == 1


df['vector'] = df['vector'].apply(_parse_vector)
df['label_multi'] = df['label_multi'].apply(_parse_multi_label)
df['single_label'] = df['label_multi'].apply(_has_single_label)

In [89]:
# Remove every row whose label contains one of the excluded terms.
excluded_terms = ('akimbo', 'Moving legs')
is_excluded = df['label'].apply(
    lambda label: any(term in label for term in excluded_terms)
)
df = df.loc[~is_excluded]

In [90]:
# Row count of the frame after the label filter.
df.shape[0]

1059

In [91]:
# Feature matrix: stack the per-row vectors into one 2-D array, then
# standardize it with StandardScaler.
scaler = StandardScaler()
X = scaler.fit_transform(np.stack(df['vector'].values))

# Target vector: integer codes for the string labels. The fitted encoder is
# kept so codes can be mapped back to label names later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['label'])

In [92]:
# Show the column names currently present in the working frame.
df.columns

Index(['emb1', 'emb2', 'label', 'vector', 'label_multi', 'inds', 'file_paths',
       'file_names', 'single_label'],
      dtype='object')

In [93]:
# 2-D UMAP projection of the standardized embeddings.
# - y is passed as the target, so this is a *supervised* projection: the
#   labels influence the layout and the plot is not a label-free view.
# - random_state pins the otherwise stochastic optimisation so that a fresh
#   "Restart & Run All" reproduces the same coordinates.
reducer_umap = UMAP(n_components=2, n_neighbors=5, random_state=42)
df[['umap1', 'umap2']] = reducer_umap.fit_transform(X, y=y)

In [94]:
# 2-D t-SNE projection of the standardized embeddings.
# - TSNE.fit_transform ignores its y argument, so the labels are not passed:
#   this projection is unsupervised.
# - random_state pins the stochastic optimisation so re-running the notebook
#   reproduces the same coordinates.
reducer_tsne = TSNE(
    n_components=2,
    perplexity=150,
    init='pca',
    learning_rate='auto',
    random_state=42,
)
df[['tsne1', 'tsne2']] = reducer_tsne.fit_transform(X)



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



In [97]:
def interactive_scatter_plot(df, var='umap'):
    """Display a clickable 2-D scatter plot of a projection stored in ``df``.

    Points are coloured by ``label``; hovering shows ``label`` and
    ``file_paths``; clicking a point prints that point's ``file_paths`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``f'{var}1'``, ``f'{var}2'``, ``'label'``
        and ``'file_paths'``.
    var : str, default 'umap'
        Prefix of the two coordinate columns to plot, e.g. ``'umap'`` plots
        ``umap1`` against ``umap2``.

    Returns
    -------
    None
        The figure is displayed as a side effect.

    Notes
    -----
    Plotly only dispatches ``on_click`` callbacks for traces that belong to a
    ``FigureWidget`` shown as a widget, so the figure is converted and shown
    with ``display`` rather than ``fig.show()``.
    """
    # Local imports: neither name is imported at the top of the notebook.
    import plotly.graph_objects as go
    from IPython.display import display

    x_col, y_col = f'{var}1', f'{var}2'
    fig = px.scatter(
        df,
        x=x_col,
        y=y_col,
        color='label',
        hover_data=['label', 'file_paths'],
        # Carry the path on each point so the click handler can read it from
        # the clicked trace; custom_data columns come first in customdata.
        custom_data=['file_paths'],
    )

    # Label the axes with the columns actually plotted.
    fig.update_layout(title='Interactive Scatter Plot',
                      xaxis_title=x_col,
                      yaxis_title=y_col)

    widget = go.FigureWidget(fig)

    def _print_clicked_path(trace, points, state):
        # The handler can be invoked with no selected points for this trace;
        # skip those calls instead of raising IndexError.
        if points.point_inds:
            print(trace.customdata[points.point_inds[0]][0])

    # color='label' yields one trace per label, and point indices are local
    # to each trace, so register the handler on every trace.
    for trace in widget.data:
        trace.on_click(_print_clicked_path)

    display(widget)




In [98]:
# Plot the UMAP coordinates for the rows flagged as single-label only.
single_label_rows = df.loc[df['single_label'] == 1]
interactive_scatter_plot(single_label_rows, var='umap')