In [1]:
import sys
# Print the path of the Python interpreter running this kernel, so that
# environment mix-ups (wrong venv / system Python) are visible up front.
print(sys.executable)

/usr/bin/python


In [6]:
import os
import sys

# When the kernel is started from a directory whose name ends in `visualize`,
# step up to the parent directory; the relative paths below (`./data/data/`,
# the `loader` package) are written against that parent (presumably the
# project root).
if os.getcwd().endswith('visualize'):
    os.chdir('..')

# Make the project-local packages importable. Guarded so that re-running this
# cell does not keep appending the same entry to sys.path.
project_root = os.getcwd()
if project_root not in sys.path:
    sys.path.append(project_root)

# These imports must stay below the sys.path manipulation above.
from loader.dataloader import VideoDataSet,VideoDataLoader
import torch

# The loader below shuffles, and later cells sub-sample frames from it. Seed
# torch's global RNG so the sampled frames (and therefore the plots) are
# repeatable from a fresh kernel.
# NOTE(review): assumes VideoDataLoader draws its shuffle order from torch's
# global RNG (as torch.utils.data.DataLoader does) — confirm.
SEED = 42
torch.manual_seed(SEED)

dataset ="50salads"
split = "test.split1.bundle"
default_path="./data/data/"
knowns = 14       # number of known classes
unknowns = 5      # number of unknown classes
prototypes = 30   # added to `knowns` to form `total_classes`
video_dataset = VideoDataSet(dataset=dataset,
                               split=split,
                               default_path=default_path,
                               knowns=knowns,
                               unknowns=unknowns,
                               total_classes=knowns + prototypes)
data_loader = VideoDataLoader(video_dataset, batch_size=1, shuffle=True)

In [7]:
from model.bert import ActionBERT, ActionBERTConfig
import torch.nn.functional as F


# `knowns` is defined in the data-setup cell; `prototypes` is repeated here so
# the model configuration is readable in one place.
# NOTE(review): these hyper-parameters presumably have to match the ones the
# checkpoint was trained with — confirm.
prototypes = 30
bert_conf = ActionBERTConfig(
    total_classes=knowns + prototypes,
    input_dim=2048,
    d_model=128,
    num_heads=8,
    num_layers=4,
    dropout=0)
model = ActionBERT(config=bert_conf)
path = "./output/actionbert_first_try.pth"

# Prefer the GPU but fall back to the CPU so the checkpoint can still be
# loaded on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# SECURITY NOTE: torch.load unpickles the file — only load checkpoints that
# come from a trusted source.
state_dict = torch.load(path, map_location=device)

# strict=False tolerates mismatching parameter names, which would otherwise
# leave parts of the model at their random initialisation without any notice.
# Surface the mismatch report instead of discarding it.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print("Warnung: Checkpoint passt nicht vollstaendig zum Modell.")
    print(f"  Fehlende Keys ({len(load_result.missing_keys)}): {list(load_result.missing_keys)}")
    print(f"  Unerwartete Keys ({len(load_result.unexpected_keys)}): {list(load_result.unexpected_keys)}")

model = model.to(device)
model.eval()
print("Modell erfolgreich geladen.")

def get_model_pred(features,padding_mask):
    """Return the predicted class index for every position of `features`.

    Calls the notebook-level `model` with an all-False mask as its second
    argument and takes the argmax of the second model output (the class
    logits) over the last dimension. The first and third model outputs are
    ignored.

    Args:
        features: input tensor, forwarded to `model` unchanged.
        padding_mask: mask tensor, forwarded to `model` unchanged; also used
            as the shape/device template for the all-False mask.

    Returns:
        Integer tensor of class indices with the shape of the class logits
        minus their last dimension.
    """
    # All-False mask for the model's second argument (presumably "no position
    # is masked out" — confirm against ActionBERT.forward).
    unknown_mask = torch.zeros_like(padding_mask).bool()

    # Pure inference: no autograd graph is needed for an argmax.
    with torch.no_grad():
        _, class_logits, _ = model(features, unknown_mask, padding_mask)

    # softmax is monotonic, so the argmax of the raw logits is the same class
    # the softmax probabilities would select.
    return torch.argmax(class_logits, dim=-1)
    
    

Modell erfolgreich geladen.


In [8]:
# Pull a single batch and unpack its fields into notebook-level names.
# `labels_dict` in particular is needed by the plotting cells further down.
batch = next(iter(data_loader), None)
if batch is None:
    # Fail here with a clear message instead of with a NameError in a later cell.
    raise RuntimeError("data_loader ist leer: es konnte kein Batch geladen werden.")

features = batch["features"]
unknown_mask = batch["unknown_mask"]
target_truth = batch["target_truth"]
padding_mask = batch["padding_mask"]
labels_dict = batch["labels_dict"]
remap_dict = batch["remap_dict"]
target_start = batch['target_start']
target_end = batch['target_end']

In [9]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from mpl_toolkits.mplot3d import Axes3D
import plotly.express as px
import pandas as pd

# Interactive 3-D t-SNE of frame embeddings for the KNOWN classes.
# Depends on `model`, `data_loader` and `labels_dict` from earlier cells.

# --- CONFIGURATION ---
SAMPLES_PER_CLASS = 500  # maximum number of frame embeddings kept per class
NUM_KNOWNS =  14         # labels below this value are collected
MAX_BATCHES = 52         # safety cap on processed batches, in case some classes are rare
TSNE_PERPLEXITY = 80     # upper bound; reduced automatically for small sample sets

# label -> list of embedding vectors
class_embeddings = defaultdict(list)
classes_filled = 0

print(f"Sammle {SAMPLES_PER_CLASS} Embeddings f√ºr jede der {NUM_KNOWNS} Known Classes...")

# Run on whatever device the model already lives on instead of assuming CUDA.
model_device = next(model.parameters()).device

# --- 1. COLLECT DATA ---
with torch.no_grad():
    for batch_idx, batch in enumerate(data_loader):
        features = batch["features"].to(model_device)
        padding_mask = batch["padding_mask"].to(model_device)
        target_truth = batch["target_truth"].to(model_device)

        # All-False mask for the model's second argument (inference).
        patch_mask = torch.zeros_like(padding_mask).bool()
        # NOTE(review): the third model output is assumed to be the per-frame
        # embedding — confirm against ActionBERT.forward.
        _, _, embeddings = model(features, patch_mask, padding_mask)

        # Merge batch and time dimensions and keep only positions where the
        # padding mask is set. `.bool()` guarantees mask indexing (an integer
        # mask would be interpreted as gather indices); `reshape` also works
        # for non-contiguous tensors.
        valid_indices = padding_mask.reshape(-1).bool()
        flat_embeds = embeddings.reshape(-1, embeddings.size(-1))[valid_indices]
        flat_targets = target_truth.reshape(-1)[valid_indices]

        flat_embeds_np = flat_embeds.cpu().numpy()
        flat_targets_np = flat_targets.cpu().numpy()

        # Top up each class bucket with the first still-missing frames of this
        # batch (one slice per class instead of one Python step per frame).
        for label in np.unique(flat_targets_np):
            if label >= NUM_KNOWNS:
                continue
            bucket = class_embeddings[label]
            missing = SAMPLES_PER_CLASS - len(bucket)
            if missing <= 0:
                continue
            bucket.extend(flat_embeds_np[flat_targets_np == label][:missing])
            if len(bucket) == SAMPLES_PER_CLASS:
                classes_filled += 1
                print(f"Klasse {label} komplett ({SAMPLES_PER_CLASS} Samples).")

        # Stop as soon as every known class is full.
        if classes_filled >= NUM_KNOWNS:
            break

        if batch_idx + 1 >= MAX_BATCHES:
            print("Konnte nicht alle Klassen voll f√ºllen, mache weiter mit dem was da ist.")
            break

# --- 2. PREPARE DATA FOR T-SNE ---
X = []  # embeddings
y = []  # labels

# Iterate in label order so the legend comes out sorted.
sorted_labels = sorted(class_embeddings.keys())
for label in sorted_labels:
    embs = class_embeddings[label]
    X.extend(embs)
    y.extend([label] * len(embs))

X = np.array(X)
y = np.array(y)

if X.shape[0] == 0:
    raise RuntimeError("Keine Embeddings gesammelt: t-SNE kann nicht berechnet werden.")

print(f"Starte t-SNE auf {X.shape[0]} Punkten...")

# --- 3. T-SNE (3D) ---
# scikit-learn requires perplexity < n_samples; clamp it so a small sample set
# does not abort the cell.
perplexity = max(1, min(TSNE_PERPLEXITY, X.shape[0] - 1))
tsne = TSNE(n_components=3, perplexity=perplexity, max_iter=1000, init='pca', random_state=42)
X_tsne = tsne.fit_transform(X)

# --- 4. LABEL NAMES ---
# `labels_dict` comes from the cell that pulled a single batch; it is either a
# dict or a list whose first element is the dict.
if isinstance(labels_dict, list):
    raw_dict = labels_dict[0]
else:
    raw_dict = labels_dict

# Invert the mapping so class ids can be looked up.
id_to_name = {v: k for k, v in raw_dict.items()}

# One "<id>: <name>" string per point; falls back to the bare id when the id
# is not present in the mapping.
label_names = [f"{label_id}: {id_to_name.get(label_id, str(label_id))}" for label_id in y]

# --- 5. PLOT (plotly, 3D) ---
df = pd.DataFrame({
    'x': X_tsne[:, 0],
    'y': X_tsne[:, 1],
    'z': X_tsne[:, 2],
    'label': label_names,  # shown in the legend and on hover
    'class_id': y          # numeric id, kept for sorting/inspection
})

print("Erstelle interaktiven Plot...")

fig = px.scatter_3d(
    df,
    x='x', y='y', z='z',
    color='label',           # one colour per class
    symbol='label',          # one marker symbol per class
    hover_name='label',
    title="3D Action Embeddings (Interaktiv)",
    opacity=0.7,             # slightly transparent so dense clouds stay readable
    width=1000, height=800
)

# Smaller markers without outline.
fig.update_traces(marker=dict(size=3, line=dict(width=0)))

# Tight margins and a legend title.
fig.update_layout(
    margin=dict(l=0, r=0, b=0, t=30),
    legend_title_text='Known Actions'
)

fig.show()

Sammle 500 Embeddings f√ºr jede der 14 Known Classes...
Klasse 3 komplett (500 Samples).
Klasse 5 komplett (500 Samples).
Klasse 6 komplett (500 Samples).
Klasse 7 komplett (500 Samples).
Klasse 10 komplett (500 Samples).
Klasse 11 komplett (500 Samples).
Klasse 12 komplett (500 Samples).
Klasse 1 komplett (500 Samples).
Klasse 2 komplett (500 Samples).
Klasse 8 komplett (500 Samples).
Klasse 9 komplett (500 Samples).
Klasse 13 komplett (500 Samples).
Klasse 0 komplett (500 Samples).
Klasse 4 komplett (500 Samples).
Starte t-SNE auf 7000 Punkten...
Erstelle interaktiven Plot...


In [43]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
import plotly.express as px
import pandas as pd

# Interactive 3-D t-SNE of frame embeddings for the UNKNOWN classes.
# Depends on `model`, `data_loader` and `labels_dict` from earlier cells.

# --- CONFIGURATION ---
SAMPLES_PER_CLASS = 500  # maximum number of frame embeddings kept per class
NUM_KNOWNS = 14          # labels >= this value are collected as unknown
NUM_UNKNOWNS = 5         # stop once this many unknown classes are full
MAX_BATCHES = 102        # safety cap on processed batches
TSNE_PERPLEXITY = 50     # upper bound; reduced automatically for small sample sets

# label -> list of embedding vectors
class_embeddings = defaultdict(list)
# Labels of the unknown classes that reached SAMPLES_PER_CLASS.
filled_classes = set()

print(f"Sammle {SAMPLES_PER_CLASS} Embeddings f√ºr die UNKNOWN Classes (ab ID {NUM_KNOWNS})...")

# Run on whatever device the model already lives on instead of assuming CUDA.
model_device = next(model.parameters()).device

# --- 1. COLLECT DATA ---
with torch.no_grad():
    for batch_idx, batch in enumerate(data_loader):
        features = batch["features"].to(model_device)
        padding_mask = batch["padding_mask"].to(model_device)
        target_truth = batch["target_truth"].to(model_device)

        # All-False mask for the model's second argument (inference).
        patch_mask = torch.zeros_like(padding_mask).bool()
        # NOTE(review): the third model output is assumed to be the per-frame
        # embedding — confirm against ActionBERT.forward.
        _, _, embeddings = model(features, patch_mask, padding_mask)

        # Merge batch and time dimensions and keep only positions where the
        # padding mask is set. `.bool()` guarantees mask indexing; `reshape`
        # also works for non-contiguous tensors.
        valid_indices = padding_mask.reshape(-1).bool()
        flat_embeds = embeddings.reshape(-1, embeddings.size(-1))[valid_indices]
        flat_targets = target_truth.reshape(-1)[valid_indices]

        flat_embeds_np = flat_embeds.cpu().numpy()
        flat_targets_np = flat_targets.cpu().numpy()

        # Top up each unknown-class bucket with the first still-missing frames
        # of this batch (one slice per class instead of one step per frame).
        for label in np.unique(flat_targets_np):
            if label < NUM_KNOWNS:
                continue
            bucket = class_embeddings[label]
            missing = SAMPLES_PER_CLASS - len(bucket)
            if missing <= 0:
                continue
            bucket.extend(flat_embeds_np[flat_targets_np == label][:missing])
            if len(bucket) == SAMPLES_PER_CLASS:
                filled_classes.add(label)
                print(f"Unknown-Klasse {label} komplett ({SAMPLES_PER_CLASS} Samples).")

        # Stop as soon as all expected unknown classes are full.
        if len(filled_classes) >= NUM_UNKNOWNS:
            break

        if batch_idx + 1 >= MAX_BATCHES:
            print(f"Konnte nicht alle f√ºllen. Gefundene Klassen: {list(filled_classes)}")
            break

# --- 2. PREPARE DATA FOR T-SNE ---
X = []
y = []

# Iterate in label order so the legend comes out sorted.
sorted_labels = sorted(class_embeddings.keys())
for label in sorted_labels:
    embs = class_embeddings[label]
    X.extend(embs)
    y.extend([label] * len(embs))

X = np.array(X)
y = np.array(y)

if X.shape[0] == 0:
    raise RuntimeError("Keine Unknown-Embeddings gesammelt: t-SNE kann nicht berechnet werden.")

print(f"Starte t-SNE auf {X.shape[0]} Unknown-Punkten...")

# --- 3. T-SNE (3D) ---
# scikit-learn requires perplexity < n_samples; clamp it so a small sample set
# does not abort the cell.
perplexity = max(1, min(TSNE_PERPLEXITY, X.shape[0] - 1))
tsne = TSNE(n_components=3, perplexity=perplexity, max_iter=1000, init='pca', random_state=42)
X_tsne = tsne.fit_transform(X)

# --- 4. LABEL NAMES ---
# `labels_dict` comes from the cell that pulled a single batch; it is either a
# dict or a list whose first element is the dict.
if isinstance(labels_dict, list):
    raw_dict = labels_dict[0]
else:
    raw_dict = labels_dict

# Invert the mapping so class ids can be looked up.
id_to_name = {v: k for k, v in raw_dict.items()}

# One "<id>: <name> (GT)" string per point; the "(GT)" suffix marks the label
# as ground truth. Falls back to the bare id when the id is not in the mapping.
plot_labels = [f"{lbl}: {id_to_name.get(lbl, str(lbl))} (GT)" for lbl in y]

# --- 5. PLOT (plotly, 3D) ---
df = pd.DataFrame({
    'x': X_tsne[:, 0],
    'y': X_tsne[:, 1],
    'z': X_tsne[:, 2],
    'label': plot_labels,
    'id': y
})

fig = px.scatter_3d(
    df,
    x='x', y='y', z='z',
    color='label',
    title="3D t-SNE: Struktur der UNKNOWN Actions (ActionBERT)",
    opacity=0.8,
    width=1000, height=800
)

fig.update_traces(marker=dict(size=4))
fig.update_layout(legend_title_text='Ground Truth (Unknowns)')
fig.show()

Sammle 500 Embeddings f√ºr die UNKNOWN Classes (ab ID 14)...
Unknown-Klasse 14 komplett (500 Samples).
Unknown-Klasse 15 komplett (500 Samples).
Unknown-Klasse 16 komplett (500 Samples).
Unknown-Klasse 18 komplett (500 Samples).
Unknown-Klasse 17 komplett (500 Samples).
Starte t-SNE auf 2500 Unknown-Punkten...
