In [1]:
# Colab-only step: opens the browser upload dialog so the sample archive can be
# sent to the runtime (the next cell expects it at /content/saved_features_sample.zip).
# The trailing ';' keeps the call's return value from being echoed as cell output.
from google.colab import files
files.upload();

Saving saved_features_sample.zip to saved_features_sample.zip


In [2]:
!unzip -q /content/saved_features_sample.zip

In [3]:
import torch
import numpy as np
import json
from sklearn.decomposition import PCA

# **Working Example**

In [4]:
# Load the captions that accompany the saved features; later cells look them
# up by sample position (video_captions[0], video_captions[1]).
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('/content/saved_features_sample/captions.json', 'r', encoding='utf-8') as file:
    video_captions = json.load(file)

In [5]:
import torch
import numpy as np
from sklearn.decomposition import PCA

# Load the tensors.
# map_location='cpu' makes torch.load place the tensors on the CPU even when
# they were saved from a GPU, so this cell also works on a CPU-only runtime
# (and the separate .cpu() call is no longer needed).
feat_audio = torch.load('/content/saved_features_sample/feat_audio.pt', map_location='cpu').detach().numpy()
feat_video = torch.load('/content/saved_features_sample/feat_video.pt', map_location='cpu').detach().numpy()
feat_text  = torch.load('/content/saved_features_sample/feat_text.pt', map_location='cpu').detach().numpy()

print("Loaded tensors and their shapes:")
print("Audio:", feat_audio.shape)
print("Video:", feat_video.shape)
print("Text :", feat_text.shape)

# Fit PCA on all features combined to get a shared embedding space.
# Rows are stacked in the order audio, video, text; the split below relies on it.
all_features = np.vstack([feat_audio, feat_video, feat_text])

print("Fitting PCA on combined feature space...")
pca = PCA(n_components=3, random_state=42)
all_reduced = pca.fit_transform(all_features)

# Split back into modalities.
# Each modality is cut out using its own row count, so the slices stay correct
# even if the three tensors do not hold the same number of rows.
n_audio = feat_audio.shape[0]
n_video = feat_video.shape[0]
N = n_audio  # number of samples (triplets)

audio_3d, video_3d, text_3d = np.split(all_reduced, [n_audio, n_audio + n_video])

print("Reduced shapes:")
print("Audio 3D:", audio_3d.shape)
print("Video 3D:", video_3d.shape)
print("Text 3D :", text_3d.shape)

# Persist the reduced coordinates next to the notebook for reuse.
np.save("audio_3d.npy", audio_3d)
np.save("video_3d.npy", video_3d)
np.save("text_3d.npy", text_3d)

print("Saved audio_3d.npy, video_3d.npy, text_3d.npy")

Loaded tensors and their shapes:
Audio: (7, 512)
Video: (7, 512)
Text : (7, 512)
Fitting PCA on combined feature space...
Reduced shapes:
Audio 3D: (7, 3)
Video 3D: (7, 3)
Text 3D : (7, 3)
Saved audio_3d.npy, video_3d.npy, text_3d.npy


In [6]:
import plotly.graph_objects as go
import numpy as np
from IPython.display import display

def normalize_vector(v):
    """Scale ``v`` to unit length.

    Parameters
    ----------
    v : array-like
        Vector to normalize.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its norm as computed by ``np.linalg.norm``.

    Raises
    ------
    ValueError
        If the norm of ``v`` is zero. Such a vector has no direction, and
        dividing by zero would silently fill the result with NaNs.
    """
    norm = np.linalg.norm(v)
    if norm == 0:
        raise ValueError("cannot normalize a zero-length vector")
    return v / norm

# First audio/video/text triplet (index 0) taken from the shared 3-D PCA space.
raw_vectors = dict(A1=audio_3d[0], V1=video_3d[0], T1=text_3d[0])

# Unit-length copies, so each point sits on the sphere drawn further down.
norm_vectors = {}
for label, vec in raw_vectors.items():
    norm_vectors[label] = normalize_vector(vec)

names = list(norm_vectors)
unit_points = list(norm_vectors.values())

# One list of plain Python floats per axis.
x_coords, y_coords, z_coords = (
    [point[axis].item() for point in unit_points] for axis in range(3)
)

# Per-label coordinate arrays, used to build the triangle.
coords = {
    label: np.array(xyz)
    for label, xyz in zip(names, zip(x_coords, y_coords, z_coords))
}

colors = ["blue"] * len(names)

fig = go.Figure()

# Labelled markers for the three embeddings.
points_trace = go.Scatter3d(
    x=x_coords,
    y=y_coords,
    z=z_coords,
    mode='markers+text',
    marker={'size': 8, 'color': colors},
    text=names,
    textposition="top center",
)
fig.add_trace(points_trace)

# Dotted ray from the origin out to each embedding.
for px, py, pz, ray_color in zip(x_coords, y_coords, z_coords, colors):
    ray_trace = go.Scatter3d(
        x=[0, px],
        y=[0, py],
        z=[0, pz],
        mode='lines',
        line={'width': 5, 'dash': 'dot', 'color': ray_color},
        hoverinfo='skip',
    )
    fig.add_trace(ray_trace)

A, V, T = (coords[label] for label in ("A1", "V1", "T1"))
corners = (A, V, T)
outline = corners + (A,)  # repeat the first corner to close the loop

triangle_x = [corner[0] for corner in outline]
triangle_y = [corner[1] for corner in outline]
triangle_z = [corner[2] for corner in outline]

# Triangle outline.
edges_trace = go.Scatter3d(
    x=triangle_x,
    y=triangle_y,
    z=triangle_z,
    mode='lines',
    line={'color': "red", 'width': 6},
    name="Triangle A1-V1-T1 (Edges)",
)
fig.add_trace(edges_trace)

# Translucent fill: a mesh made of one triangular face (vertices 0, 1, 2).
face_trace = go.Mesh3d(
    x=[corner[0] for corner in corners],
    y=[corner[1] for corner in corners],
    z=[corner[2] for corner in corners],
    i=[0], j=[1], k=[2],
    opacity=0.25,
    color='red',
    name='Triangle Surface',
)
fig.add_trace(face_trace)

# Unit sphere sampled on a 60 x 60 grid of azimuth (u) and polar (v) angles.
u = np.linspace(0, 2 * np.pi, 60)
v = np.linspace(0, np.pi, 60)
sin_v, cos_v = np.sin(v), np.cos(v)

x_sphere = np.outer(np.cos(u), sin_v)
y_sphere = np.outer(np.sin(u), sin_v)
z_sphere = np.outer(np.ones(len(u)), cos_v)

sphere_trace = go.Surface(
    x=x_sphere,
    y=y_sphere,
    z=z_sphere,
    colorscale='Greys',
    opacity=0.15,
    showscale=False,
)
fig.add_trace(sphere_trace)

# Same [-1, 1] range on every axis plus a cubic aspect keeps the sphere round.
scene_axes = {f"{axis}axis": dict(range=[-1, 1]) for axis in "xyz"}
fig.update_layout(
    scene=dict(aspectmode='cube', **scene_axes),
    title=f"3D Triangle Between A1, V1, T1 on the Unit Sphere<br>Video Description: {video_captions[0]}",
)

fig.show()

In [7]:
import plotly.graph_objects as go
import numpy as np
from IPython.display import display

def normalize_vector(v):
    """Scale ``v`` to unit length.

    Parameters
    ----------
    v : array-like
        Vector to normalize.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its norm as computed by ``np.linalg.norm``.

    Raises
    ------
    ValueError
        If the norm of ``v`` is zero. Such a vector has no direction, and
        dividing by zero would silently fill the result with NaNs.
    """
    norm = np.linalg.norm(v)
    if norm == 0:
        raise ValueError("cannot normalize a zero-length vector")
    return v / norm

# Second audio/video/text triplet (index 1) from the shared 3-D PCA space.
raw_vectors = {
    'A2': audio_3d[1],
    'V2': video_3d[1],
    'T2': text_3d[1],
}

# Normalize vectors so every point lies on the unit sphere drawn below.
norm_vectors = {name: normalize_vector(vec) for name, vec in raw_vectors.items()}

names = list(norm_vectors.keys())
x_coords = [v[0].item() for v in norm_vectors.values()]
y_coords = [v[1].item() for v in norm_vectors.values()]
z_coords = [v[2].item() for v in norm_vectors.values()]

# Per-label coordinate arrays, used to build the triangle.
coords = {
    name: np.array([v[0].item(), v[1].item(), v[2].item()])
    for name, v in norm_vectors.items()
}

colors = ["blue" for _ in names]

fig = go.Figure()

# Labelled markers for the three embeddings.
fig.add_trace(go.Scatter3d(
    x=x_coords,
    y=y_coords,
    z=z_coords,
    mode='markers+text',
    marker=dict(size=8, color=colors),
    text=names,
    textposition="top center"
))

# Dotted ray from the origin out to each embedding.
for x, y, z, color in zip(x_coords, y_coords, z_coords, colors):
    fig.add_trace(go.Scatter3d(
        x=[0, x],
        y=[0, y],
        z=[0, z],
        mode='lines',
        line=dict(width=5, dash='dot', color=color),
        hoverinfo='skip'
    ))

A = coords["A2"]
V = coords["V2"]
T = coords["T2"]

# Repeat the first corner at the end so the outline closes.
triangle_x = [A[0], V[0], T[0], A[0]]
triangle_y = [A[1], V[1], T[1], A[1]]
triangle_z = [A[2], V[2], T[2], A[2]]

fig.add_trace(go.Scatter3d(
    x=triangle_x,
    y=triangle_y,
    z=triangle_z,
    mode='lines',
    line=dict(color="red", width=6),
    # Build the legend label from the plotted names so it cannot go stale when
    # the cell is copied for another triplet (it used to read "A1-V1-T1" here).
    name=f"Triangle {'-'.join(names)} (Edges)"
))

fig.add_trace(go.Mesh3d(
    x=[A[0], V[0], T[0]],
    y=[A[1], V[1], T[1]],
    z=[A[2], V[2], T[2]],
    i=[0], j=[1], k=[2],      # single triangular face
    opacity=0.25,            # adjust transparency here
    color='red',
    name='Triangle Surface'
))

# Unit sphere sampled on a 60 x 60 grid of azimuth (u) and polar (v) angles.
u = np.linspace(0, 2 * np.pi, 60)
v = np.linspace(0, np.pi, 60)

x_sphere = np.outer(np.cos(u), np.sin(v))
y_sphere = np.outer(np.sin(u), np.sin(v))
z_sphere = np.outer(np.ones(len(u)), np.cos(v))

fig.add_trace(go.Surface(
    x=x_sphere,
    y=y_sphere,
    z=z_sphere,
    colorscale='Greys',
    opacity=0.15,
    showscale=False
))

# Same [-1, 1] range on every axis plus a cubic aspect keeps the sphere round.
fig.update_layout(
    scene=dict(
        xaxis=dict(range=[-1,1]),
        yaxis=dict(range=[-1,1]),
        zaxis=dict(range=[-1,1]),
        aspectmode='cube'
    ),
    title=f"3D Triangle Between A2, V2, T2 on the Unit Sphere<br>Video Description: {video_captions[1]}",
)

fig.show()

In [8]:
import plotly.graph_objects as go
import numpy as np
from IPython.display import display

def normalize_vector(v):
    """Scale ``v`` to unit length.

    Parameters
    ----------
    v : array-like
        Vector to normalize.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its norm as computed by ``np.linalg.norm``.

    Raises
    ------
    ValueError
        If the norm of ``v`` is zero. Such a vector has no direction, and
        dividing by zero would silently fill the result with NaNs.
    """
    norm = np.linalg.norm(v)
    if norm == 0:
        raise ValueError("cannot normalize a zero-length vector")
    return v / norm

# Contrastive example: audio and video come from sample 0, while the text
# embedding comes from sample 1.
raw_vectors = {
    'A1': audio_3d[0],
    'V1': video_3d[0],
    'T2': text_3d[1],
}

# Normalize vectors so every point lies on the unit sphere drawn below.
norm_vectors = {name: normalize_vector(vec) for name, vec in raw_vectors.items()}

names = list(norm_vectors.keys())
x_coords = [v[0].item() for v in norm_vectors.values()]
y_coords = [v[1].item() for v in norm_vectors.values()]
z_coords = [v[2].item() for v in norm_vectors.values()]

# Per-label coordinate arrays, used to build the triangle.
coords = {
    name: np.array([v[0].item(), v[1].item(), v[2].item()])
    for name, v in norm_vectors.items()
}

colors = ["blue" for _ in names]

fig = go.Figure()

# Labelled markers for the three embeddings.
fig.add_trace(go.Scatter3d(
    x=x_coords,
    y=y_coords,
    z=z_coords,
    mode='markers+text',
    marker=dict(size=8, color=colors),
    text=names,
    textposition="top center"
))

# Dotted ray from the origin out to each embedding.
for x, y, z, color in zip(x_coords, y_coords, z_coords, colors):
    fig.add_trace(go.Scatter3d(
        x=[0, x],
        y=[0, y],
        z=[0, z],
        mode='lines',
        line=dict(width=5, dash='dot', color=color),
        hoverinfo='skip'
    ))

A = coords["A1"]
V = coords["V1"]
T = coords["T2"]

# Repeat the first corner at the end so the outline closes.
triangle_x = [A[0], V[0], T[0], A[0]]
triangle_y = [A[1], V[1], T[1], A[1]]
triangle_z = [A[2], V[2], T[2], A[2]]

fig.add_trace(go.Scatter3d(
    x=triangle_x,
    y=triangle_y,
    z=triangle_z,
    mode='lines',
    line=dict(color="red", width=6),
    # Build the legend label from the plotted names so it matches this
    # triplet (it used to read "A1-V1-T1" although the text vertex is T2).
    name=f"Triangle {'-'.join(names)} (Edges)"
))

fig.add_trace(go.Mesh3d(
    x=[A[0], V[0], T[0]],
    y=[A[1], V[1], T[1]],
    z=[A[2], V[2], T[2]],
    i=[0], j=[1], k=[2],      # single triangular face
    opacity=0.25,            # adjust transparency here
    color='red',
    name='Triangle Surface'
))

# Unit sphere sampled on a 60 x 60 grid of azimuth (u) and polar (v) angles.
u = np.linspace(0, 2 * np.pi, 60)
v = np.linspace(0, np.pi, 60)

x_sphere = np.outer(np.cos(u), np.sin(v))
y_sphere = np.outer(np.sin(u), np.sin(v))
z_sphere = np.outer(np.ones(len(u)), np.cos(v))

fig.add_trace(go.Surface(
    x=x_sphere,
    y=y_sphere,
    z=z_sphere,
    colorscale='Greys',
    opacity=0.15,
    showscale=False
))

# Same [-1, 1] range on every axis plus a cubic aspect keeps the sphere round.
fig.update_layout(
    scene=dict(
        xaxis=dict(range=[-1,1]),
        yaxis=dict(range=[-1,1]),
        zaxis=dict(range=[-1,1]),
        aspectmode='cube'
    ),
    title="3D Triangle Between A1, V1, T2 on the Unit Sphere<br>Contrastive Example",
)

fig.show()