In [1]:
from sentence_transformers import SentenceTransformer
import numpy as np
from tqdm import tqdm
import os

  from .autonotebook import tqdm as notebook_tqdm


### Encode all captions and save to .npy

In [None]:
# Encode every caption listed in the index file with a SentenceTransformer
# model and save the resulting embedding matrix as a single .npy file.

# --- CONFIGURATION ---
# NOTE(review): these are absolute, machine-specific paths; adjust them (or
# derive them from a shared data directory) before running elsewhere.
index_file = "/Users/akhidre/pubgit/HumanML3D/HumanML3D/train.txt"   # index file listing caption IDs
base_path = "/Users/akhidre/pubgit/HumanML3D/HumanML3D/texts"   # folder containing the actual caption text files
output_file = "text_embeddings.npy"
batch_size = 8

# --- LOAD MODEL ---
model = SentenceTransformer('all-MiniLM-L6-v2')

# --- READ INDEX FILE ---
# One caption ID (or file name) per line; blank lines are ignored.
# An explicit encoding keeps the result independent of the platform default.
with open(index_file, 'r', encoding='utf-8') as f:
    caption_paths = [line.strip() for line in f if line.strip()]

print(f"Found {len(caption_paths)} caption entries in index file")

# --- COLLECT CAPTIONS ---
captions = []
for path in tqdm(caption_paths, desc="Loading captions"):
    # ✅ Add .txt automatically if missing
    if not path.endswith(".txt"):
        path = path + ".txt"

    full_path = os.path.join(base_path, path)
    if not os.path.exists(full_path):
        print(f"⚠️ Skipping missing file: {full_path}")
        continue

    with open(full_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            # Keep only the text before the first '#'; everything after it is
            # metadata. Strip AFTER splitting so whitespace next to the '#'
            # is removed and captions that turn out empty are really dropped.
            caption = raw_line.split('#', 1)[0].strip()
            if caption:
                captions.append(caption)

print(f"✅ Total captions collected: {len(captions)}")

# --- GENERATE EMBEDDINGS ---
# One embedding row per collected caption, in collection order.
embeddings = model.encode(captions, batch_size=batch_size, show_progress_bar=True)
embeddings = np.asarray(embeddings)

# --- SAVE TO DISK ---
np.save(output_file, embeddings)
print(f"✅ Saved {embeddings.shape} embeddings to {output_file}")


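### Pair encoded text captions (z-text) with motions and save to .npz
Set `max_files` in the config below to limit how many motion files are processed; leave it unset (falsy) to process every file in the index.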

In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
from tqdm import tqdm
import os

# Pair every caption embedding with its motion sequence and store all pairs
# in one compressed .npz archive with the keys "z_texts", "motions" and
# "motion_ids" (one entry per caption; a motion is repeated for each of its
# captions).

# --- CONFIG ---
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
index_file = "/Users/akhidre/pubgit/HumanML3D/HumanML3D/val.txt"
motion_path = "/Users/akhidre/pubgit/HumanML3D/HumanML3D/new_joints"
caption_path = "/Users/akhidre/pubgit/HumanML3D/HumanML3D/texts"
output_file = "/Users/akhidre/pubgit/HumanML3D/HumanML3D/paired_text_motion_val.npz"
max_files = None   # optional, for testing: positive int = only the first N motion IDs, None = all

# --- LOAD MODEL ---
model = SentenceTransformer('all-MiniLM-L6-v2')

# --- READ INDEX FILE ---
# One motion ID per line; blank lines are ignored.
with open(index_file, 'r', encoding='utf-8') as f:
    motion_ids = [line.strip() for line in f if line.strip()]

if max_files:
    motion_ids = motion_ids[:int(max_files)]

paired_data = {"z_texts": [], "motions": [], "motion_ids": []}

# --- MAIN LOOP ---
for motion_id in tqdm(motion_ids, desc="Pairing text and motion"):
    motion_file = os.path.join(motion_path, f"{motion_id}.npy")
    caption_file = os.path.join(caption_path, f"{motion_id}.txt")

    if not (os.path.exists(motion_file) and os.path.exists(caption_file)):
        print(f"⚠️ Skipping missing pair: {motion_id}")
        continue

    # Load motion
    motion = np.load(motion_file)  # shape: (T, 22, 3)

    # Load captions: keep the text before the first '#' (the rest is
    # metadata) and drop lines whose caption part is empty.
    with open(caption_file, 'r', encoding='utf-8') as f:
        captions = [text for text in (l.split('#', 1)[0].strip() for l in f) if text]

    if not captions:
        # Nothing to pair with this motion; skip the encoder call entirely.
        continue

    # Compute embeddings (one row per caption)
    z_texts = model.encode(captions, show_progress_bar=False)

    # Store all captions from this motion file
    for z in z_texts:
        paired_data["z_texts"].append(z)
        paired_data["motions"].append(motion)
        paired_data["motion_ids"].append(motion_id)

# --- SAVE COMPACTLY ---
# Motions have different frame counts, so they are stored as a 1-D object
# array with one ndarray per pair. The array is filled element by element on
# purpose: np.array(list, dtype=object) would instead build an N-D array of
# boxed Python scalars whenever all motions happen to share the same shape
# (e.g. in a small test run), changing the layout of the saved data.
motions_obj = np.empty(len(paired_data["motions"]), dtype=object)
for pair_idx, pair_motion in enumerate(paired_data["motions"]):
    motions_obj[pair_idx] = pair_motion

# NOTE: because "motions" is an object array, the archive must later be
# loaded with np.load(..., allow_pickle=True).
np.savez_compressed(output_file,
                    z_texts=np.array(paired_data["z_texts"], dtype=np.float32),
                    motions=motions_obj,
                    motion_ids=np.array(paired_data["motion_ids"]))

print(f"✅ Saved paired text-motion dataset to {output_file}")
print(f"Total pairs: {len(paired_data['z_texts'])}")


### Loading paired text embeddings and motion sequences

In [4]:
# Open the paired text/motion archive and inspect one sample.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only
# load archives that come from a trusted source.
data = np.load(
    "/Users/akhidre/pubgit/HumanML3D/HumanML3D/paired_text_motion.npz",
    allow_pickle=True,
)
z_texts, motions = data["z_texts"], data["motions"]

# Example access: shapes of a single pair, then of the whole embedding array.
i = 22
sample_z_text = z_texts[i]
print("z_text shape:", sample_z_text.shape)

sample_motion = motions[i]
print("motion shape:", sample_motion.shape)
print(z_texts.shape)







z_text shape: (384,)
motion shape: (199, 22, 3)
(69896, 384)


### Check the maximum number of frames in the HumanML3D dataset

In [None]:
import os
import numpy as np

# Scan every .npy motion file and report the largest frame count (first
# array dimension) together with all files that reach it.

# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
motion_dir = "/Users/akhidre/pubgit/HumanML3D/HumanML3D/new_joints"

max_len = 0
files_with_max = []

# Loop through all motion files. os.listdir() returns names in arbitrary
# order, so sort them to make the reported tie list deterministic.
for fname in sorted(os.listdir(motion_dir)):
    if not fname.endswith(".npy"):
        continue
    # Only the shape is needed: memory-map the file so the array data is not
    # read into memory just to look at its first dimension.
    motion = np.load(os.path.join(motion_dir, fname), mmap_mode="r")
    T = motion.shape[0]

    if T > max_len:
        # New maximum: restart the list of files that reach it.
        max_len = T
        files_with_max = [fname]
    elif T == max_len:
        files_with_max.append(fname)

print(f"Maximum number of frames: {max_len}")
print("Files with maximum frames:")
for longest_name in files_with_max:
    print(longest_name)
