# Notebook Overview

In this notebook we will sample and classify the HumanML3D dataset along with other datasets like DanceDB , MOYO, CNRS.
The prerequiste of running this notebook is that we have the npy_to_text_mapping.xlsx ready to use by running the pervious scripts sequentially.

In [2]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import re
import pandas as pd
import os
import numpy as np
from tqdm import tqdm

Mounted at /content/drive


In [7]:
cd /content/drive/MyDrive/TRME

/content/drive/MyDrive/TRME


In [8]:
dset_dir = './Dataset'
hml3d_dir = './Dataset/HumanML3D'
moyo_dir = './Dataset/MOYO'
hml3d_f2l = './TRME/npy_to_text_mapping.xlsx'

In [9]:
df = pd.read_excel(hml3d_f2l)
df.head()
print(df.shape)

(29034, 5)


#Adding MOYO Dataset Data

In [10]:
# Paths to directories containing .npy files and associated text files for MOYO dataset
moyo_nj = './TRME/MOYO/new_joints'
moyo_text_dir = './TRME/MOYO/new_joints'

# Lists to store data
moyo_paths = []
moyo_arr = []
moyo_text = []
st_frame = []
end_frame = []

for file in tqdm(os.listdir(moyo_nj), desc="Processing files"):
    if file.endswith('.npy'):
        npy_path = os.path.join(moyo_nj, file)
        moyo_paths.append(npy_path)  # Store the full path
        moyo_arr.append(np.load(npy_path).flatten())
        st_frame.append(0)
        end_frame.append(len(moyo_arr[-1]))  # Use the length of the flattened array
        if file[0] == 'M':
            file_name_for_text = file[1:-4] + '.txt'
        else:
            file_name_for_text = file[:-4] + '.txt'
        file_path = os.path.join(moyo_text_dir, file_name_for_text)
        if os.path.exists(file_path):
            with open(file_path, 'r') as text_file:
                moyo_text.append(text_file.read())
        else:
            moyo_text.append("Text file not found")

data = []
for i in range(len(moyo_paths)):
    entry = {
        'filename': moyo_paths[i],  # Store the full path of the .npy file
        'motion_array': moyo_arr[i],  # The flattened numpy array
        'start_frame': st_frame[i],  # Start frame
        'end_frame': end_frame[i],  # End frame
        'text_description': moyo_text[i],  # Corresponding text description
    }
    data.append(entry)

new_df = pd.DataFrame(data)
df = pd.concat([df, new_df], ignore_index=True)  # Reset the index, should always be true
print(df.head())


Processing files: 100%|██████████| 342/342 [00:08<00:00, 40.79it/s] 

      filename                                       motion_array  \
0  M013957.npy  [0.0, 0.9644602537155151, 0.0, 0.0577980577945...   
1  M013962.npy  [0.0, 0.5505644679069519, 0.0, 0.0594957396388...   
2  M013951.npy  [0.0, 0.931525468826294, 0.0, 0.05567017570137...   
3  M013955.npy  [0.0, 1.0000139474868774, 0.0, 0.0549789741635...   
4  M013958.npy  [0.0, 0.9899902939796448, 0.0, 0.0667410045862...   

   start_frame  end_frame                                   text_description  
0            0         93  a man walks forward and turns to the right.#a/...  
1            0         45  a man is in a seated postion. he alternates be...  
2            0        120  a man gets down on his hands and knees.#a/DET ...  
3            0         82  a person warms up pectorals by moving arms inw...  
4            0        198  a man lifts his left knee to his right elbow, ...  





In [11]:
verbs=[]
nouns=[]
for i in tqdm(range(df['text_description'].shape[0])):
  verbs.append(re.findall(f"(\S+)/VERB",df['text_description'][i]))
  nouns.append(re.findall(f"(\S+)/NOUN",df['text_description'][i]))

df['verbs']=verbs
df['nouns']=nouns

100%|██████████| 29376/29376 [00:05<00:00, 5576.34it/s]


In [12]:
df

Unnamed: 0,filename,motion_array,start_frame,end_frame,text_description,verbs,nouns
0,M013957.npy,"[0.0, 0.9644602537155151, 0.0, 0.0577980577945...",0,93,a man walks forward and turns to the right.#a/...,"[walk, turn, walk, walk]","[man, right, man, left, circle, person, left]"
1,M013962.npy,"[0.0, 0.5505644679069519, 0.0, 0.0594957396388...",0,45,a man is in a seated postion. he alternates be...,"[alternate, move, sit, gesture, face, reach, m...","[man, postion, hand, driving, motion, person, ..."
2,M013951.npy,"[0.0, 0.931525468826294, 0.0, 0.05567017570137...",0,120,a man gets down on his hands and knees.#a/DET ...,"[get, take, drop, lower, lower, step, knelt]","[man, hand, knee, man, step, knee, back, hand,..."
3,M013955.npy,"[0.0, 1.0000139474868774, 0.0, 0.0549789741635...",0,82,a person warms up pectorals by moving arms inw...,"[warm, move, exercise, wave, move]","[person, pectoral, arm, inward, outward, man, ..."
4,M013958.npy,"[0.0, 0.9899902939796448, 0.0, 0.0667410045862...",0,198,"a man lifts his left knee to his right elbow, ...","[lift, squat, lift, viceversa, hold, drop, squ...","[man, knee, elbow, knee, elbow, time, man, kne..."
...,...,...,...,...,...,...,...
29371,./TRME/MOYO/new_joints/M0168.npy,"[0.0, 0.9599376, 0.0, 0.066350274, 0.8659352, ...",0,29898,Text file not found,[],[]
29372,./TRME/MOYO/new_joints/0169.npy,"[0.0, 0.94840664, 0.0, 0.07160213, 0.86027473,...",0,29766,Text file not found,[],[]
29373,./TRME/MOYO/new_joints/M0169.npy,"[0.0, 0.9492263, 0.0, 0.066176124, 0.8548747, ...",0,29766,Text file not found,[],[]
29374,./TRME/MOYO/new_joints/0170.npy,"[0.0, 0.95115465, 0.0, 0.07090821, 0.8640788, ...",0,37884,Text file not found,[],[]


In [13]:
extended_categories = {
    'Walking/Running': ['walk', 'run', 'jog', 'sprint', 'step', 'shuffle'],
    'Sports': ['basketball', 'soccer', 'tennis', 'swimming', 'gymnastics', 'skiing', 'snowboarding', 'cycling', 'golf', 'equestrian', 'parkour', 'free running', 'skate', 'ball'],
    'Exercise/Fitness': ['exercise', 'workout', 'stretch', 'yoga', 'fitness', 'aerobics', 'squat', 'stance', 'push', 'pull'],
    'Dancing': ['dance', 'dancing', 'choreography', 'routine', 'strut', 'skip'],
    'Fighting/Martial Arts': ['fight', 'martial arts', 'boxing', 'wrestling', 'sparring', 'kick', 'karate', 'punch'],
    'Gymnastics/Acrobatics': ['hop','flip', 'somersault', 'cartwheel', 'balance beam', 'gymnastics', 'acrobatics', 'swing', 'jump', 'stunt'],
    'Object Manipulation': ['juggle', 'throw', 'catch', 'manipulation', 'hold', 'place', 'grab', 'put', 'play', 'rub', 'shake'],
    'Gestures/Expressions': ['gesture', 'expression', 'body language', 'clap', 'nod', 'wave', 'point', 'scratch'],
    'Water Activities': ['swim', 'dive', 'surf', 'drown', 'sink', 'paddle'],
    'Team Sports': ['team', 'pass', 'teamwork', 'strategy', 'goal', 'ball', 'hit', 'miss'],
    'Extreme Sports': ['extreme sports', 'skateboarding', 'skydiving', 'base jumping', 'bungee jumping'],
    'Climbing': ['climb', 'rock climbing', 'mountain climbing', 'scramble'],
    'Aerial Arts': ['aerial silk', 'aerial hoop', 'trapeze'],
    'Yoga/Pilates': ['yoga', 'pilates', 'meditation','yogi'],
    'Recreational Activities': ['hike', 'fish', 'camp', 'picnic', 'play', 'use', 'wash', 'drink'],
    'Standing/Sitting': ['stand', 'sit', 'kneel', 'rest', 'lean', 'crouch', 'bend', 'stagger', 'pace', 'stretch'],
    'Lifting/Carrying': ['pick', 'lift', 'raise', 'carry', 'hold', 'drag', 'lower', 'bring', 'drop'],
    'Balance/Stability': ['balance', 'stumble', 'regain', 'sway', 'shuffle', 'tilt', 'roll'],
    'Crawling/Low Movement': ['crawl', 'crouch', 'slide', 'stomp'],
    'Turning/Rotating': ['turn', 'pivot', 'rotate', 'spin', 'swerve', 'twist'],
    'Interaction/Contact': ['hug', 'handshake', 'pet', 'touch', 'rub', 'hand', 'shoulder'],
}

def label_motion(description):
    for category, keywords in extended_categories.items():
        for keyword in keywords:
            if keyword in description:
                return category
    return 'Other'

df['motion_category'] = df['text_description'].apply(label_motion)

In [14]:
df['motion_category'].value_counts()

motion_category
Walking/Running            14932
Object Manipulation         3758
Exercise/Fitness            2870
Gymnastics/Acrobatics       1590
Standing/Sitting            1502
Gestures/Expressions        1034
Sports                       858
Fighting/Martial Arts        736
Dancing                      480
Lifting/Carrying             460
Recreational Activities      424
Other                        422
Interaction/Contact           94
Turning/Rotating              52
Balance/Stability             40
Crawling/Low Movement         40
Team Sports                   40
Water Activities              32
Climbing                      10
Yoga/Pilates                   2
Name: count, dtype: int64

In [15]:
from collections import Counter

# Extract verbs and nouns from descriptions categorized as "Other"
other_verbs = df[df['motion_category'] == 'Other']['verbs'].explode().dropna()
other_nouns = df[df['motion_category'] == 'Other']['nouns'].explode().dropna()

# Count the frequency of verbs and nouns
verb_counter = Counter(other_verbs)
noun_counter = Counter(other_nouns)

# Print the most common verbs and nouns
print("Most common verbs:")
print(verb_counter.most_common(100))
print(len(verb_counter))
print("\nMost common nouns:")
print(noun_counter.most_common(100))
print(len(noun_counter))

Most common verbs:
[('cross', 42), ('move', 32), ('uncross', 10), ('make', 10), ('check', 10), ('act', 10), ('do', 8), ('flap', 8), ('look', 6), ('pretend', 6), ('get', 6), ('flex', 6), ('imitate', 6), ('leap', 6), ('left', 5), ('pose', 4), ('side', 4), ('sew', 4), ('fold', 4), ('have', 4), ('creep', 4), ('out', 2), ('feel', 2), ('slowly.#move', 2), ('limp', 2), ('empty', 2), ('watch', 2), ('call', 2), ('out.#move', 2), ('seem', 2), ('begin', 2), ('sratch', 2), ('engage', 2), ('pulse', 2), ('strechte', 2), ('perform', 2), ('bow', 2), ('chest.#cross', 2), ('evade', 2), ('dodge', 2), ('duck', 2), ('appear', 2), ('sctatche', 2), ('dry', 2), ('wipe', 2), ('drive', 2), ('impersonate', 2), ('elbow', 2), ('slink', 2), ('go', 2), ('sweep', 2), ('hunch', 2), ('flail', 2), ('reach', 2), ('twirl', 2), ('come', 2), ('whale', 2), ('toe', 2), ('tiptoe', 2), ('sneak', 2)]
60

Most common nouns:
[('person', 170), ('arm', 96), ('man', 30), ('chicken', 18), ('right', 15), ('side', 14), ('left', 13), ('w

# Sampling UI

In [16]:
import ipywidgets as widgets
from IPython.display import display

#Save Path
path_to_save_sampled_data = input("Enter the path to save the sampled data: ")
name = 'sampled_data.csv'
full_path = os.path.join(path_to_save_sampled_data, name)

#Get unique motion categories
motion_categories = df['motion_category'].unique()

#Create sliders
sliders = {}
for category in motion_categories:
    max_val = len(df[df['motion_category'] == category])
    slider = widgets.IntSlider(
        value=1,
        min=1,
        max=max_val,
        step=1,
        description=f'{category}:',
        continuous_update=True,
    )
    sliders[category] = slider
sample_button = widgets.Button(description='Sample Motions')

#Output widget to display the sampled data
output = widgets.Output()


def on_button_click(b):
    with output:
        output.clear_output()  # Clear previous output
        samples_list = []

        # Sample based on user preference
        for category, slider in sliders.items():
            num_samples = slider.value
            samples = df[df['motion_category'] == category].sample(n=num_samples, replace=True)
            samples_list.append(samples)

        sampled_data = pd.concat(samples_list, ignore_index=True)
        sampled_data.to_csv(full_path, index=False)
        display(sampled_data)

# Connect the button click event to the function
sample_button.on_click(on_button_click)

# Display all sliders and the buttons
widgets_to_display = list(sliders.values()) + [sample_button, output]
display(*widgets_to_display)

Enter the path to save the sampled data: /content/drive/MyDrive/TRME/TRME


IntSlider(value=1, description='Walking/Running:', max=14932, min=1)

IntSlider(value=1, description='Gestures/Expressions:', max=1034, min=1)

IntSlider(value=1, description='Exercise/Fitness:', max=2870, min=1)

IntSlider(value=1, description='Gymnastics/Acrobatics:', max=1590, min=1)

IntSlider(value=1, description='Object Manipulation:', max=3758, min=1)

IntSlider(value=1, description='Fighting/Martial Arts:', max=736, min=1)

IntSlider(value=1, description='Lifting/Carrying:', max=460, min=1)

IntSlider(value=1, description='Recreational Activities:', max=424, min=1)

IntSlider(value=1, description='Standing/Sitting:', max=1502, min=1)

IntSlider(value=1, description='Sports:', max=858, min=1)

IntSlider(value=1, description='Crawling/Low Movement:', max=40, min=1)

IntSlider(value=1, description='Interaction/Contact:', max=94, min=1)

IntSlider(value=1, description='Dancing:', max=480, min=1)

IntSlider(value=1, description='Team Sports:', max=40, min=1)

IntSlider(value=1, description='Other:', max=422, min=1)

IntSlider(value=1, description='Turning/Rotating:', max=52, min=1)

IntSlider(value=1, description='Water Activities:', max=32, min=1)

IntSlider(value=1, description='Balance/Stability:', max=40, min=1)

IntSlider(value=1, description='Climbing:', max=10, min=1)

IntSlider(value=1, description='Yoga/Pilates:', max=2, min=1)

Button(description='Sample Motions', style=ButtonStyle())

Output()

# Generated Sampled Dataframe

In [None]:
final_df = pd.read_csv('/content/drive/MyDrive/TRME/TRME/sampled_data.csv')

In [None]:
final_df

Unnamed: 0,filename,motion_array,start_frame,end_frame,text_description,verbs,nouns,motion_category
0,M013872.npy,"[0.0, 1.810925841331482, 0.0, 0.06040766462683...",0,106,a man walks forwards down some stairs while ho...,"['walk', 'hold', 'walk', 'hold', 'walk']","['man', 'stair', 'handrail', 'hand', 'person',...",Walking/Running
1,M001357.npy,"[0.0, 0.9441778659820557, 0.0, 0.0561531074345...",0,87,a person walking towards an edge and then stop...,"['walk', 'stop', 'examine', 'walk', 'walk', 'k...","['person', 'edge', 'person', 'left', 'circling...",Walking/Running
2,003064.npy,"[0.0, 0.5775465965270996, 0.0, 0.0461752638220...",0,198,a person crawls downward on all fours then sta...,"['crawl', 'stand', 'face', 'touch', 'left', 'c...","['person', 'four', 'right', 'sidestep', 'knee'...",Walking/Running
3,M011866.npy,"[0.0, 0.9501931071281433, 0.0, 0.0633524656295...",0,153,a man slowly walks forward#a/DET man/NOUN slow...,"['walk', 'walk', 'walk']","['man', 'person', 'person']",Walking/Running
4,M000901.npy,"[0.0, 0.9156006574630737, 0.0, 0.0543413981795...",0,198,a man slowly walks backwards and then walks si...,"['walk', 'walk', 'side', 'stagger', 'look', 's...","['man', 'side', 'walking', 'man', 'person', 'r...",Walking/Running
...,...,...,...,...,...,...,...,...
11340,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.9602345 0. ... -0.66...,0,21582,A person is doing the Standing Forward Bend po...,"['is', 'doing', 'Standing', 'performs']","['person', 'yogi']",Yoga/Pilates
11341,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.94287604 0. ... -0.70...,0,24750,A person is doing the Cobra Pose#A/DET person/...,"['is', 'doing', 'performs']","['person', 'yogi']",Yoga/Pilates
11342,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.95115465 0. ... -0.68...,0,37884,A person is doing the Cobra Pose#A/DET person/...,"['is', 'doing', 'performs']","['person', 'yogi']",Yoga/Pilates
11343,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.9546067 0. ... -0.70...,0,71874,A person is doing the Scorpion pose#A/DET pers...,"['is', 'doing', 'performs']","['person', 'pose', 'yogi']",Yoga/Pilates


In [None]:
# Initialize the counter for naming
counter = 0

# Function to create a new name
def create_new_name(filename, counter):
    # Check if the filename starts with "M"
    if filename.startswith("M"):
        new_name = f"M{counter:06d}.npy"
    else:
        new_name = f"{counter:06d}.npy"
    return new_name

# Apply the function to create new names and get the count
final_df['new_name'] = [create_new_name(filename, i) for i, filename in enumerate(final_df['filename'])]

# Display the updated DataFrame
print(final_df)

                                                filename  \
0                                            M013872.npy   
1                                            M001357.npy   
2                                             003064.npy   
3                                            M011866.npy   
4                                            M000901.npy   
...                                                  ...   
11340  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11341  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11342  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11343  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11344  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   

                                            motion_array  start_frame  \
0      [0.0, 1.810925841331482, 0.0, 0.06040766462683...            0   
1      [0.0, 0.9441778659820557, 0.0, 0.0561531074345...            0   
2      [0.0, 0.5775465965270996, 0.0, 0.0461752638220...    

#Adding new_joints

In [None]:
path_2_add = '/content/drive/MyDrive/TRME/TRME/hml3d/new_joints'
nj_dir_moyo='/content/drive/MyDrive/TRME/TRME/MOYO/new_joints'
# List to store whether each path exists
path_existence = []
final_paths=[]
for name in final_df['filename']:
    if os.path.isabs(name):
        moyo_path=os.path.join(nj_dir_moyo,os.path.basename(name))
        path_existence.append(os.path.exists(moyo_path))
        final_paths.append(moyo_path)
    else:
        # If it's a relative path, create the full path and check its existence
        full_path = os.path.join(path_2_add, name)
        if os.path.exists(full_path):
          path_existence.append(True)
          final_paths.append(full_path)
        else:
          path_existence.append(False)
          final_paths.append(None)

print("Path existence:", path_existence)
print(f"Number of paths : {len(final_paths)}")
print(final_paths)
final_df['Path_existence'] = path_existence
final_df['final_paths'] = final_paths

Path existence: [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True,

In [None]:
final_df['Path_existence'].value_counts()

Path_existence
True    11345
Name: count, dtype: int64

In [None]:
print(final_df)

                                                filename  \
0                                            M013872.npy   
1                                            M001357.npy   
2                                             003064.npy   
3                                            M011866.npy   
4                                            M000901.npy   
...                                                  ...   
11340  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11341  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11342  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11343  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   
11344  /content/drive/MyDrive/TRME/Dataset/MOYO/new_j...   

                                            motion_array  start_frame  \
0      [0.0, 1.810925841331482, 0.0, 0.06040766462683...            0   
1      [0.0, 0.9441778659820557, 0.0, 0.0561531074345...            0   
2      [0.0, 0.5775465965270996, 0.0, 0.0461752638220...    

In [None]:
#remove the directory
shutil.rmtree('/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/new_joints')

In [None]:
final_df

Unnamed: 0,filename,motion_array,start_frame,end_frame,text_description,verbs,nouns,motion_category,new_name,Path_existence,final_paths
0,M013872.npy,"[0.0, 1.810925841331482, 0.0, 0.06040766462683...",0,106,a man walks forwards down some stairs while ho...,"['walk', 'hold', 'walk', 'hold', 'walk']","['man', 'stair', 'handrail', 'hand', 'person',...",Walking/Running,M000000.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
1,M001357.npy,"[0.0, 0.9441778659820557, 0.0, 0.0561531074345...",0,87,a person walking towards an edge and then stop...,"['walk', 'stop', 'examine', 'walk', 'walk', 'k...","['person', 'edge', 'person', 'left', 'circling...",Walking/Running,M000001.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
2,003064.npy,"[0.0, 0.5775465965270996, 0.0, 0.0461752638220...",0,198,a person crawls downward on all fours then sta...,"['crawl', 'stand', 'face', 'touch', 'left', 'c...","['person', 'four', 'right', 'sidestep', 'knee'...",Walking/Running,000002.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
3,M011866.npy,"[0.0, 0.9501931071281433, 0.0, 0.0633524656295...",0,153,a man slowly walks forward#a/DET man/NOUN slow...,"['walk', 'walk', 'walk']","['man', 'person', 'person']",Walking/Running,M000003.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
4,M000901.npy,"[0.0, 0.9156006574630737, 0.0, 0.0543413981795...",0,198,a man slowly walks backwards and then walks si...,"['walk', 'walk', 'side', 'stagger', 'look', 's...","['man', 'side', 'walking', 'man', 'person', 'r...",Walking/Running,M000004.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
...,...,...,...,...,...,...,...,...,...,...,...
11340,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.9602345 0. ... -0.66...,0,21582,A person is doing the Standing Forward Bend po...,"['is', 'doing', 'Standing', 'performs']","['person', 'yogi']",Yoga/Pilates,011340.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...
11341,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.94287604 0. ... -0.70...,0,24750,A person is doing the Cobra Pose#A/DET person/...,"['is', 'doing', 'performs']","['person', 'yogi']",Yoga/Pilates,011341.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...
11342,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.95115465 0. ... -0.68...,0,37884,A person is doing the Cobra Pose#A/DET person/...,"['is', 'doing', 'performs']","['person', 'yogi']",Yoga/Pilates,011342.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...
11343,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.9546067 0. ... -0.70...,0,71874,A person is doing the Scorpion pose#A/DET pers...,"['is', 'doing', 'performs']","['person', 'pose', 'yogi']",Yoga/Pilates,011343.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...


In [None]:
final_df.to_csv('/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/Custom_Dataset_1.csv', index=False)

In [None]:
# Destination directories for the numpy files and text files
destination_dir = '/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/new_joints'
text_dir = '/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/texts'

# Ensure the directories exist
os.makedirs(destination_dir, exist_ok=True)
os.makedirs(text_dir, exist_ok=True)

# Initialize counters for summary
files_copied = 0
texts_created = 0

# Loop over the dataframe to copy files and create text files
for _, row in tqdm(final_df.iterrows(), total=final_df.shape[0], desc="Processing files"):
    source_path = row['final_paths']
    new_name = row['new_name']
    text_content = row['text_description']

    # Define the destination path for the numpy file and the text file
    destination_path = os.path.join(destination_dir, new_name)
    text_file_name = os.path.splitext(new_name)[0] + '.txt'  t
    text_path = os.path.join(text_dir, text_file_name)

    # Copy the numpy file if it exists
    if os.path.exists(source_path):
        try:
            shutil.copy2(source_path, destination_path)
            files_copied += 1
        except Exception as e:
            print(f"Error copying {source_path} to {destination_path}: {e}")
    else:
        print(f"Source file does not exist: {source_path}")

    # Create a text file with the text description
    try:
        with open(text_path, 'w') as text_file:
            text_file.write(text_content)
            texts_created += 1  # Increment counter
    except Exception as e:
        print(f"Error writing text file {text_path}: {e}")

print(f"Processing completed. {files_copied} files copied, {texts_created} text files created.")

Processing files: 100%|██████████| 11345/11345 [35:12<00:00,  5.37it/s]

Processing completed. 11345 files copied, 11345 text files created.





In [None]:
print(len(os.listdir('/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/new_joints')))

11345


# Adding new_joint_vecs

In [None]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [None]:
final_df = pd.read_csv('/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/Custom_Dataset_1.csv')
final_df.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,text_description,verbs,nouns,motion_category,new_name,Path_existence,final_paths
0,M013872.npy,"[0.0, 1.810925841331482, 0.0, 0.06040766462683...",0,106,a man walks forwards down some stairs while ho...,"['walk', 'hold', 'walk', 'hold', 'walk']","['man', 'stair', 'handrail', 'hand', 'person',...",Walking/Running,M000000.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
1,M001357.npy,"[0.0, 0.9441778659820557, 0.0, 0.0561531074345...",0,87,a person walking towards an edge and then stop...,"['walk', 'stop', 'examine', 'walk', 'walk', 'k...","['person', 'edge', 'person', 'left', 'circling...",Walking/Running,M000001.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
2,003064.npy,"[0.0, 0.5775465965270996, 0.0, 0.0461752638220...",0,198,a person crawls downward on all fours then sta...,"['crawl', 'stand', 'face', 'touch', 'left', 'c...","['person', 'four', 'right', 'sidestep', 'knee'...",Walking/Running,000002.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
3,M011866.npy,"[0.0, 0.9501931071281433, 0.0, 0.0633524656295...",0,153,a man slowly walks forward#a/DET man/NOUN slow...,"['walk', 'walk', 'walk']","['man', 'person', 'person']",Walking/Running,M000003.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
4,M000901.npy,"[0.0, 0.9156006574630737, 0.0, 0.0543413981795...",0,198,a man slowly walks backwards and then walks si...,"['walk', 'walk', 'side', 'stagger', 'look', 's...","['man', 'side', 'walking', 'man', 'person', 'r...",Walking/Running,M000004.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...


In [None]:
path_2_add = '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs'
nj_dir_moyo='/content/drive/MyDrive/TRME/TRME/MOYO/new_joint_vecs'
# List to store whether each path exists
path_existence = []
final_paths=[]
for name in final_df['filename']:
    if os.path.isabs(name):
        moyo_path=os.path.join(nj_dir_moyo,os.path.basename(name))
        path_existence.append(os.path.exists(moyo_path))
        final_paths.append(moyo_path)
    else:
        # If it's a relative path, create the full path and check its existence
        full_path = os.path.join(path_2_add, name)
        if os.path.exists(full_path):
          path_existence.append(True)
          final_paths.append(full_path)
        else:
          path_existence.append(False)
          final_paths.append(None)

print("Path existence:", len(path_existence))
print(f"Number of paths : {len(final_paths)}")
print(final_paths)
final_df['NJV_Path_existence'] = path_existence
final_df['NJV_final_paths'] = final_paths

Path existence: 11345
Number of paths : 11345
['/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M013872.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M001357.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/003064.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M011866.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M000901.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/003462.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/010840.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M001295.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/002017.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/003545.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M011822.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M008021.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/M012367.npy', '/content/drive/MyDrive/TRME/TRME/hml3d/new_joint_vecs/005038

In [None]:
final_df[final_df['NJV_Path_existence']==True]

Unnamed: 0,filename,motion_array,start_frame,end_frame,text_description,verbs,nouns,motion_category,new_name,Path_existence,final_paths,NJV_Path_existence,NJV_final_paths
0,M013872.npy,"[0.0, 1.810925841331482, 0.0, 0.06040766462683...",0,106,a man walks forwards down some stairs while ho...,"['walk', 'hold', 'walk', 'hold', 'walk']","['man', 'stair', 'handrail', 'hand', 'person',...",Walking/Running,M000000.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
1,M001357.npy,"[0.0, 0.9441778659820557, 0.0, 0.0561531074345...",0,87,a person walking towards an edge and then stop...,"['walk', 'stop', 'examine', 'walk', 'walk', 'k...","['person', 'edge', 'person', 'left', 'circling...",Walking/Running,M000001.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
2,003064.npy,"[0.0, 0.5775465965270996, 0.0, 0.0461752638220...",0,198,a person crawls downward on all fours then sta...,"['crawl', 'stand', 'face', 'touch', 'left', 'c...","['person', 'four', 'right', 'sidestep', 'knee'...",Walking/Running,000002.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
3,M011866.npy,"[0.0, 0.9501931071281433, 0.0, 0.0633524656295...",0,153,a man slowly walks forward#a/DET man/NOUN slow...,"['walk', 'walk', 'walk']","['man', 'person', 'person']",Walking/Running,M000003.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
4,M000901.npy,"[0.0, 0.9156006574630737, 0.0, 0.0543413981795...",0,198,a man slowly walks backwards and then walks si...,"['walk', 'walk', 'side', 'stagger', 'look', 's...","['man', 'side', 'walking', 'man', 'person', 'r...",Walking/Running,M000004.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11340,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.9602345 0. ... -0.66...,0,21582,A person is doing the Standing Forward Bend po...,"['is', 'doing', 'Standing', 'performs']","['person', 'yogi']",Yoga/Pilates,011340.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...
11341,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.94287604 0. ... -0.70...,0,24750,A person is doing the Cobra Pose#A/DET person/...,"['is', 'doing', 'performs']","['person', 'yogi']",Yoga/Pilates,011341.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...
11342,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.95115465 0. ... -0.68...,0,37884,A person is doing the Cobra Pose#A/DET person/...,"['is', 'doing', 'performs']","['person', 'yogi']",Yoga/Pilates,011342.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...
11343,/content/drive/MyDrive/TRME/Dataset/MOYO/new_j...,[ 0. 0.9546067 0. ... -0.70...,0,71874,A person is doing the Scorpion pose#A/DET pers...,"['is', 'doing', 'performs']","['person', 'pose', 'yogi']",Yoga/Pilates,011343.npy,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...,True,/content/drive/MyDrive/TRME/TRME/MOYO/new_join...


In [None]:
# Destination directory
destination_dir = '/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/new_joint_vecs'

# Ensure the destination directory exists
os.makedirs(destination_dir, exist_ok=True)

# Loop through the DataFrame with a progress bar
for idx, row in tqdm(final_df.iterrows(), total=final_df.shape[0], desc="Copying files"):
    source_path = row['NJV_final_paths']
    new_name = row['new_name']
    destination_path = os.path.join(destination_dir, new_name)  # Define destination with the new name

    # Copy the file if the source path exists
    if os.path.exists(source_path):
        try:
            shutil.copy2(source_path, destination_path)  # Copy with metadata
        except Exception as e:
            print(f"Error copying {source_path} to {destination_path}: {e}")
    else:
        print(f"Source file does not exist: {source_path}")

print("File copying completed.")

Copying files: 100%|██████████| 11345/11345 [04:42<00:00, 40.09it/s]

File copying completed.





In [None]:
print(len(os.listdir('/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/new_joint_vecs')))

11345


# Mean & Variance

In [None]:
cd /content/drive/MyDrive/TRME/TRME

/content/drive/MyDrive/TRME/TRME


In [None]:
!ls

Custom_Dataset_1    HumanML3D		      raw_pose_processing.ipynb  TRNEDataset.csv
dataset_class.xlsx  MOYO		      sampled_data.csv		 Zips
hml3d		    npy_to_text_mapping.xlsx  TRNEDataset


In [None]:
import numpy as np
import os
from os.path import join as pjoin
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
import gc

# Function to load and return data from a single file
def load_file(file_path):
    data = np.load(file_path)
    if np.isnan(data).any():
        return None
    return data

# Calculate mean and variance in chunks
def mean_variance(data_dir, save_dir, joints_num, batch_size=10, max_workers=4):
    file_list = os.listdir(data_dir)
    data_list = []

    with tqdm(total=len(file_list), desc='Loading Data') as pbar:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            # batches of future tasks
            for file in file_list:
                futures.append(executor.submit(load_file, pjoin(data_dir, file)))

            for future in as_completed(futures):
                data = future.result()
                if data is not None:
                    data_list.append(data)
                pbar.update(1)  # Update progress bar

            # garbage collection
            gc.collect()

    if data_list:
        data = np.concatenate(data_list, axis=0)

        # Calculate mean and standard deviation
        mean = data.mean(axis=0)
        std = data.std(axis=0)

        # Apply scaling
        std[0:1] = std[0:1].mean()
        std[1:3] = std[1:3].mean()
        std[3:4] = std[3:4].mean()
        std[4: 4 + (joints_num - 1) * 3] = std[4: 4 + (joints_num - 1) * 3].mean()
        std[4 + (joints_num - 1) * 3: 4 + (joints_num - 1) * 9] = std[4 + (joints_num - 1) * 3: 4 + (joints_num - 1) * 9].mean()
        std[4 + (joints_num - 1) * 9: 4 + (joints_num - 1) * 9 + joints_num * 3].mean()
        std[4 + (joints_num - 1) * 9 + joints_num * 3:] = std[4 + (joints_num - 1) * 9 + joints_num * 3:].mean()

        # Ensure correct shape
        assert 8 + (joints_num - 1) * 9 + joints_num * 3 == std.shape[-1]

        # Save mean and standard deviation
        np.save(pjoin(save_dir, 'Mean.npy'), mean)
        np.save(pjoin(save_dir, 'Std.npy'), std)

        return mean, std
    else:
        print("No valid data to process")
        return None, None


# Main script
if __name__ == '__main__':
    data_dir = './Custom_Dataset_1/new_joint_vecs/'  # Directory with data files
    save_dir = './Custom_Dataset_1/'  # Directory to save Mean and Std
    joints_num = 22  # Number of joints based on the dataset used

    mean, std = mean_variance(data_dir, save_dir, joints_num, max_workers=6)

Loading Data: 100%|██████████| 11345/11345 [05:14<00:00, 36.09it/s] 


# Train Test Val Texts


In [None]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)


Mounted at /content/drive


In [None]:
texts = '/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/texts'
new_joints = '/content/drive/MyDrive/TRME/TRME/Custom_Dataset_1/new_joints'
file_names_nj = [os.path.basename(x)[:-4]for x in os.listdir(new_joints)]
train_test_val = pd.DataFrame(file_names_nj,columns=['Name'])

In [None]:
text_check=[]
nj_check = []
for name in train_test_val['Name']:
  if os.path.exists(os.path.join(texts,name)+'.txt'):
    text_check.append(True)
  else :
    text_check.append(False)

for name in train_test_val['Name']:
  if os.path.exists(os.path.join(new_joints,name)+'.npy'):
    nj_check.append(True)
  else :
    nj_check.append(False)

In [None]:
train_test_val['Text_Path_check']= text_check
train_test_val['New_Joints_Path_check']= nj_check

In [None]:
train_test_val

Unnamed: 0,Name,Text_Path_check,New_Joints_Path_check
0,M005333,True,True
1,M003979,True,True
2,M010931,True,True
3,M007882,True,True
4,M006001,True,True
...,...,...,...
11340,005940,True,True
11341,008004,True,True
11342,002722,True,True
11343,007538,True,True


In [None]:
final_df=pd.read_csv('/content/drive/MyDrive/TRME/T2M-GPT/dataset/HumanML3D/Custom_Dataset_1.csv')
final_df.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,text_description,verbs,nouns,motion_category,new_name,Path_existence,final_paths
0,M013872.npy,"[0.0, 1.810925841331482, 0.0, 0.06040766462683...",0,106,a man walks forwards down some stairs while ho...,"['walk', 'hold', 'walk', 'hold', 'walk']","['man', 'stair', 'handrail', 'hand', 'person',...",Walking/Running,M000000.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
1,M001357.npy,"[0.0, 0.9441778659820557, 0.0, 0.0561531074345...",0,87,a person walking towards an edge and then stop...,"['walk', 'stop', 'examine', 'walk', 'walk', 'k...","['person', 'edge', 'person', 'left', 'circling...",Walking/Running,M000001.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
2,003064.npy,"[0.0, 0.5775465965270996, 0.0, 0.0461752638220...",0,198,a person crawls downward on all fours then sta...,"['crawl', 'stand', 'face', 'touch', 'left', 'c...","['person', 'four', 'right', 'sidestep', 'knee'...",Walking/Running,000002.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
3,M011866.npy,"[0.0, 0.9501931071281433, 0.0, 0.0633524656295...",0,153,a man slowly walks forward#a/DET man/NOUN slow...,"['walk', 'walk', 'walk']","['man', 'person', 'person']",Walking/Running,M000003.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...
4,M000901.npy,"[0.0, 0.9156006574630737, 0.0, 0.0543413981795...",0,198,a man slowly walks backwards and then walks si...,"['walk', 'walk', 'side', 'stagger', 'look', 's...","['man', 'side', 'walking', 'man', 'person', 'r...",Walking/Running,M000004.npy,True,/content/drive/MyDrive/TRME/TRME/hml3d/new_joi...


In [None]:
import os

# Determine the split for train, test, train_val
train_ratio = 0.7
test_ratio = 0.15
train_val_ratio = 0.075
val_ratio = 1 - (train_ratio + test_ratio + train_val_ratio)

# Shuffle the dataframe for random distribution
final_df = final_df.sample(frac=1).reset_index(drop=True)

# Split into train, test, train_val, and validation
train_df = train_test_val[:int(len(train_test_val) * train_ratio)]
test_df = train_test_val[int(len(train_test_val) * train_ratio):int(len(train_test_val) * (train_ratio + test_ratio))]
train_val_df = train_test_val[int(len(train_test_val) * (train_ratio + test_ratio)):int(len(train_test_val) * (train_ratio + test_ratio + train_val_ratio))]
val_df = train_test_val[int(len(train_test_val) * (train_ratio + test_ratio + train_val_ratio)):]

# File paths for the output text files
output_dir = '/content/drive/MyDrive/TRME/T2M-GPT/dataset/HumanML3D'

os.makedirs(output_dir, exist_ok=True)

# Function to write a list to a text file
def write_to_text_file(filename, content_list):
    file_path = os.path.join(output_dir, filename)
    with open(file_path, 'w') as f:
        for item in content_list:
            f.write(item + '\n')

# Write to text files
write_to_text_file("all.txt", train_test_val['Name'].tolist())
write_to_text_file("train.txt", train_df['Name'].tolist())
write_to_text_file("test.txt", test_df['Name'].tolist())
write_to_text_file("train_val.txt", train_val_df['Name'].tolist())
write_to_text_file("val.txt", val_df['Name'].tolist())

print("Text files created successfully.")


Text files created successfully.
