In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!ls "/content/drive/My Drive/mdm_1/motion-diffusion-model/dataset/HumanML3D"

AboutHumanAct12.txt	 motion_representation.ipynb  raw_pose_processing.ipynb  train.txt
all.txt			 new_joints		      Std.npy			 train_val.txt
animation.ipynb		 new_joints_old		      test.txt			 val.txt
cal_mean_variance.ipynb  new_joint_vecs		      text_process.py
index.csv		 new_joint_vecs_old	      texts
Mean.npy		 paramUtil.py		      texts.zip


In [3]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
pd.set_option('display.max_colwidth', None)

In [5]:
joints_dir = '/content/drive/My Drive/mdm_1/motion-diffusion-model/dataset/HumanML3D/new_joints'
texts_dir = '/content/drive/My Drive/mdm_1/motion-diffusion-model/dataset/HumanML3D/texts'

npy_files_count = len([i for i in os.listdir(joints_dir) if i.endswith('.npy')])
text_files_count = len([i for i in os.listdir(texts_dir) if i.endswith('.txt')])

print("No. of .npy files:", npy_files_count)
print("No. of .txt files:", text_files_count)

No. of .npy files: 29376
No. of .txt files: 29232


In [None]:
# f = []
# for i in tqdm([j for j in os.listdir(joints_dir) if j.endswith('.npy')], desc="Processing .npy files"):
#     file_path = os.path.join(joints_dir, i)
#     a = np.load(file_path, mmap_mode='r')
#     f.append({'filename': i, 'motion_array': a.flatten().tolist()})

In [None]:
def process_files(file_batch):
    result = []
    for filename in file_batch:
        file_path = os.path.join(joints_dir, filename)
        a = np.load(file_path, mmap_mode='r')
        result.append({'filename': filename, 'motion_array': a.flatten().tolist()})
    return result

num_workers = 4
batch_size = 50

files = [j for j in os.listdir(joints_dir) if j.endswith('.npy')]
batches = [files[i:i + batch_size] for i in range(0, len(files), batch_size)]

results = []
with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for batch_result in tqdm(executor.map(process_files, batches), total=len(batches), desc="Processing .npy files in batches"):
        results.extend(batch_result)

f = results

In [None]:
df_npy = pd.DataFrame(f)

In [None]:
df_npy.head()

Unnamed: 0,filename,motion_array
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]"
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]"
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]"
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]"
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]"


In [None]:
df_npy.shape

(29376, 2)

In [None]:
example_npy = 'M013957.npy'

npy_path = os.path.join(joints_dir, example_npy)
motion_data = np.load(npy_path)
print("Shape of motion data:", motion_data.shape)
print("Number of frames:", motion_data.shape[0])

Shape of motion data: (94, 22, 3)
Number of frames: 94


In [None]:
example_txt = example_npy.replace('.npy', '.txt')
text_path = os.path.join(texts_dir, example_txt)

with open(text_path, 'r') as file:
    text_content = file.read()
print("Content of text file:", text_content)

Content of text file: a man walks forward and turns to the right.#a/DET man/NOUN walk/VERB forward/ADV and/CCONJ turn/VERB to/ADP the/DET right/NOUN#0.0#0.0
a man walks to his left in a half circle.#a/DET man/NOUN walk/VERB to/ADP his/DET left/NOUN in/ADP a/DET half/ADJ circle/NOUN#0.0#0.0
a person walks curved to the left.#a/DET person/NOUN walk/VERB curved/ADJ to/ADP the/DET left/NOUN#0.0#0.0



In [None]:
for m, n in df_npy.head(20).iterrows():
    file_path = os.path.join(joints_dir, n['filename'])
    motion_data = np.load(file_path)
    print(f"{n['filename']} shape: {motion_data.shape}")

M013957.npy shape: (94, 22, 3)
M013962.npy shape: (46, 22, 3)
M013951.npy shape: (121, 22, 3)
M013955.npy shape: (83, 22, 3)
M013958.npy shape: (199, 22, 3)
M013953.npy shape: (121, 22, 3)
M013961.npy shape: (104, 22, 3)
M013964.npy shape: (167, 22, 3)
M013954.npy shape: (64, 22, 3)
M013973.npy shape: (30, 22, 3)
M013967.npy shape: (139, 22, 3)
M013976.npy shape: (67, 22, 3)
M013966.npy shape: (67, 22, 3)
M013965.npy shape: (67, 22, 3)
M013975.npy shape: (102, 22, 3)
M013969.npy shape: (199, 22, 3)
M013971.npy shape: (141, 22, 3)
M013968.npy shape: (33, 22, 3)
M013972.npy shape: (85, 22, 3)
M013970.npy shape: (104, 22, 3)


In [None]:
df_npy['start_frame'] = 0
df_npy['end_frame'] = df_npy['motion_array'].apply(lambda x: len(x) // (22 * 3) - 1)

In [None]:
df_npy

Unnamed: 0,filename,motion_array,start_frame,end_frame
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198
...,...,...,...,...
29371,000372.npy,"[0.0, 0.986068, 0.0, 0.065244325, 0.90679634, -0.009133775, -0.047032744, 0.89224845, -0.03256019, 0.0051647015, 1.1141411, -0.02967862, 0.11137977, 0.5159779, -0.0006636232, -0.056359448, 0.5027099, -0.053044774, 0.0053653773, 1.2514297, 0.011005984, 0.12372428, 0.08506321, -0.0347012, -0.011607647, 0.08137669, -0.09364016, 0.005953124, 1.3078724, 0.021232877, 0.16185892, 0.051205337, 0.09930525, -0.114937745, 0.014383518, -0.009018272, 0.021648254, 1.526612, 0.016188052, 0.091184914, 1.4154922, 0.013740122, -0.060124822, 1.4336528, 0.0019544587, 0.017331801, 1.6081698, 0.079011604, 0.21813059, 1.4153808, -0.021002129, -0.17108919, 1.4866796, 0.0024242997, 0.24420926, 1.1637787, -0.06553297, -0.27981263, 1.2489706, -0.027425483, 0.08856348, 1.1287014, 0.1473196, -0.14622842, 1.1229976, 0.17036073, 0.0047680843, 0.98542386, 0.0016089075, 0.07003858, 0.906406, -0.009354036, -0.04293792, 0.89115876, -0.028600104, 0.008208377, 1.1133142, -0.02909014, 0.11287795, 0.51522547, -0.00030627917, -0.054072067, 0.50180453, -0.05153867, 0.009084893, 1.2508974, 0.010577564, 0.124407984, 0.08441601, -0.035924748, -0.011109419, 0.08054932, -0.09478979, 0.008078434, 1.3075106, 0.019777583, 0.16243994, 0.050403405, 0.098071694, -0.114325464, ...]",0,198
29372,000367.npy,"[0.0, 0.9620773, 0.0, 0.06121088, 0.8793125, -0.0052406066, -0.06478205, 0.8733664, -0.002832906, -0.0029481174, 1.0901207, -0.030106775, 0.10660615, 0.48838234, -0.012461217, -0.11693221, 0.48777303, -0.0319004, -0.0004975542, 1.2332187, -0.025593612, 0.08093606, 0.058664642, -0.053475328, -0.081902, 0.067802005, -0.091646895, 0.0016762048, 1.2819417, 0.0046067634, 0.10045272, 0.0039172713, 0.077598095, -0.16117202, 0.017845584, 0.024743296, -0.0008863413, 1.4988116, -0.028251978, 0.071722254, 1.3975122, -0.020683866, -0.07361239, 1.3995566, -0.027904382, -0.001232276, 1.5801872, 0.034953635, 0.20266423, 1.3862075, -0.027657941, -0.19588472, 1.3865198, -0.030065227, 0.2382655, 1.131858, -0.03004868, -0.22031848, 1.1253533, -0.05037209, 0.3193068, 0.89563596, 0.061568376, -0.28765425, 0.87986577, 0.039266255, 0.00075517705, 0.96223116, -0.0014313216, 0.061908774, 0.8794304, -0.006772017, -0.0640921, 0.87357795, -0.0045605805, -0.0020489683, 1.0903451, -0.031250063, 0.10708715, 0.4884686, -0.01362982, -0.11657694, 0.4879726, -0.03285713, 0.00026996474, 1.2334273, -0.026194027, 0.081116065, 0.05865455, -0.053427037, -0.08200636, 0.0678357, -0.09169841, 0.0022217534, 1.282217, 0.0039134147, 0.100424334, 0.0038289449, 0.0776447, -0.16114102, ...]",0,62
29373,000374.npy,"[0.0, 0.951418, 0.0, 0.066131674, 0.8723574, -0.0005239397, -0.064866945, 0.8629872, 0.006829571, -0.0056908205, 1.076704, -0.039764594, 0.082227826, 0.4792355, -0.012157731, -0.094831474, 0.47422588, -0.0077850893, -0.005952766, 1.2196876, -0.032079175, 0.06044603, 0.050720185, -0.065996304, -0.11172709, 0.058111027, -0.0957361, -0.003438585, 1.2636327, 0.0047070617, 0.08648928, 0.0026231701, 0.066543214, -0.13798794, 0.017849099, 0.04573974, 0.0027308352, 1.4786768, -0.038150582, 0.067124605, 1.3773872, -0.026651233, -0.071271986, 1.3851334, -0.029858999, 0.011987507, 1.5525422, 0.033090517, 0.19293866, 1.3482689, -0.052051693, -0.18866014, 1.3634034, -0.059405267, 0.23064911, 1.0949372, -0.0712291, -0.22470488, 1.1027925, -0.059464082, 0.28109992, 0.86210585, 0.047120035, -0.30832216, 0.8649361, 0.03679484, 0.00014643112, 0.9513337, 0.00030797406, 0.06623859, 0.8722402, -0.00023254717, -0.06476534, 0.8629335, 0.007107359, -0.0054783635, 1.0766337, -0.03942215, 0.0824002, 0.47911853, -0.011782496, -0.094807535, 0.474186, -0.007713921, -0.005847497, 1.2196065, -0.03154465, 0.060515642, 0.050652824, -0.06597303, -0.11173412, 0.05809828, -0.09578731, -0.0033629565, 1.2636123, 0.0051710336, 0.086522624, 0.0025518544, 0.06657232, -0.13801627, ...]",0,157
29374,000376.npy,"[0.0, 0.9220812, 0.0, 0.0474619, 0.8344719, -0.02638619, -0.06035675, 0.8315262, 0.015205715, -0.00678502, 1.0437684, -0.04956176, 0.03859741, 0.44393337, -0.074763134, -0.038094923, 0.49986175, 0.21953374, 0.0047063134, 1.1807545, -0.009484364, -0.0656876, 0.045478098, -0.20650709, 0.005100429, 0.0877944, 0.31703794, -4.9724244e-05, 1.2373941, -0.0017348824, 0.02688881, 0.00434526, -0.105037645, 0.00048679113, 0.11348939, 0.4641587, -0.024535576, 1.455317, -0.007107797, 0.065813445, 1.3580757, -0.00096543506, -0.08896511, 1.3486159, -0.018539991, -0.025497586, 1.5358421, 0.057172026, 0.1972872, 1.3543756, 0.0038520694, -0.21011358, 1.3396957, -0.0377398, 0.23405737, 1.100631, 0.01896678, -0.20522031, 1.0995663, -0.14512354, 0.24963686, 0.91014105, 0.20398863, -0.23317052, 0.837183, -0.08849251, 0.0015896205, 0.9197244, 0.020428684, 0.049073704, 0.83259493, -0.007463198, -0.058609106, 0.8288685, 0.034416966, -0.005499439, 1.0421574, -0.027216952, 0.04012993, 0.44331217, -0.065067396, -0.03569165, 0.49188414, 0.22977093, 0.006077325, 1.179195, 0.012659111, -0.06454842, 0.048198584, -0.20622723, 0.007178098, 0.07726686, 0.3159516, 0.0013301652, 1.2358246, 0.020486794, 0.026918627, 0.0044105337, -0.10486293, 0.0046038926, ...]",0,165


In [None]:
texts = []
for i in tqdm([f for f in os.listdir(texts_dir) if f.endswith('.txt')], desc="Reading and storing text files"):
    file_path = os.path.join(texts_dir, i)
    with open(file_path, 'r') as file:
        r = file.read()
    texts.append({'filename': i, 'text_content': r})

Reading and storing text files: 100%|██████████| 29232/29232 [16:09<00:00, 30.14it/s] 


In [None]:
df_texts = pd.DataFrame(texts)
df_texts.head()

Unnamed: 0,filename,text_content
0,M013624.txt,a standing person with both their arms in front of them pushes back with their right foot and walking slowly with their left foot#a/DET stand/VERB person/NOUN with/ADP both/CCONJ their/DET arm/NOUN in/ADP front/NOUN of/ADP them/PRON push/VERB back/ADV with/ADP their/DET right/ADJ foot/NOUN and/CCONJ walk/VERB slowly/ADV with/ADP their/DET left/ADJ foot/NOUN#0.0#0.0\na person moving closer to something or someone in a ready to fight stance#a/DET person/NOUN move/VERB closer/ADV to/ADP something/PRON or/CCONJ someone/PRON in/ADP a/DET ready/ADJ to/PART fight/VERB stance/NOUN#0.0#0.0\na man has both arms raised straight with his hands covering his face. and moves forward.#a/DET man/NOUN has/AUX both/DET arm/NOUN raise/VERB straight/ADV with/ADP his/DET hand/NOUN cover/VERB his/DET face/NOUN and/CCONJ move/NOUN forward/ADV#0.0#0.0\n
1,M013641.txt,person bends knees then slightly jumps into the air.#person/NOUN bend/VERB knee/NOUN then/ADV slightly/ADV jump/VERB into/ADP the/DET air/NOUN#0.0#0.0\na person jumps in place once.#a/DET person/NOUN jump/VERB in/ADP place/NOUN once/ADV#0.0#0.0\na person jumps once in the air.#a/DET person/NOUN jump/VERB once/ADV in/ADP the/DET air/NOUN#0.0#0.0\n
2,M013581.txt,a person who is standing with his hands by his sides takes four quick steps forward and stops.#a/DET person/NOUN who/PRON is/AUX stand/VERB with/ADP his/DET hand/NOUN by/ADP his/DET side/NOUN take/VERB four/NUM quick/ADJ step/NOUN forward/ADV and/CCONJ stop/NOUN#0.0#0.0\na person fastly walked forward#a/DET person/NOUN fastly/ADV walk/VERB forward/ADV#0.0#0.0\nperson runs forward a few steps.#person/NOUN run/VERB forward/ADV a/DET few/ADJ step/NOUN#0.0#0.0\n
3,M013602.txt,the man is pushing his right#the/DET man/NOUN is/AUX push/VERB his/DET right/NOUN#0.0#0.0\nthis person stands still then stumbles left.#this/DET person/NOUN stand/VERB still/ADV then/ADV stumble/NOUN left/VERB#0.0#0.0\nthe person is pushing with his right arm.#the/DET person/NOUN is/AUX push/VERB with/ADP his/DET right/ADJ arm/NOUN#0.0#0.0\n
4,M013593.txt,a person shifts their weight from foot to foot and shakes their hands.#a/DET person/NOUN shift/VERB their/DET weight/NOUN from/ADP foot/NOUN to/ADP foot/NOUN and/CCONJ shake/VERB their/DET hand/NOUN#0.0#0.0\na figure does the chacha goes ahead to do the mambo.#a/DET figure/NOUN does/AUX the/DET chacha/NOUN go/VERB ahead/ADV to/PART do/AUX the/DET mambo/NOUN#0.0#0.0\na woman rubs her hands together as she lifts each heal separately multiple times.#a/DET woman/NOUN rub/VERB her/DET hand/NOUN together/ADV as/SCONJ she/PRON lift/VERB each/DET heal/VERB separately/ADV multiple/ADJ time/NOUN#0.0#0.0\n


In [None]:
df_npy.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198


In [None]:
df_npy['base_filename'] = df_npy['filename'].str.replace('.npy', '')
df_texts['base_filename'] = df_texts['filename'].str.replace('.txt', '')

df_combined = pd.merge(df_npy, df_texts, on='base_filename', how='outer', suffixes=('_npy', '_txt'))
df_combined['is_match'] = df_combined['filename_npy'].notna() & df_combined['filename_txt'].notna()

In [None]:
df_combined[['filename_npy', 'filename_txt', 'is_match']]

Unnamed: 0,filename_npy,filename_txt,is_match
0,M013957.npy,M013957.txt,True
1,M013962.npy,M013962.txt,True
2,M013951.npy,M013951.txt,True
3,M013955.npy,M013955.txt,True
4,M013958.npy,M013958.txt,True
...,...,...,...
29569,,002108.txt,False
29570,,000341.txt,False
29571,,000741.txt,False
29572,,000961.txt,False


In [None]:
matching_files = df_combined[df_combined['is_match']]
non_matching_files = df_combined[~df_combined['is_match']]

print(f"Number of matching filenames: {len(matching_files)}")
print(f"Number of non-matching filenames: {len(non_matching_files)}")

Number of matching filenames: 29034
Number of non-matching filenames: 540


In [None]:
matching_files[['filename_npy', 'filename_txt']].head()

Unnamed: 0,filename_npy,filename_txt
0,M013957.npy,M013957.txt
1,M013962.npy,M013962.txt
2,M013951.npy,M013951.txt
3,M013955.npy,M013955.txt
4,M013958.npy,M013958.txt


In [None]:
df_npy.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,base_filename
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93,M013957
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45,M013962
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120,M013951
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82,M013955
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198,M013958


In [None]:
df_texts_filtered = df_texts[df_texts['filename'].isin(matching_files['filename_txt'])]
df_texts_filtered.head()

Unnamed: 0,filename,text_content,base_filename
0,M013624.txt,a standing person with both their arms in front of them pushes back with their right foot and walking slowly with their left foot#a/DET stand/VERB person/NOUN with/ADP both/CCONJ their/DET arm/NOUN in/ADP front/NOUN of/ADP them/PRON push/VERB back/ADV with/ADP their/DET right/ADJ foot/NOUN and/CCONJ walk/VERB slowly/ADV with/ADP their/DET left/ADJ foot/NOUN#0.0#0.0\na person moving closer to something or someone in a ready to fight stance#a/DET person/NOUN move/VERB closer/ADV to/ADP something/PRON or/CCONJ someone/PRON in/ADP a/DET ready/ADJ to/PART fight/VERB stance/NOUN#0.0#0.0\na man has both arms raised straight with his hands covering his face. and moves forward.#a/DET man/NOUN has/AUX both/DET arm/NOUN raise/VERB straight/ADV with/ADP his/DET hand/NOUN cover/VERB his/DET face/NOUN and/CCONJ move/NOUN forward/ADV#0.0#0.0\n,M013624
1,M013641.txt,person bends knees then slightly jumps into the air.#person/NOUN bend/VERB knee/NOUN then/ADV slightly/ADV jump/VERB into/ADP the/DET air/NOUN#0.0#0.0\na person jumps in place once.#a/DET person/NOUN jump/VERB in/ADP place/NOUN once/ADV#0.0#0.0\na person jumps once in the air.#a/DET person/NOUN jump/VERB once/ADV in/ADP the/DET air/NOUN#0.0#0.0\n,M013641
2,M013581.txt,a person who is standing with his hands by his sides takes four quick steps forward and stops.#a/DET person/NOUN who/PRON is/AUX stand/VERB with/ADP his/DET hand/NOUN by/ADP his/DET side/NOUN take/VERB four/NUM quick/ADJ step/NOUN forward/ADV and/CCONJ stop/NOUN#0.0#0.0\na person fastly walked forward#a/DET person/NOUN fastly/ADV walk/VERB forward/ADV#0.0#0.0\nperson runs forward a few steps.#person/NOUN run/VERB forward/ADV a/DET few/ADJ step/NOUN#0.0#0.0\n,M013581
3,M013602.txt,the man is pushing his right#the/DET man/NOUN is/AUX push/VERB his/DET right/NOUN#0.0#0.0\nthis person stands still then stumbles left.#this/DET person/NOUN stand/VERB still/ADV then/ADV stumble/NOUN left/VERB#0.0#0.0\nthe person is pushing with his right arm.#the/DET person/NOUN is/AUX push/VERB with/ADP his/DET right/ADJ arm/NOUN#0.0#0.0\n,M013602
4,M013593.txt,a person shifts their weight from foot to foot and shakes their hands.#a/DET person/NOUN shift/VERB their/DET weight/NOUN from/ADP foot/NOUN to/ADP foot/NOUN and/CCONJ shake/VERB their/DET hand/NOUN#0.0#0.0\na figure does the chacha goes ahead to do the mambo.#a/DET figure/NOUN does/AUX the/DET chacha/NOUN go/VERB ahead/ADV to/PART do/AUX the/DET mambo/NOUN#0.0#0.0\na woman rubs her hands together as she lifts each heal separately multiple times.#a/DET woman/NOUN rub/VERB her/DET hand/NOUN together/ADV as/SCONJ she/PRON lift/VERB each/DET heal/VERB separately/ADV multiple/ADJ time/NOUN#0.0#0.0\n,M013593


In [None]:
df_texts_filtered.shape

(29034, 3)

In [None]:
df_texts_filtered.rename(columns={'filename': 'base_filename', 'text_content': 'text_content'}, inplace=True)
df_texts_filtered.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_texts_filtered.rename(columns={'filename': 'base_filename', 'text_content': 'text_content'}, inplace=True)


Unnamed: 0,base_filename,text_content,base_filename.1
0,M013624.txt,a standing person with both their arms in front of them pushes back with their right foot and walking slowly with their left foot#a/DET stand/VERB person/NOUN with/ADP both/CCONJ their/DET arm/NOUN in/ADP front/NOUN of/ADP them/PRON push/VERB back/ADV with/ADP their/DET right/ADJ foot/NOUN and/CCONJ walk/VERB slowly/ADV with/ADP their/DET left/ADJ foot/NOUN#0.0#0.0\na person moving closer to something or someone in a ready to fight stance#a/DET person/NOUN move/VERB closer/ADV to/ADP something/PRON or/CCONJ someone/PRON in/ADP a/DET ready/ADJ to/PART fight/VERB stance/NOUN#0.0#0.0\na man has both arms raised straight with his hands covering his face. and moves forward.#a/DET man/NOUN has/AUX both/DET arm/NOUN raise/VERB straight/ADV with/ADP his/DET hand/NOUN cover/VERB his/DET face/NOUN and/CCONJ move/NOUN forward/ADV#0.0#0.0\n,M013624
1,M013641.txt,person bends knees then slightly jumps into the air.#person/NOUN bend/VERB knee/NOUN then/ADV slightly/ADV jump/VERB into/ADP the/DET air/NOUN#0.0#0.0\na person jumps in place once.#a/DET person/NOUN jump/VERB in/ADP place/NOUN once/ADV#0.0#0.0\na person jumps once in the air.#a/DET person/NOUN jump/VERB once/ADV in/ADP the/DET air/NOUN#0.0#0.0\n,M013641
2,M013581.txt,a person who is standing with his hands by his sides takes four quick steps forward and stops.#a/DET person/NOUN who/PRON is/AUX stand/VERB with/ADP his/DET hand/NOUN by/ADP his/DET side/NOUN take/VERB four/NUM quick/ADJ step/NOUN forward/ADV and/CCONJ stop/NOUN#0.0#0.0\na person fastly walked forward#a/DET person/NOUN fastly/ADV walk/VERB forward/ADV#0.0#0.0\nperson runs forward a few steps.#person/NOUN run/VERB forward/ADV a/DET few/ADJ step/NOUN#0.0#0.0\n,M013581
3,M013602.txt,the man is pushing his right#the/DET man/NOUN is/AUX push/VERB his/DET right/NOUN#0.0#0.0\nthis person stands still then stumbles left.#this/DET person/NOUN stand/VERB still/ADV then/ADV stumble/NOUN left/VERB#0.0#0.0\nthe person is pushing with his right arm.#the/DET person/NOUN is/AUX push/VERB with/ADP his/DET right/ADJ arm/NOUN#0.0#0.0\n,M013602
4,M013593.txt,a person shifts their weight from foot to foot and shakes their hands.#a/DET person/NOUN shift/VERB their/DET weight/NOUN from/ADP foot/NOUN to/ADP foot/NOUN and/CCONJ shake/VERB their/DET hand/NOUN#0.0#0.0\na figure does the chacha goes ahead to do the mambo.#a/DET figure/NOUN does/AUX the/DET chacha/NOUN go/VERB ahead/ADV to/PART do/AUX the/DET mambo/NOUN#0.0#0.0\na woman rubs her hands together as she lifts each heal separately multiple times.#a/DET woman/NOUN rub/VERB her/DET hand/NOUN together/ADV as/SCONJ she/PRON lift/VERB each/DET heal/VERB separately/ADV multiple/ADJ time/NOUN#0.0#0.0\n,M013593


In [None]:
print("df_npy columns:", df_npy.columns)
print("df_texts_filtered columns:", df_texts_filtered.columns)

df_npy columns: Index(['filename', 'motion_array', 'start_frame', 'end_frame',
       'base_filename'],
      dtype='object')
df_texts_filtered columns: Index(['base_filename', 'text_content', 'base_filename'], dtype='object')


In [None]:
df_texts_filtered = df_texts_filtered.loc[:, ~df_texts_filtered.columns.duplicated()]

df_npy = df_npy.merge(df_texts_filtered, on='base_filename', how='left')

In [None]:
df_npy.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,base_filename,text_content
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93,M013957,
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45,M013962,
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120,M013951,
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82,M013955,
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198,M013958,


In [None]:
text_content_present = df_npy['text_content'].notna().sum()
text_content_missing = df_npy['text_content'].isna().sum()

print(f"Number of entries with text content: {text_content_present}")
print(f"Number of entries without text content: {text_content_missing}")

Number of entries with text content: 0
Number of entries without text content: 29376


In [None]:
df_npy['base_filename'] = df_npy['base_filename'].str.strip()
df_texts_filtered['base_filename'] = df_texts_filtered['base_filename'].str.strip()

In [None]:
# df_npy = df_npy.drop(["text_content_x","text_content_y"], axis=1)
df_npy.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,base_filename
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93,M013957
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45,M013962
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120,M013951
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82,M013955
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198,M013958


In [None]:
df_texts_filtered.head()

Unnamed: 0,base_filename,text_content
0,M013624.txt,a standing person with both their arms in front of them pushes back with their right foot and walking slowly with their left foot#a/DET stand/VERB person/NOUN with/ADP both/CCONJ their/DET arm/NOUN in/ADP front/NOUN of/ADP them/PRON push/VERB back/ADV with/ADP their/DET right/ADJ foot/NOUN and/CCONJ walk/VERB slowly/ADV with/ADP their/DET left/ADJ foot/NOUN#0.0#0.0\na person moving closer to something or someone in a ready to fight stance#a/DET person/NOUN move/VERB closer/ADV to/ADP something/PRON or/CCONJ someone/PRON in/ADP a/DET ready/ADJ to/PART fight/VERB stance/NOUN#0.0#0.0\na man has both arms raised straight with his hands covering his face. and moves forward.#a/DET man/NOUN has/AUX both/DET arm/NOUN raise/VERB straight/ADV with/ADP his/DET hand/NOUN cover/VERB his/DET face/NOUN and/CCONJ move/NOUN forward/ADV#0.0#0.0\n
1,M013641.txt,person bends knees then slightly jumps into the air.#person/NOUN bend/VERB knee/NOUN then/ADV slightly/ADV jump/VERB into/ADP the/DET air/NOUN#0.0#0.0\na person jumps in place once.#a/DET person/NOUN jump/VERB in/ADP place/NOUN once/ADV#0.0#0.0\na person jumps once in the air.#a/DET person/NOUN jump/VERB once/ADV in/ADP the/DET air/NOUN#0.0#0.0\n
2,M013581.txt,a person who is standing with his hands by his sides takes four quick steps forward and stops.#a/DET person/NOUN who/PRON is/AUX stand/VERB with/ADP his/DET hand/NOUN by/ADP his/DET side/NOUN take/VERB four/NUM quick/ADJ step/NOUN forward/ADV and/CCONJ stop/NOUN#0.0#0.0\na person fastly walked forward#a/DET person/NOUN fastly/ADV walk/VERB forward/ADV#0.0#0.0\nperson runs forward a few steps.#person/NOUN run/VERB forward/ADV a/DET few/ADJ step/NOUN#0.0#0.0\n
3,M013602.txt,the man is pushing his right#the/DET man/NOUN is/AUX push/VERB his/DET right/NOUN#0.0#0.0\nthis person stands still then stumbles left.#this/DET person/NOUN stand/VERB still/ADV then/ADV stumble/NOUN left/VERB#0.0#0.0\nthe person is pushing with his right arm.#the/DET person/NOUN is/AUX push/VERB with/ADP his/DET right/ADJ arm/NOUN#0.0#0.0\n
4,M013593.txt,a person shifts their weight from foot to foot and shakes their hands.#a/DET person/NOUN shift/VERB their/DET weight/NOUN from/ADP foot/NOUN to/ADP foot/NOUN and/CCONJ shake/VERB their/DET hand/NOUN#0.0#0.0\na figure does the chacha goes ahead to do the mambo.#a/DET figure/NOUN does/AUX the/DET chacha/NOUN go/VERB ahead/ADV to/PART do/AUX the/DET mambo/NOUN#0.0#0.0\na woman rubs her hands together as she lifts each heal separately multiple times.#a/DET woman/NOUN rub/VERB her/DET hand/NOUN together/ADV as/SCONJ she/PRON lift/VERB each/DET heal/VERB separately/ADV multiple/ADJ time/NOUN#0.0#0.0\n


In [None]:
df_texts_filtered.shape

(29034, 2)

In [None]:
df_npy = df_npy.merge(df_texts_filtered, on='base_filename', how='left')

In [None]:
df_npy.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,base_filename,text_content
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93,M013957,
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45,M013962,
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120,M013951,
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82,M013955,
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198,M013958,


In [None]:
df_npy['text_content'].isna().sum()

29376

In [None]:
df_npy['base_filename'] = df_npy['filename'].str.replace('.npy', '', regex=False).str.strip()
df_texts_filtered['base_filename'] = df_texts_filtered['base_filename'].str.replace('.txt', '', regex=False).str.strip()

In [None]:
df_npy.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,base_filename,text_content
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93,M013957,
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45,M013962,
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120,M013951,
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82,M013955,
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198,M013958,


In [None]:
df_texts_filtered.head()

Unnamed: 0,base_filename,text_content
0,M013624,a standing person with both their arms in front of them pushes back with their right foot and walking slowly with their left foot#a/DET stand/VERB person/NOUN with/ADP both/CCONJ their/DET arm/NOUN in/ADP front/NOUN of/ADP them/PRON push/VERB back/ADV with/ADP their/DET right/ADJ foot/NOUN and/CCONJ walk/VERB slowly/ADV with/ADP their/DET left/ADJ foot/NOUN#0.0#0.0\na person moving closer to something or someone in a ready to fight stance#a/DET person/NOUN move/VERB closer/ADV to/ADP something/PRON or/CCONJ someone/PRON in/ADP a/DET ready/ADJ to/PART fight/VERB stance/NOUN#0.0#0.0\na man has both arms raised straight with his hands covering his face. and moves forward.#a/DET man/NOUN has/AUX both/DET arm/NOUN raise/VERB straight/ADV with/ADP his/DET hand/NOUN cover/VERB his/DET face/NOUN and/CCONJ move/NOUN forward/ADV#0.0#0.0\n
1,M013641,person bends knees then slightly jumps into the air.#person/NOUN bend/VERB knee/NOUN then/ADV slightly/ADV jump/VERB into/ADP the/DET air/NOUN#0.0#0.0\na person jumps in place once.#a/DET person/NOUN jump/VERB in/ADP place/NOUN once/ADV#0.0#0.0\na person jumps once in the air.#a/DET person/NOUN jump/VERB once/ADV in/ADP the/DET air/NOUN#0.0#0.0\n
2,M013581,a person who is standing with his hands by his sides takes four quick steps forward and stops.#a/DET person/NOUN who/PRON is/AUX stand/VERB with/ADP his/DET hand/NOUN by/ADP his/DET side/NOUN take/VERB four/NUM quick/ADJ step/NOUN forward/ADV and/CCONJ stop/NOUN#0.0#0.0\na person fastly walked forward#a/DET person/NOUN fastly/ADV walk/VERB forward/ADV#0.0#0.0\nperson runs forward a few steps.#person/NOUN run/VERB forward/ADV a/DET few/ADJ step/NOUN#0.0#0.0\n
3,M013602,the man is pushing his right#the/DET man/NOUN is/AUX push/VERB his/DET right/NOUN#0.0#0.0\nthis person stands still then stumbles left.#this/DET person/NOUN stand/VERB still/ADV then/ADV stumble/NOUN left/VERB#0.0#0.0\nthe person is pushing with his right arm.#the/DET person/NOUN is/AUX push/VERB with/ADP his/DET right/ADJ arm/NOUN#0.0#0.0\n
4,M013593,a person shifts their weight from foot to foot and shakes their hands.#a/DET person/NOUN shift/VERB their/DET weight/NOUN from/ADP foot/NOUN to/ADP foot/NOUN and/CCONJ shake/VERB their/DET hand/NOUN#0.0#0.0\na figure does the chacha goes ahead to do the mambo.#a/DET figure/NOUN does/AUX the/DET chacha/NOUN go/VERB ahead/ADV to/PART do/AUX the/DET mambo/NOUN#0.0#0.0\na woman rubs her hands together as she lifts each heal separately multiple times.#a/DET woman/NOUN rub/VERB her/DET hand/NOUN together/ADV as/SCONJ she/PRON lift/VERB each/DET heal/VERB separately/ADV multiple/ADJ time/NOUN#0.0#0.0\n


In [None]:
df_npy = df_npy.merge(df_texts_filtered[['base_filename', 'text_content']], on='base_filename', how='left')

In [None]:
df_npy.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,base_filename,text_content_x,text_content_y
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93,M013957,,a man walks forward and turns to the right.#a/DET man/NOUN walk/VERB forward/ADV and/CCONJ turn/VERB to/ADP the/DET right/NOUN#0.0#0.0\na man walks to his left in a half circle.#a/DET man/NOUN walk/VERB to/ADP his/DET left/NOUN in/ADP a/DET half/ADJ circle/NOUN#0.0#0.0\na person walks curved to the left.#a/DET person/NOUN walk/VERB curved/ADJ to/ADP the/DET left/NOUN#0.0#0.0\n
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45,M013962,,"a man is in a seated postion. he alternates between moving his left and right hands in a driving motion.#a/DET man/NOUN is/AUX in/ADP a/DET seated/ADJ postion/NOUN he/PRON alternate/VERB between/ADP move/VERB his/DET left/ADJ and/CCONJ right/ADJ hand/NOUN in/ADP a/DET driving/NOUN motion/NOUN#0.0#0.0\na sitting person gestures with their hands.#a/DET sit/VERB person/NOUN gesture/VERB with/ADP their/DET hand/NOUN#0.0#0.0\nfrom a seated position, the person is facing away from us with their arms bent and reached out ahead of them, and they move their left arm up while their right arm goes down, back and forth, motioning as if to turn a wheel#from/ADP a/DET seated/ADJ position/NOUN the/DET person/NOUN is/AUX face/VERB away/ADV from/ADP us/PRON with/ADP their/DET arm/NOUN bent/ADJ and/CCONJ reach/VERB out/ADP ahead/ADV of/ADP them/PRON and/CCONJ they/PRON move/VERB their/DET left/ADJ arm/NOUN up/ADP while/SCONJ their/DET right/ADJ arm/NOUN go/VERB down/ADV back/ADV and/CCONJ forth/ADV motion/VERB as/SCONJ if/SCONJ to/PART turn/VERB a/DET wheel/NOUN#0.0#0.0\n"
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120,M013951,,"a man gets down on his hands and knees.#a/DET man/NOUN get/VERB down/ADP on/ADP his/DET hand/NOUN and/CCONJ knee/NOUN#0.0#0.0\na man takes one step forward, drops down on 1 knee and lowers his back and both hands, then lowers the other knee.#a/DET man/NOUN take/VERB one/NUM step/NOUN forward/ADV drop/VERB down/ADV on/ADP knee/NOUN and/CCONJ lower/VERB his/DET back/NOUN and/CCONJ both/DET hand/NOUN then/ADV lower/VERB the/DET other/ADJ knee/NOUN#0.0#0.0\na person steps in and knelt down with his hands on the floor.#a/DET person/NOUN step/VERB in/ADP and/CCONJ knelt/VERB down/ADP with/ADP his/DET hand/NOUN on/ADP the/DET floor/NOUN#0.0#0.0\n"
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82,M013955,,a person warms up pectorals by moving arms inwards and outwards.#a/DET person/NOUN warm/VERB up/ADP pectoral/NOUN by/ADP move/VERB arm/NOUN inward/NOUN and/CCONJ outward/NOUN#0.0#0.0\nthe man exercises his arms by waving them inward then outward.#the/DET man/NOUN exercise/VERB his/DET arm/NOUN by/ADP wave/VERB them/PRON inward/ADV then/ADV outward/ADV#0.0#0.0\nperson moving hands in various motions#person/NOUN move/VERB hand/NOUN in/ADP various/ADJ motion/NOUN#0.0#0.0\n
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198,M013958,,"a man lifts his left knee to his right elbow, then his right knee to his left elbow multiple times and then squats.#a/DET man/NOUN lift/VERB his/DET left/ADJ knee/NOUN to/ADP his/DET right/ADJ elbow/NOUN then/ADV his/DET right/ADJ knee/NOUN to/ADP his/DET left/ADJ elbow/NOUN multiple/ADJ time/NOUN and/CCONJ then/ADV squat/VERB#0.0#0.0\na man lifts his right knee to his left elbow and vice-versa several times, before holding his arms out in front of him and dropping into a squat.#a/DET man/NOUN lift/VERB his/DET right/ADJ knee/NOUN to/ADP his/DET left/ADJ elbow/NOUN and/CCONJ viceversa/VERB several/ADJ time/NOUN before/ADP hold/VERB his/DET arm/NOUN out/SCONJ in/ADP front/NOUN of/ADP him/PRON and/CCONJ drop/VERB into/ADP a/DET squat/NOUN#0.0#0.0\nbrings opposite knee to opposite elbow back and forth multiple times then goes into a squat.#bring/VERB opposite/ADJ knee/NOUN to/ADP opposite/ADJ elbow/NOUN back/ADV and/CCONJ forth/ADV multiple/ADJ time/NOUN then/ADV go/VERB into/ADP a/DET squat/NOUN#0.0#0.0\n"


In [None]:
text_content_present = df_npy['text_content_y'].notna().sum()
text_content_missing = df_npy['text_content_y'].isna().sum()

print(f"Number of entries with text content: {text_content_present}")
print(f"Number of entries without text content: {text_content_missing}")

Number of entries with text content: 29034
Number of entries without text content: 342


In [None]:
df_final = df_npy[df_npy['text_content_y'].notna()]

In [None]:
df_final = df_final[['filename', 'motion_array', 'start_frame', 'end_frame', 'text_content_y']]
df_final.rename(columns={'text_content_y': 'text_description'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final.rename(columns={'text_content_y': 'text_description'}, inplace=True)


In [None]:
df_final.shape

(29034, 5)

In [None]:
df_final.head()

Unnamed: 0,filename,motion_array,start_frame,end_frame,text_description
0,M013957.npy,"[0.0, 0.96446025, 0.0, 0.057798058, 0.8795496, -0.008591607, -0.061404742, 0.8753187, -0.018910427, -0.0040980056, 1.0887009, -0.04310076, 0.09564249, 0.48801637, -0.023039669, -0.10019813, 0.4876872, -0.040904045, -0.010430377, 1.2271684, -0.007183317, 0.06460866, 0.06013452, -0.07737604, -0.05235154, 0.066211194, -0.0761334, -0.007210072, 1.284167, -0.0015700138, 0.0847856, 0.006765423, 0.05416476, -0.11413582, 0.012341489, 0.048794106, 0.0050030677, 1.5029547, -0.011650272, 0.0700652, 1.3964866, -0.019331202, -0.082037866, 1.4063969, -0.005964428, 0.0047016777, 1.5891668, 0.044781104, 0.19948089, 1.3778354, -0.034362942, -0.20195563, 1.3859515, -0.024044171, 0.23811609, 1.1255673, -0.06325798, -0.23151031, 1.1268196, -0.058605686, 0.2736099, 0.89081895, 0.056723833, -0.25741023, 0.87891346, 0.04485941, -8.996891e-05, 0.96432513, 0.00080322725, 0.057771083, 0.8794574, -0.007787891, -0.06138201, 0.87522626, -0.018665524, -0.004026055, 1.0886983, -0.041929036, 0.095583595, 0.48792836, -0.0224341, -0.10016667, 0.48760188, -0.04079978, -0.010502814, 1.2272358, -0.0063085505, 0.06463921, 0.060105484, -0.07728323, -0.052284826, 0.06614671, -0.07622971, -0.0073449328, 1.2842423, -0.0007402207, 0.084683105, 0.0065253107, 0.054192044, -0.11419296, ...]",0,93,a man walks forward and turns to the right.#a/DET man/NOUN walk/VERB forward/ADV and/CCONJ turn/VERB to/ADP the/DET right/NOUN#0.0#0.0\na man walks to his left in a half circle.#a/DET man/NOUN walk/VERB to/ADP his/DET left/NOUN in/ADP a/DET half/ADJ circle/NOUN#0.0#0.0\na person walks curved to the left.#a/DET person/NOUN walk/VERB curved/ADJ to/ADP the/DET left/NOUN#0.0#0.0\n
1,M013962.npy,"[0.0, 0.55056447, 0.0, 0.05949574, 0.46852037, -0.018795379, -0.073513396, 0.4725657, -0.02421427, -0.003401407, 0.640326, 0.09613267, 0.21057896, 0.4892717, -0.3816763, -0.22957948, 0.52161866, -0.3784514, 0.011783241, 0.78073686, 0.072517775, 0.14316474, 0.07123692, -0.29392272, -0.25213063, 0.105600566, -0.2913147, 0.019783085, 0.8336719, 0.051911972, 0.21314013, 0.0060117496, -0.40072855, -0.33416057, 0.030611852, -0.39118353, 0.03998228, 1.0516176, 0.06641982, 0.1007498, 0.9445095, 0.05978805, -0.051007386, 0.9582855, 0.05624639, 0.053674027, 1.131299, 0.002540864, 0.2166091, 0.9997237, 0.030628614, -0.16634957, 0.99588, 0.036047507, 0.33175927, 0.87807935, -0.16407615, -0.30514035, 0.7922773, -0.056150027, 0.25584415, 1.0552013, -0.3474535, -0.25680012, 0.8476052, -0.31583306, 0.0002875581, 0.55122125, 0.0005868984, 0.059664134, 0.46914035, -0.018423468, -0.07330023, 0.4735907, -0.024566466, -0.0034130835, 0.64034414, 0.09730101, 0.2101269, 0.48924023, -0.38159883, -0.2320195, 0.52048236, -0.37791654, 0.011129899, 0.78080547, 0.073582865, 0.1413411, 0.07158574, -0.29310217, -0.25247908, 0.1046547, -0.289366, 0.019767746, 0.8334713, 0.05254923, 0.21125983, 0.004916446, -0.39905035, -0.33481696, ...]",0,45,"a man is in a seated postion. he alternates between moving his left and right hands in a driving motion.#a/DET man/NOUN is/AUX in/ADP a/DET seated/ADJ postion/NOUN he/PRON alternate/VERB between/ADP move/VERB his/DET left/ADJ and/CCONJ right/ADJ hand/NOUN in/ADP a/DET driving/NOUN motion/NOUN#0.0#0.0\na sitting person gestures with their hands.#a/DET sit/VERB person/NOUN gesture/VERB with/ADP their/DET hand/NOUN#0.0#0.0\nfrom a seated position, the person is facing away from us with their arms bent and reached out ahead of them, and they move their left arm up while their right arm goes down, back and forth, motioning as if to turn a wheel#from/ADP a/DET seated/ADJ position/NOUN the/DET person/NOUN is/AUX face/VERB away/ADV from/ADP us/PRON with/ADP their/DET arm/NOUN bent/ADJ and/CCONJ reach/VERB out/ADP ahead/ADV of/ADP them/PRON and/CCONJ they/PRON move/VERB their/DET left/ADJ arm/NOUN up/ADP while/SCONJ their/DET right/ADJ arm/NOUN go/VERB down/ADV back/ADV and/CCONJ forth/ADV motion/VERB as/SCONJ if/SCONJ to/PART turn/VERB a/DET wheel/NOUN#0.0#0.0\n"
2,M013951.npy,"[0.0, 0.93152547, 0.0, 0.055670176, 0.8447783, -4.805252e-05, -0.062056884, 0.84312546, -0.020217191, 0.0018790048, 1.055664, -0.043546483, 0.12198639, 0.45735243, -0.021086827, -0.123658314, 0.46136838, -0.072309926, -0.010620123, 1.1969314, -0.023779012, 0.117791995, 0.03510933, -0.11431555, -0.108019575, 0.040886175, -0.13651705, -0.0041049104, 1.2516445, -0.0078188535, 0.15237212, 0.01362489, 0.023165204, -0.17714089, 0.007698552, -0.008271709, 0.010858947, 1.4704864, -0.00960019, 0.07090641, 1.3659726, -0.022137158, -0.07368891, 1.376861, -0.0016909763, 0.015334971, 1.5663412, 0.02793505, 0.18084502, 1.2998128, -0.051439494, -0.18245357, 1.3276627, -0.031270362, 0.2665516, 1.0578512, -0.042735785, -0.27992007, 1.0841663, -0.051936984, 0.4148227, 0.83958936, -0.008976072, -0.4671819, 0.89181274, -0.02425304, -0.0018563999, 0.93128806, 0.00081173994, 0.05378955, 0.8445255, 0.0009935052, -0.0639703, 0.84283173, -0.018979482, -1.3226876e-05, 1.0552648, -0.043194786, 0.12072998, 0.45720354, -0.019981347, -0.12432146, 0.46088704, -0.0711606, -0.012434512, 1.1964281, -0.022650318, 0.11778556, 0.0351164, -0.113961086, -0.10828896, 0.04060382, -0.1365624, -0.0055230646, 1.2510769, -0.006636181, 0.15241034, 0.013947734, 0.023557536, -0.17729922, ...]",0,120,"a man gets down on his hands and knees.#a/DET man/NOUN get/VERB down/ADP on/ADP his/DET hand/NOUN and/CCONJ knee/NOUN#0.0#0.0\na man takes one step forward, drops down on 1 knee and lowers his back and both hands, then lowers the other knee.#a/DET man/NOUN take/VERB one/NUM step/NOUN forward/ADV drop/VERB down/ADV on/ADP knee/NOUN and/CCONJ lower/VERB his/DET back/NOUN and/CCONJ both/DET hand/NOUN then/ADV lower/VERB the/DET other/ADJ knee/NOUN#0.0#0.0\na person steps in and knelt down with his hands on the floor.#a/DET person/NOUN step/VERB in/ADP and/CCONJ knelt/VERB down/ADP with/ADP his/DET hand/NOUN on/ADP the/DET floor/NOUN#0.0#0.0\n"
3,M013955.npy,"[0.0, 1.000014, 0.0, 0.054978974, 0.9196506, 0.033812564, -0.07407447, 0.9251913, 0.031446695, 0.0027503194, 1.1307806, 0.014238147, 0.11015873, 0.5305503, 0.056070514, -0.16181663, 0.54548603, 0.012157365, 0.020766601, 1.2648618, -0.03267751, 0.06518342, 0.11638263, 0.17200479, -0.1823584, 0.13962187, 0.13874629, 0.029073339, 1.3144997, -0.060205396, 0.11997941, 0.008625549, 0.09490716, -0.26503575, 0.037772845, 0.06721095, 0.059068456, 1.5306035, -0.082970925, 0.11993777, 1.4176427, -0.06299898, -0.035780124, 1.4420104, -0.06988531, 0.075813636, 1.6146109, -0.14023703, 0.24711145, 1.3862528, -0.0757966, -0.1586157, 1.4469097, -0.07343073, 0.25331444, 1.1407887, -0.00045950396, -0.25913095, 1.2159643, 0.0025840334, 0.24965158, 0.8752748, -0.016308706, -0.21681227, 0.95999485, -0.071726106, 0.0023321835, 0.99311066, 0.0037532474, 0.06113871, 0.91564924, 0.037896547, -0.06804754, 0.91646916, 0.03906798, 0.00066382706, 1.124205, 0.014784629, 0.10930257, 0.52545404, 0.057059687, -0.16565226, 0.540221, 0.0050459206, 0.016760811, 1.257476, -0.03504684, 0.062391087, 0.110551715, 0.16954888, -0.19863226, 0.13196023, 0.12083841, 0.026245583, 1.306631, -0.063056335, 0.13193017, 0.009282725, 0.095607385, -0.27985957, ...]",0,82,a person warms up pectorals by moving arms inwards and outwards.#a/DET person/NOUN warm/VERB up/ADP pectoral/NOUN by/ADP move/VERB arm/NOUN inward/NOUN and/CCONJ outward/NOUN#0.0#0.0\nthe man exercises his arms by waving them inward then outward.#the/DET man/NOUN exercise/VERB his/DET arm/NOUN by/ADP wave/VERB them/PRON inward/ADV then/ADV outward/ADV#0.0#0.0\nperson moving hands in various motions#person/NOUN move/VERB hand/NOUN in/ADP various/ADJ motion/NOUN#0.0#0.0\n
4,M013958.npy,"[0.0, 0.9899903, 0.0, 0.066741005, 0.91150033, 0.003034182, -0.049037963, 0.8918728, -0.0065268315, -0.01803435, 1.1074384, -0.056488078, 0.098784775, 0.542579, 0.13648658, -0.09630106, 0.5067833, 0.034935042, -0.04872401, 1.2401814, -0.012432959, 0.07742814, 0.15435049, -0.05277759, -0.08371021, 0.082111605, 0.009092331, -0.056892946, 1.2932767, 0.007688675, 0.10523365, 0.07515083, 0.06346607, -0.1140541, 0.012569676, 0.13781351, -0.096792944, 1.5052602, 0.04756122, -0.00474184, 1.4199954, 0.018879673, -0.15992567, 1.3906558, 0.029141158, -0.12942325, 1.5379863, 0.13965543, 0.11747682, 1.4686391, 0.023195736, -0.28112006, 1.3700643, 0.032756776, 0.17643097, 1.2426171, -0.08359966, -0.28161716, 1.1082028, 0.0073459446, 0.27517673, 1.0272723, 0.0373884, -0.35124144, 0.89616144, 0.15908316, 0.00234579, 0.99003416, 0.0037243126, 0.06974022, 0.91209817, 0.0065908507, -0.04571218, 0.891593, -0.0048854984, -0.015948746, 1.1081146, -0.051343877, 0.10163577, 0.5455119, 0.14636368, -0.097138524, 0.5051248, 0.010751665, -0.04935129, 1.2401866, -0.007245071, 0.09534568, 0.1399882, -0.0036674885, -0.08436916, 0.07969467, 0.00644549, -0.058435448, 1.2930018, 0.013218239, 0.11169225, 0.078711495, 0.12492573, -0.11427241, ...]",0,198,"a man lifts his left knee to his right elbow, then his right knee to his left elbow multiple times and then squats.#a/DET man/NOUN lift/VERB his/DET left/ADJ knee/NOUN to/ADP his/DET right/ADJ elbow/NOUN then/ADV his/DET right/ADJ knee/NOUN to/ADP his/DET left/ADJ elbow/NOUN multiple/ADJ time/NOUN and/CCONJ then/ADV squat/VERB#0.0#0.0\na man lifts his right knee to his left elbow and vice-versa several times, before holding his arms out in front of him and dropping into a squat.#a/DET man/NOUN lift/VERB his/DET right/ADJ knee/NOUN to/ADP his/DET left/ADJ elbow/NOUN and/CCONJ viceversa/VERB several/ADJ time/NOUN before/ADP hold/VERB his/DET arm/NOUN out/SCONJ in/ADP front/NOUN of/ADP him/PRON and/CCONJ drop/VERB into/ADP a/DET squat/NOUN#0.0#0.0\nbrings opposite knee to opposite elbow back and forth multiple times then goes into a squat.#bring/VERB opposite/ADJ knee/NOUN to/ADP opposite/ADJ elbow/NOUN back/ADV and/CCONJ forth/ADV multiple/ADJ time/NOUN then/ADV go/VERB into/ADP a/DET squat/NOUN#0.0#0.0\n"


In [None]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 29034 entries, 0 to 29375
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   filename          29034 non-null  object
 1   motion_array      29034 non-null  object
 2   start_frame       29034 non-null  int64 
 3   end_frame         29034 non-null  int64 
 4   text_description  29034 non-null  object
dtypes: int64(2), object(3)
memory usage: 1.3+ MB


In [None]:
df_final['text_description'].notna().sum()

29034

In [None]:
df_final['text_description'].isna().sum()

0

In [None]:
df_final['motion_array_list'] = df_final['motion_array'].apply(lambda x: x.tolist())

NameError: name 'df_final' is not defined

In [None]:
df_final['motion_array_list'].type

In [None]:
pd.set_option('display.max_colwidth', None)
df_final.to_excel('npy_to_text_mapping.xlsx', index=False)

In [None]:
from google.colab import files
files.download('npy_to_text_mapping.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>