In [2]:
!pip3 -q install h5py validators tqdm numpy argparse requests colorama

In [None]:
!git clone https://github.com/CMU-MultiComp-Lab/CMU-MultimodalSDK.git
!cp -r CMU-MultimodalSDK/mmsdk ./
!rm -rf CMU-MultimodalSDK

In [3]:
import mmsdk
import os
import re
import numpy as np
from mmsdk import mmdatasdk as md
from subprocess import check_call, CalledProcessError

DATA_PATH = './cmu_mosi/'
if not os.path.exists(DATA_PATH):
    os.makedirs(DATA_PATH)
    
# download highlevel features, low-level (raw) data and labels for the dataset MOSI
# if the files are already present, instead of downloading it you just load it yourself.
# here we use CMU_MOSI dataset as example.

DATASET = md.cmu_mosi
# obtain the train/dev/test splits - these splits are based on video IDs
train_split = DATASET.standard_folds.standard_train_fold
dev_split = DATASET.standard_folds.standard_valid_fold
test_split = DATASET.standard_folds.standard_test_fold

try:
    md.mmdataset(DATASET.highlevel, DATA_PATH)
except RuntimeError:
    print("High-level features have been downloaded previously.")

try:
    md.mmdataset(DATASET.raw, DATA_PATH)
except RuntimeError:
    print("Raw data have been downloaded previously.")
    
try:
    md.mmdataset(DATASET.labels, DATA_PATH)
except RuntimeError:
    print("Labels have been downloaded previously.")

[91m[1m[2024-01-17 06:01:32.246] | Error   | [0m./cmu_mosi/CMU_MOSI_TimestampedWordVectors.csd file already exists ...
High-level features have been downloaded previously.
[91m[1m[2024-01-17 06:01:32.247] | Error   | [0m./cmu_mosi/CMU_MOSI_TimestampedWords.csd file already exists ...
Raw data have been downloaded previously.
[91m[1m[2024-01-17 06:01:32.247] | Error   | [0m./cmu_mosi/CMU_MOSI_Opinion_Labels.csd file already exists ...
Labels have been downloaded previously.


In [4]:
data_files = os.listdir(DATA_PATH)
print('\n'.join(data_files))

CMU_MOSI_TimestampedWordVectors.csd
CMU_MOSI_Visual_Facet_42.csd
CMU_MOSI_Opinion_Labels.csd
CMU_MOSI_TimestampedWords.csd
CMU_MOSI_TimestampedPhones.csd
CMU_MOSI_openSMILE_IS09.csd
CMU_MOSI_OpenSmile_EB10.csd
CMU_MOSI_Visual_Facet_41.csd


In [5]:
# define your different modalities - refer to the filenames of the CSD files
visual_field = 'CMU_MOSI_Visual_Facet_42'
acoustic_field = 'CMU_MOSI_OpenSmile_EB10'
text_field = 'CMU_MOSI_TimestampedWords'
label_field = 'CMU_MOSI_Opinion_Labels'

features = [
    text_field, 
    visual_field, 
    acoustic_field,
]

recipe = {feat: os.path.join(DATA_PATH, feat) + '.csd' for feat in features}
dataset = md.mmdataset(recipe)

[92m[1m[2024-01-17 06:01:37.810] | Success | [0mComputational sequence read from file ./cmu_mosi/CMU_MOSI_TimestampedWords.csd ...
[94m[1m[2024-01-17 06:01:37.829] | Status  | [0mChecking the integrity of the <words> computational sequence ...
[94m[1m[2024-01-17 06:01:37.830] | Status  | [0mChecking the format of the data in <words> computational sequence ...


                                                                   

[92m[1m[2024-01-17 06:01:37.893] | Success | [0m<words> computational sequence data in correct format.
[94m[1m[2024-01-17 06:01:37.893] | Status  | [0mChecking the format of the metadata in <words> computational sequence ...
[92m[1m[2024-01-17 06:01:37.894] | Success | [0mComputational sequence read from file ./cmu_mosi/CMU_MOSI_Visual_Facet_42.csd ...
[94m[1m[2024-01-17 06:01:37.906] | Status  | [0mChecking the integrity of the <FACET_4.2> computational sequence ...
[94m[1m[2024-01-17 06:01:37.906] | Status  | [0mChecking the format of the data in <FACET_4.2> computational sequence ...


                                                                   

[92m[1m[2024-01-17 06:01:37.951] | Success | [0m<FACET_4.2> computational sequence data in correct format.
[94m[1m[2024-01-17 06:01:37.951] | Status  | [0mChecking the format of the metadata in <FACET_4.2> computational sequence ...
[92m[1m[2024-01-17 06:01:37.952] | Success | [0mComputational sequence read from file ./cmu_mosi/CMU_MOSI_OpenSmile_EB10.csd ...
[94m[1m[2024-01-17 06:01:37.964] | Status  | [0mChecking the integrity of the <OpenSmile_emobase2010> computational sequence ...
[94m[1m[2024-01-17 06:01:37.964] | Status  | [0mChecking the format of the data in <OpenSmile_emobase2010> computational sequence ...


                                                                   

[92m[1m[2024-01-17 06:01:38.002] | Success | [0m<OpenSmile_emobase2010> computational sequence data in correct format.
[94m[1m[2024-01-17 06:01:38.002] | Status  | [0mChecking the format of the metadata in <OpenSmile_emobase2010> computational sequence ...
[92m[1m[2024-01-17 06:01:38.002] | Success | [0mDataset initialized successfully ... 




In [6]:
print(list(dataset.keys()))
print("=" * 80)

print(list(dataset[visual_field].keys())[:10])
print("=" * 80)

some_id = list(dataset[visual_field].keys())[15]
print(list(dataset[visual_field][some_id].keys()))
print("=" * 80)

print(dataset[visual_field][some_id]['features'].shape, dataset[visual_field][some_id]['intervals'].shape)
print(dataset[text_field][some_id]['features'].shape, dataset[text_field][some_id]['intervals'].shape)
print(dataset[acoustic_field][some_id]['features'].shape, dataset[acoustic_field][some_id]['intervals'].shape)

print("Different modalities have different number of time steps!")

['CMU_MOSI_TimestampedWords', 'CMU_MOSI_Visual_Facet_42', 'CMU_MOSI_OpenSmile_EB10']
['03bSnISJMiM', '0h-zjBukYpk', '1DmNV9C1hbY', '1iG0909rllw', '2WGyTLYerpo', '2iD-tVS8NPw', '5W7Z1C_fDaE', '6Egk_28TtTM', '6_0THN4chvY', '73jzhE8R1TQ']
['features', 'intervals']
(5403, 35) (5403, 2)
(645, 1) (645, 2)
(25, 1585) (25, 2)
Different modalities have different number of time steps!
