In [1]:
from datasets import load_dataset

In [2]:
# Dataset configuration name; passed as the second argument to load_dataset below.
subset = "speech_clean"

In [3]:
# Load the selected configuration of the Vibravox test dataset. The progress
# output below shows train / validation / test splits being generated.
vibravox = load_dataset("Cnam-LMSSC/vibravox-test", subset)

Generating train split: 100%|██████████| 48/48 [00:00<00:00, 148.41 examples/s]
Generating validation split: 100%|██████████| 48/48 [00:00<00:00, 157.79 examples/s]
Generating test split: 100%|██████████| 48/48 [00:00<00:00, 183.55 examples/s]


In [4]:
# Check which container type load_dataset returned.
type(vibravox)

datasets.dataset_dict.DatasetDict

In [5]:
# Exploratory: list every attribute and method exposed by the loaded object.
dir(vibravox)

['__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__ior__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__or__',
 '__orig_bases__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__ror__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_values_features',
 '_check_values_type',
 'align_labels_with_mapping',
 'cache_files',
 'cast',
 'cast_column',
 'class_encode_column',
 'cleanup_cache_files',
 'clear',
 'column_names',
 'copy',
 'data',
 'filter',
 'flatten',
 'flatten_indices',
 'formatted_as',
 'from_csv',
 'from_json',
 'from_parquet',
 'from_text',
 'fromkeys',
 'get',
 'items',
 'keys',
 'load_from_disk',
 'map',
 'nu

In [7]:
# Inspect the first training example: several `audio.*` sensor channels plus
# speaker and transcript metadata (see the output below).
vibravox['train'][0]

{'audio.headset_microphone': <datasets.features._torchcodec.AudioDecoder at 0x122a02840>,
 'audio.forehead_accelerometer': <datasets.features._torchcodec.AudioDecoder at 0x122a030e0>,
 'audio.soft_in_ear_microphone': <datasets.features._torchcodec.AudioDecoder at 0x1228b69f0>,
 'audio.rigid_in_ear_microphone': <datasets.features._torchcodec.AudioDecoder at 0x100ca05f0>,
 'audio.temple_vibration_pickup': <datasets.features._torchcodec.AudioDecoder at 0x100c62690>,
 'audio.throat_microphone': <datasets.features._torchcodec.AudioDecoder at 0x12316b2c0>,
 'gender': 'female',
 'speaker_id': '9uFUrr9Hg2',
 'sentence_id': 17419,
 'duration': 3.294,
 'raw_text': 'Cependant, cette notion est surtout grammaticale.',
 'normalized_text': 'cependant cette notion est surtout grammaticale',
 'phonemized_text': 'səpɑ̃dɑ̃ sɛt nosjɔ̃ ɛ syʁtu ɡʁamatikal'}

In [8]:
# Keep a handle on the headset-microphone channel of the first training example.
headset = vibravox['train'][0]['audio.headset_microphone']

In [10]:
# The audio entry is a decoder object rather than a raw array; confirm its type.
type(headset)

datasets.features._torchcodec.AudioDecoder

Metadata: AudioStreamMetadata:
  duration_seconds_from_header: 3.293938
  begin_stream_seconds_from_header: None
  bit_rate: 768000.0
  codec: pcm_s16le
  stream_index: 0
  sample_rate: 48000
  num_channels: 1
  sample_format: s16


Trying get_all_samples()...
Audio samples type: <class 'torchcodec._frame.AudioSamples'>
Audio samples shape: No shape
Sample rate: 48000

Original audio entry structure:
Type: <class 'datasets.features._torchcodec.AudioDecoder'>
Available methods: ['get_all_samples', 'get_samples_played_in_range', 'metadata', 'stream_index']


## Summary: How to Play Audio from Vibravox Dataset

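Here are the different methods you can use to play audio from the `headset` variable. Methods 2 and 3 reuse the `audio_array` and `sample_rate` variables defined in Method 1, so run Method 1 first: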

### Method 1: Jupyter Notebook Audio Widget (Recommended for notebooks)
```python
from IPython.display import Audio

# Decode the whole clip and convert the samples to a NumPy array.
audio_array = headset.get_all_samples().data.numpy()
sample_rate = headset.metadata.sample_rate

# Hand a 1-D array to the inline notebook player.
Audio(audio_array.reshape(-1), rate=sample_rate)
```

### Method 2: System Audio Player (macOS/Linux/Windows)
```python
import os
import subprocess
import tempfile

import soundfile as sf

# Write the clip into a throwaway directory: the WAV file is written by path
# (no open handle on it) and removed automatically once the `with` block exits,
# i.e. after the blocking player call has returned.
with tempfile.TemporaryDirectory() as tmp_dir:
    wav_path = os.path.join(tmp_dir, 'clip.wav')
    sf.write(wav_path, audio_array.flatten(), sample_rate)
    subprocess.run(['afplay', wav_path])  # macOS
    # subprocess.run(['aplay', wav_path])  # Linux
    # Windows (blocking, stdlib):
    # import winsound; winsound.PlaySound(wav_path, winsound.SND_FILENAME)
```

### Method 3: Pygame (More Control)
```python
import numpy as np
import pygame

pygame.mixer.init(frequency=sample_rate, size=-16, channels=1)
# Assumes float samples nominally in [-1, 1]; clip first so any out-of-range
# value saturates instead of wrapping around in the signed 16-bit cast.
audio_int16 = (np.clip(audio_array.flatten(), -1.0, 1.0) * 32767).astype(np.int16)
sound = pygame.sndarray.make_sound(audio_int16)
sound.play()
```

### Audio Properties
- **Duration**: ~3.29 seconds
- **Sample Rate**: 48,000 Hz
- **Channels**: 1 (mono)
- **Format**: 16-bit PCM

In [15]:
from IPython.display import Audio

# Decode the full headset clip. `audio_array` and `sample_rate` are reused by
# the system-player cell further down.
sample_rate = headset.metadata.sample_rate
audio_samples = headset.get_all_samples()
audio_array = audio_samples.data.numpy()

# The trailing expression renders the inline audio player.
Audio(audio_array.ravel(), rate=sample_rate)

In [17]:
import os
import subprocess
import tempfile

import soundfile as sf

# Play the headset clip through the macOS command-line player.
# The WAV lives in a temporary directory so nothing is left on disk:
# the directory (and the file in it) is deleted when the `with` block exits,
# which happens only after `afplay` has finished because subprocess.run blocks.
with tempfile.TemporaryDirectory() as tmp_dir:
    wav_path = os.path.join(tmp_dir, 'headset.wav')
    sf.write(wav_path, audio_array.flatten(), sample_rate)
    subprocess.run(['afplay', wav_path])  # macOS

In [18]:
# Same training example as `headset`, but the throat-microphone channel.
throat = vibravox['train'][0]['audio.throat_microphone']

In [19]:
from IPython.display import Audio

# Play the throat-microphone channel selected in the previous cell.
# Throat-specific names are used so the headset `audio_array` / `sample_rate`
# defined earlier stay available for side-by-side comparison.
throat_samples = throat.get_all_samples()
throat_array = throat_samples.data.numpy()
throat_sample_rate = throat.metadata.sample_rate
Audio(throat_array.flatten(), rate=throat_sample_rate)