# Data exploration


In [1]:
from os import listdir

import numpy as np
import pandas as pd
import torchaudio

METADATA_DEV = 'data/metadata_dev'
FOA_DEV = 'data/foa_dev'

Traget sound events are:

0. Female speech, woman speaking
1. Male speech, man speaking
2. Clapping
3. Telephone
4. Laughter
5. Domestic sounds
6. Walk, footsteps
7. Door, open or close
8. Music
9. Musical instrument
10. Water tap, faucet
11. Bell
12. Knock

The metadata CSV files have the following columns:

-   frame number (int): 0, 1, ..., resolution of 100 ms
-   active class index (int): 0, 1, ...
-   source number index (int): 0, 1, ..., unique integer for each source
-   azimuth (int): [-180, 180], front = 0
-   elevation (int): [-90, 90], left = 90
-   distance (int): centimeters

Multiple events of the same class can occur at the same time, each having a
different source index. Occurrences of up to 3 simultaneous events are fairly
common, while higher numbers of overlapping events (up to 5) can occur but are
rare.


In [8]:
columns = ['frame', 'class', 'source', 'azimuth', 'elevation', 'distance']


def check_meta_file(file, dir):
    df = pd.read_csv(f'{METADATA_DEV}/{dir}/{file}', header=None, names=columns)
    max_frame = df['frame'].max()
    classes_per_frame = df.groupby('frame')['class'].nunique().max()
    class_instances_per_frame = df.groupby(['frame', 'class']).size().max()
    instances_per_frame = df.groupby('frame').size().max()

    annotated_frames = df['frame'].nunique()
    multi_event_frames = df.groupby('frame').size().gt(1).sum()
    multi_instance_frames = df.groupby(['frame', 'class']).size().gt(1).sum()

    filename, _ = file.split('.')
    waveform, sample_rate = torchaudio.load(f'{FOA_DEV}/{dir}/{filename}.wav')
    frames = waveform.shape[1] / sample_rate * 10
    print(
        f'  {filename:>19}: {frames:6.1f} ({max_frame:4}) frames, '
        f'{classes_per_frame} cls, {class_instances_per_frame} cls inst, '
        f'{instances_per_frame} insts'
    )

    return (
        frames,
        classes_per_frame,
        class_instances_per_frame,
        instances_per_frame,
        annotated_frames / max_frame,
        multi_event_frames / annotated_frames,
        multi_instance_frames / annotated_frames,
    )

In [9]:
weights = np.zeros(4)
annotated_ratios = np.zeros(4)
multi_event_ratios = np.zeros(4)
multi_instance_ratios = np.zeros(4)

for i, dir in enumerate(listdir(METADATA_DEV)):
    files = listdir(f'{METADATA_DEV}/{dir}')
    print(f'Directory: {dir}')
    print(f'Files: {len(files)}')

    seconds, max_frames = 0, 0
    max_classes, max_class_instances, max_instances = 0, 0, 0
    annotated_ratio = np.zeros(len(files))
    multi_event_ratio = np.zeros(len(files))
    multi_instance_ratio = np.zeros(len(files))
    for j, file in enumerate(files):
        (
            frames,
            classes,
            class_instances,
            instances,
            annotated,
            multi_event,
            multi_instance,
        ) = check_meta_file(file, dir)

        seconds += frames / 10
        max_frames = max(max_frames, frames)
        max_classes = max(max_classes, classes)
        max_class_instances = max(max_class_instances, class_instances)
        max_instances = max(max_instances, instances)

        annotated_ratio[j] = annotated
        multi_event_ratio[j] = multi_event
        multi_instance_ratio[j] = multi_instance
    print(
        f'Max frames: {max_frames:.1f}, '
        f'Max classes: {max_classes}, '
        f'Max class instances: {max_class_instances}, '
        f'Max instances: {max_instances}\n'
        f'Hours: {seconds / 3600:.1f}h, '
        f'Annotated: {annotated.mean():.2%}, '
        f'Multi-class: {multi_event.mean():.2%}, '
        f'Multi-instance: {multi_instance.mean():.2%}\n'
    )

    weights[i] = seconds
    annotated_ratios[i] = annotated_ratio.mean()
    multi_event_ratios[i] = multi_event_ratio.mean()
    multi_instance_ratios[i] = multi_instance_ratio.mean()

weights /= weights.sum()


Directory: dev-test-tau
Files: 48
  fold4_room16_mix004: 1418.8 (1373) frames, 3 cls, 2 cls inst, 4 insts
  fold4_room16_mix003: 1311.5 (1311) frames, 3 cls, 2 cls inst, 4 insts
  fold4_room16_mix014:  960.2 ( 909) frames, 3 cls, 1 cls inst, 3 insts
   fold4_room8_mix008: 1672.5 (1629) frames, 2 cls, 3 cls inst, 3 insts
  fold4_room16_mix002: 1267.4 (1267) frames, 2 cls, 2 cls inst, 3 insts
  fold4_room16_mix007: 2045.4 (2000) frames, 2 cls, 2 cls inst, 2 insts
   fold4_room2_mix003: 2534.1 (2489) frames, 2 cls, 1 cls inst, 2 insts
  fold4_room15_mix010: 5692.3 (5625) frames, 1 cls, 1 cls inst, 1 insts
  fold4_room16_mix005:  477.9 ( 505) frames, 3 cls, 2 cls inst, 3 insts
  fold4_room16_mix009:  840.7 ( 810) frames, 2 cls, 1 cls inst, 2 insts
  fold4_room10_mix004: 1480.7 (1429) frames, 2 cls, 1 cls inst, 2 insts
  fold4_room10_mix001: 1378.7 (1339) frames, 2 cls, 1 cls inst, 2 insts
  fold4_room15_mix009: 2237.4 (2216) frames, 3 cls, 2 cls inst, 5 insts
  fold4_room10_mix006: 1704.9 

Percentages of frames with:

- at least one event,
- at least two events,
- at least two events of the same class.

In [10]:
print(f'Annotated: {np.sum(weights * annotated_ratios):.2%}')
print(f'Multi-event: {np.sum(weights * multi_event_ratios):.2%}')
print(f'Multi-instance: {np.sum(weights * multi_instance_ratios):.2%}')

Annotated: 85.21%
Multi-event: 36.01%
Multi-instance: 9.63%


## Dataset length

Train split: 1.3h + 2.9h = 4.2h

Test split: 0.8h + 2.4h = 3.2h