In [None]:
from pathlib import Path

import pandas as pd
from IPython.display import display, Audio

In [None]:
# Suffix applied (via Path.with_suffix) to each row's 'id' when locating its audio file.
AUDIO_EXT: str = '.wav'
# Metadata table loaded with pandas; rows carry 'group_id', 'fragment_id' and 'id'.
CSV_PATH: str = '../metadata/dataset.csv'
# Root of the on-disk layout: <DATASET_DIR>/<group_id>/<fragment_id>/<id><AUDIO_EXT>.
DATASET_DIR: Path = Path('../chad_dataset/fragments')

In [None]:
def is_downloaded(r: pd.Series) -> bool:
    """
    Check if audio file corresponding to the DataFrame row is downloaded.

    The file is looked up at ``DATASET_DIR/<group_id>/<fragment_id>/<id>`` with
    its suffix set to ``AUDIO_EXT``.

    :param r: DataFrame row containing 'group_id', 'fragment_id', and 'id'.
    :return: True if the audio file exists, False otherwise.
    """
    # Cast every component to str: pathlib's `/` only accepts str / path-like
    # operands, so a numeric 'group_id' (e.g. an integer column parsed from the
    # CSV) would otherwise raise TypeError.
    path = (
        DATASET_DIR / str(r['group_id']) / str(r['fragment_id']) / str(r['id'])
    ).with_suffix(AUDIO_EXT)
    return path.is_file()

def show_audio(r: pd.Series) -> None:
    """
    Display the audio file corresponding to the DataFrame row.

    The file is looked up at ``DATASET_DIR/<group_id>/<fragment_id>/<id>`` with
    its suffix set to ``AUDIO_EXT`` and rendered as an IPython audio widget.

    :param r: DataFrame row containing 'group_id', 'fragment_id', and 'id'.
    """
    # Cast every component to str: pathlib's `/` only accepts str / path-like
    # operands, so a numeric 'group_id' would otherwise raise TypeError.
    path = (
        DATASET_DIR / str(r['group_id']) / str(r['fragment_id']) / str(r['id'])
    ).with_suffix(AUDIO_EXT)
    # Pass the location explicitly as `filename` (as a plain string) instead of
    # relying on Audio's positional `data` argument being recognised as a path.
    display(Audio(filename=str(path)))

def show_random_group(
        n_examples: int = 3
) -> pd.DataFrame:
    """
    Show random examples of downloaded audio files from the same group and fragment.

    Reads the metadata at ``CSV_PATH``, keeps only rows whose audio file exists
    (see ``is_downloaded``), picks one random (group_id, fragment_id) pair and
    plays up to ``n_examples`` rows belonging to it.

    :param n_examples: Maximum number of random examples to show.
    :return: DataFrame containing the selected examples (including the
        'is_downloaded' column). Empty if no downloaded file was found.
    """
    df = pd.read_csv(CSV_PATH)

    # A row-wise apply on an empty frame does not yield a boolean Series, so
    # build the (empty) mask explicitly in that case.
    if df.empty:
        downloaded = pd.Series(False, index=df.index, dtype=bool)
    else:
        downloaded = df.apply(is_downloaded, axis=1)
    df['is_downloaded'] = downloaded
    df = df[df['is_downloaded']]

    # Nothing to sample from: report it instead of failing inside df.sample().
    if df.empty:
        print(f'No downloaded audio files found under {DATASET_DIR}')
        return df

    # Select a random group and fragment. Each value is read from its own
    # column so the two are not coerced to a common dtype.
    picked = df.sample()
    random_group = picked['group_id'].iloc[0]
    random_fragment = picked['fragment_id'].iloc[0]

    # Select examples from the same group and fragment
    examples = df[
        df['group_id'].eq(random_group) & df['fragment_id'].eq(random_fragment)
    ]

    # Sample n_examples random examples from the selected group and fragment
    examples = examples.sample(min(len(examples), n_examples))

    print(f'GroupID: {random_group}. FragmentID: {random_fragment}')
    for _, example in examples.iterrows():
        show_audio(example)

    return examples

In [None]:
# Play up to five random downloaded examples; the returned frame is the cell output.
show_random_group(n_examples=5)