# Calculating the mean and standard deviation of a dataset from a provided path

In [2]:
#line to render the plots under the code cell that created it
%matplotlib inline
import sys  # Python system library needed to load custom functions
import numpy as np  # for performing calculations on numerical arrays
import os     # for changing the directory

from datasets import load_dataset, Audio  # required tools to create, load and process our audio dataset
from transformers import ASTFeatureExtractor, ASTForAudioClassification, TrainingArguments, Trainer  # required classes to perform the model training
# Extend the module search path BEFORE importing local modules: on a fresh
# kernel the import below can only see '../src' once it has been added.
# The path is made absolute because a relative sys.path entry is resolved
# against the current working directory, which os.chdir() changes below.
# (Presumably `preprocessing` lives in '../src' -- confirm against the repo layout.)
sys.path.append(os.path.abspath('../src'))  # add the source directory to the module search path so that local functions and modules can be imported
from preprocessing import calculate_stats, preprocess_audio_arrays  # functions to calculate dataset statistics and preprocess the dataset with ASTFeatureExtractor
# NOTE: this cell is not idempotent -- every re-run moves the working directory
# two more levels up. Restart the kernel before running it again.
os.chdir('../..') # changing our directory to root

In [3]:
# Location of the training audio files, relative to the current working directory.
train_path = "data/data_small/train"

In [4]:
def calculate_mean_and_std(train_path, model_sampling_rate=22050,
                           pretrained_model="MIT/ast-finetuned-audioset-10-10-0.4593",
                           shuffle_seed=42):
    """Compute the average 'mean' and 'std' statistics of an audio training set.

    The audio files under ``train_path`` are loaded with the ``audiofolder``
    builder, shuffled, resampled to ``model_sampling_rate`` and passed in
    batches through ``calculate_stats`` together with an ``ASTFeatureExtractor``
    created with ``do_normalize=False``. The ``'mean'`` and ``'std'`` columns
    of the mapped dataset are then averaged.

    Parameters
    ----------
    train_path : str
        Directory handed to ``load_dataset("audiofolder", data_dir=...)``.
    model_sampling_rate : int, optional
        Sampling rate the ``audio`` column is cast to (default 22050).
        NOTE(review): the default checkpoint's feature extractor is, as far as
        its published configuration goes, set up for 16 kHz audio -- confirm
        that 22050 Hz is intended and handled inside ``calculate_stats``.
    pretrained_model : str, optional
        Name or path of the checkpoint whose feature-extractor configuration
        is loaded. Defaults to the checkpoint that was previously hard-coded.
    shuffle_seed : int, optional
        Seed used to shuffle the training split (default 42, as before).

    Returns
    -------
    tuple
        ``(dataset_mean, dataset_std)`` -- the arithmetic means of the
        ``'mean'`` and ``'std'`` columns. ``dataset_std`` is therefore an
        average of the stored std values, not a pooled standard deviation
        over all feature values.

    Raises
    ------
    ValueError
        If the loaded dataset does not contain a ``'train'`` split.
    """
    splits = load_dataset("audiofolder", data_dir=train_path)
    train_dataset = splits.get('train')
    if train_dataset is None:
        # Fail with a clear message instead of an AttributeError on None below.
        raise ValueError(f"No 'train' split found when loading audio from {train_path!r}")

    train_dataset = train_dataset.shuffle(seed=shuffle_seed)
    train_dataset = train_dataset.cast_column("audio", Audio(sampling_rate=model_sampling_rate))

    # do_normalize=False: presumably so the statistics describe the raw,
    # un-normalized features -- verify against calculate_stats.
    feature_extractor_stats = ASTFeatureExtractor.from_pretrained(pretrained_model, do_normalize=False)

    # calculate_stats is expected to add the 'mean' and 'std' columns read below
    # (assumption based on the column access -- its implementation is not shown here).
    train_dataset = train_dataset.map(
        lambda x: calculate_stats(x, audio_field='audio', array_field='array', feature_extractor=feature_extractor_stats),
        batched=True,
    )

    dataset_mean = np.mean(train_dataset['mean'])
    dataset_std = np.mean(train_dataset['std'])
    return dataset_mean, dataset_std

In [5]:
# Compute the statistics of the training set with the audio resampled to 22050 Hz.
dataset_mean, dataset_std = calculate_mean_and_std(
    train_path=train_path,
    model_sampling_rate=22050,
)

Resolving data files:   0%|          | 0/177 [00:00<?, ?it/s]

Found cached dataset audiofolder (/root/.cache/huggingface/datasets/audiofolder/default-eeb27949c43cf948/0.0.0/6cbdd16f8688354c63b4e2a36e1585d05de285023ee6443ffd71c4182055c0fc)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/audiofolder/default-eeb27949c43cf948/0.0.0/6cbdd16f8688354c63b4e2a36e1585d05de285023ee6443ffd71c4182055c0fc/cache-b47fdeb437c8444e.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/audiofolder/default-eeb27949c43cf948/0.0.0/6cbdd16f8688354c63b4e2a36e1585d05de285023ee6443ffd71c4182055c0fc/cache-cc80b3a08c0c6a3d.arrow


In [6]:
# Show the computed statistics as "<mean> <std>" on a single line.
print(f"{dataset_mean} {dataset_std}")

-8.432584164494818 4.1901741698384285
