In [1]:
import csv
import os
import warnings
from typing import List, AnyStr

import librosa
import numpy as np
import scipy.stats
from tqdm import tqdm

%matplotlib inline
warnings.filterwarnings('ignore')

In [5]:
# parse normal and abnormal audio file path
def parse_audio_path(_training_dir: str, _syb_dir: str) -> List[AnyStr]:
    with open(f'{_training_dir}/{_syb_dir}/RECORDS-normal', 'r') as fn:
        normal = fn.read().strip().split('\n')
        normal = list(map(lambda elem: f'{elem.strip()}.wav/1', normal))

    with open(f'{training_dir}/{syb_dir}/RECORDS-abnormal', 'r') as fab:
        abnormal = fab.read().strip().split('\n')
        abnormal = list(map(lambda elem: f'{elem.strip()}.wav/0', abnormal))

    return normal + abnormal



In [6]:
def extract_features(_training_dir: str, _syb_dir: str, _file_path: str, _label: int) -> List[float]:
    _path = f'{_training_dir}/{_syb_dir}/{_file_path}'
    y, sr = librosa.load(_path, mono=True, duration=5)

    audio_description = [f'/{syb_dir}/{file_path}']
    audio_description.extend([np.mean(e) for e in librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)])  # mfcc_mean<0..20>
    audio_description.extend([np.std(e) for e in librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)])  # mfcc_std<0..20>

    audio_description.append(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr).T, axis=0)[0])  # cent_mean
    audio_description.append(np.std(librosa.feature.spectral_centroid(y=y, sr=sr).T, axis=0)[0])  # cent_std
    audio_description.append(scipy.stats.skew(librosa.feature.spectral_centroid(y=y, sr=sr).T, axis=0)[0])  # cent_skew
    audio_description.append(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr).T, axis=0)[0])  # rolloff_mean
    audio_description.append(np.std(librosa.feature.spectral_rolloff(y=y, sr=sr).T, axis=0)[0])  # rolloff_std
    audio_description.append(_label)
    return audio_description

In [7]:
buffer = []
count_audio = 0
count_rows = 0

path = '../data_training/'
training_dir, training_syb_dir, _ = next(os.walk(path))
print(training_dir, training_syb_dir)

# Создание заголовка для файла CSV.
header = ['filename']
header.extend([f'mfcc_mean{i}' for i in range(1, 21)])
header.extend([f'mfcc_std{i}' for i in range(1, 21)])
header.extend(['cent_mean', 'cent_std', 'cent_skew', 'rolloff_mean', 'rolloff_std', 'label'])

with open('../data_feature/dataset_metrics.csv', 'w', newline='') as file:
    writer = csv.writer(file, delimiter=',')
    writer.writerow(header)
    for syb_dir in training_syb_dir:

        audio_paths = parse_audio_path(training_dir, syb_dir)
        print(syb_dir, end="  ")
        buffer_counter = 0

        for file_path in tqdm(audio_paths):
            file, label = file_path.split('/')
            features = extract_features(training_dir, syb_dir, file, label)
            buffer.append(features)
            if buffer:
                writer.writerows(buffer)
                buffer_counter += 1
            buffer = []

        print(f'Volume valid audio: {len(audio_paths)}, recorded rows in file: {buffer_counter}')
        count_audio += len(audio_paths)
        count_rows += buffer_counter

print(f'Analysis completed successfully, analyzed: {count_audio} files, written rows: {count_rows}')

../data_training/ ['training-b', 'training-e', 'training-d', 'training-c', 'training-f', 'training-a']
training-b  

100%|██████████| 490/490 [00:20<00:00, 23.51it/s]


Volume valid audio: 490, recorded rows in file: 490
training-e  

100%|██████████| 2141/2141 [01:25<00:00, 25.11it/s]


Volume valid audio: 2141, recorded rows in file: 2141
training-d  

100%|██████████| 55/55 [00:02<00:00, 24.96it/s]


Volume valid audio: 55, recorded rows in file: 55
training-c  

100%|██████████| 31/31 [00:01<00:00, 24.55it/s]


Volume valid audio: 31, recorded rows in file: 31
training-f  

100%|██████████| 114/114 [00:04<00:00, 24.74it/s]


Volume valid audio: 114, recorded rows in file: 114
training-a  

100%|██████████| 409/409 [00:16<00:00, 24.51it/s]

Volume valid audio: 409, recorded rows in file: 409
Analysis completed successfully, analyzed: 3240 files, written rows: 3240



