In [2]:
! pip install librosa

Collecting librosa
  Downloading librosa-0.5.1.tar.gz (1.5MB)
[K    100% |################################| 1.5MB 658kB/s eta 0:00:01
[?25hCollecting audioread>=2.0.0 (from librosa)
  Downloading audioread-2.1.5.tar.gz
Collecting resampy>=0.1.2 (from librosa)
  Downloading resampy-0.2.0.tar.gz (322kB)
[K    100% |################################| 327kB 1.9MB/s eta 0:00:01
Building wheels for collected packages: librosa, audioread, resampy
  Running setup.py bdist_wheel for librosa ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/7a/bf/9f/fc279e1814da2144161848a7419cdcf923d0c03f481abe7904
  Running setup.py bdist_wheel for audioread ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/47/cc/d8/e1476000d1720a72117ef5c0b1a51388b8567ebfface47ede7
  Running setup.py bdist_wheel for resampy ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/c6/8e/c0/4c402972789ddf6d2d701468417de19d0cad28bd698a867e2d
Successfully built

In [3]:
import glob
import os
import librosa
import numpy as np
import sys

In [4]:
def extract_feature(file_name):
    """Compute five averaged spectral feature vectors for one audio file.

    The file is decoded with ``librosa.load`` (library defaults). Every
    feature matrix returned by librosa is transposed and averaged over
    axis 0, so each feature is reduced to a single 1-D vector.

    Parameters
    ----------
    file_name : str
        Path of the audio file to analyse.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``. ``mfccs`` has 40
        entries (``n_mfcc=40``); the lengths of the other vectors come from
        librosa's defaults. The caller in this notebook concatenates the
        five vectors into one 193-value row.

    Raises
    ------
    Exception
        Whatever ``librosa`` raises while loading or analysing the file is
        propagated unchanged.
    """
    X, sample_rate = librosa.load(file_name)

    # Magnitude spectrogram, shared by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(X))

    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The waveform is passed by keyword (as in the mfcc/tonnetz calls): newer
    # librosa releases no longer accept it positionally.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonnetz is computed on the harmonic component of the signal only.
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz

In [5]:
def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
    """Extract a feature row and an integer label for every matching audio file.

    Each file matching ``file_ext`` inside ``parent_dir/sub_dir`` is passed to
    ``extract_feature`` and its five vectors are concatenated into one row.
    The label is the second ``-``-separated field of the file's base name
    (e.g. ``7383-3-1-0.wav`` -> ``3``). A file is skipped, with a ``SKIP``
    line printed, when its label cannot be parsed, when feature extraction
    raises, or when the row does not have the expected width.

    Parameters
    ----------
    parent_dir : str
        Directory containing the sub-directories to scan.
    sub_dirs : iterable of str
        Sub-directory names, relative to ``parent_dir``.
    file_ext : str, optional
        Glob pattern for the files to process (default ``'*.wav'``).

    Returns
    -------
    files : list of str
        Base names of the files that were processed successfully.
    features : numpy.ndarray
        Array of shape ``(len(files), 193)``; row ``i`` belongs to ``files[i]``.
    labels : numpy.ndarray
        Integer array of shape ``(len(files),)``; entry ``i`` belongs to ``files[i]``.
    """
    n_features = 193  # width of one concatenated feature row
    rows, parsed_labels, files = [], [], []
    print('==DIR', parent_dir)
    # The enumerate index is only used for the progress message; labels come
    # from the file names, so it gets its own name instead of being shadowed.
    for dir_index, sub_dir in enumerate(sub_dirs):
        print('====DIR', dir_index, sub_dir)
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            basename = os.path.basename(fn)
            try:
                # Parse the label first so that a malformed file name can never
                # leave a feature row without a matching label/file entry.
                label = int(basename.split('-')[1])
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
                ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
                if ext_features.shape != (n_features,):
                    raise ValueError('expected %d features, got shape %r'
                                     % (n_features, ext_features.shape))
            except Exception as err:
                # Best effort: report the reason and carry on with the next file.
                # (KeyboardInterrupt/SystemExit are deliberately not caught.)
                print(fn, 'SKIP', repr(err))
                continue
            rows.append(ext_features)
            parsed_labels.append(label)
            files.append(basename)
            print(fn, label, ext_features.shape)

    # Stack once at the end; the empty seed keeps the (0, 193) float result
    # when nothing was processed.
    features = np.vstack([np.empty((0, n_features))] + rows)
    # Built-in int replaces the np.int alias, which recent NumPy no longer provides.
    labels = np.array(parsed_labels, dtype=int)
    return files, features, labels

In [6]:
def one_hot_encode(labels, num_of_classes=10):
    """Turn integer class labels into a one-hot matrix.

    Parameters
    ----------
    labels : sequence or numpy.ndarray of int
        Class index of every sample; used directly as column indices.
    num_of_classes : int, optional
        Number of columns in the result. Defaults to 10, the value that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), num_of_classes)`` that is 1 at
        ``[i, labels[i]]`` and 0 elsewhere.

    Raises
    ------
    IndexError
        If a label does not fit into ``num_of_classes`` columns.
    """
    num_data = len(labels)
    one_hot = np.zeros((num_data, num_of_classes))
    # Fancy indexing: for every row i, set column labels[i].
    one_hot[np.arange(num_data), labels] = 1
    return one_hot

In [7]:
# Extract features for every clip under audio_for_train/data.
files, features, labels = parse_audio_files('audio_for_train', ['data'])

# Write one CSV row per clip: file name, integer label, then the feature values.
# The context manager closes the file even if a write fails part-way through.
with open('train_features.csv', 'w') as fp:
    for file, label, feature in zip(files, labels, features):
        fp.write('%s,%d,%s\n'  % (file, label, ','.join(['%f' % f for f in feature])))

print('Audio Data Loading Done')
print("feature. shape",features.shape)
print("label. shape",labels.shape)

==DIR audio_for_train
====DIR 0 data
audio_for_train/data/7383-3-1-0.wav 3 (193,)
audio_for_train/data/24074-1-0-3.wav 1 (193,)
audio_for_train/data/72579-3-0-2.wav 3 (193,)
audio_for_train/data/54858-3-2-0.wav 3 (193,)
audio_for_train/data/138031-2-0-2.wav 2 (193,)
audio_for_train/data/94868-1-0-0.wav 1 (193,)
audio_for_train/data/105415-2-0-1.wav 2 (193,)
audio_for_train/data/51022-3-7-0.wav 3 (193,)


  if np.any(X < 0) or np.any(X_ref < 0):
  bad_idx = (Z < np.finfo(dtype).tiny)


audio_for_train/data/87275-1-3-0.wav SKIP
audio_for_train/data/102106-3-0-0.wav 3 (193,)
audio_for_train/data/105415-2-0-4.wav 2 (193,)
audio_for_train/data/87275-1-4-0.wav 1 (193,)
audio_for_train/data/24074-1-0-10.wav 1 (193,)




audio_for_train/data/9031-3-3-0.wav 3 (193,)
audio_for_train/data/15564-2-0-2.wav 2 (193,)
audio_for_train/data/105415-2-0-8.wav 2 (193,)
audio_for_train/data/87275-1-1-0.wav SKIP
audio_for_train/data/7383-3-0-1.wav 3 (193,)
audio_for_train/data/65749-3-1-9.wav 3 (193,)
audio_for_train/data/15564-2-0-1.wav 2 (193,)
audio_for_train/data/72579-3-0-3.wav 3 (193,)
audio_for_train/data/108362-2-0-7.wav 2 (193,)
audio_for_train/data/24074-1-0-6.wav 1 (193,)
audio_for_train/data/43786-3-1-0.wav 3 (193,)
audio_for_train/data/24074-1-0-0.wav 1 (193,)
audio_for_train/data/43786-3-0-0.wav 3 (193,)
audio_for_train/data/31840-3-0-0.wav 3 (193,)
audio_for_train/data/24074-1-0-9.wav 1 (193,)
audio_for_train/data/15564-2-0-0.wav 2 (193,)
audio_for_train/data/101415-3-0-3.wav 3 (193,)
audio_for_train/data/24074-1-0-8.wav 1 (193,)
audio_for_train/data/54858-3-0-0.wav 3 (193,)




audio_for_train/data/9031-3-4-0.wav 3 (193,)
audio_for_train/data/54858-3-1-2.wav 3 (193,)
audio_for_train/data/31840-3-1-0.wav 3 (193,)
audio_for_train/data/72579-3-0-4.wav 3 (193,)
audio_for_train/data/24074-1-0-2.wav 1 (193,)


  if np.any(X < 0) or np.any(X_ref < 0):
  bad_idx = (Z < np.finfo(dtype).tiny)


audio_for_train/data/87275-1-2-0.wav SKIP
audio_for_train/data/138031-2-0-7.wav 2 (193,)
audio_for_train/data/105415-2-0-6.wav 2 (193,)
audio_for_train/data/108362-2-0-9.wav 2 (193,)
audio_for_train/data/94868-1-2-0.wav 1 (193,)
audio_for_train/data/72261-3-0-6.wav 3 (193,)
audio_for_train/data/101415-3-0-2.wav 3 (193,)
audio_for_train/data/24074-1-0-4.wav 1 (193,)
audio_for_train/data/72579-3-0-0.wav 3 (193,)




audio_for_train/data/9031-3-2-0.wav 3 (193,)
audio_for_train/data/19026-1-0-0.wav 1 (193,)
audio_for_train/data/87275-1-0-0.wav 1 (193,)
audio_for_train/data/24074-1-0-1.wav 1 (193,)
audio_for_train/data/87275-1-5-0.wav 1 (193,)
audio_for_train/data/31323-3-0-2.wav 3 (193,)
audio_for_train/data/9031-3-1-0.wav 3 (193,)
audio_for_train/data/31323-3-0-1.wav 3 (193,)
audio_for_train/data/94868-1-1-0.wav 1 (193,)
audio_for_train/data/97317-2-0-5.wav 2 (193,)
audio_for_train/data/24074-1-0-7.wav 1 (193,)
audio_for_train/data/7383-3-0-0.wav 3 (193,)
audio_for_train/data/24074-1-0-5.wav 1 (193,)
audio_for_train/data/101415-3-0-8.wav 3 (193,)
Audio Data Loading Done
feature. shape (57, 193)
label. shape (57,)


In [8]:
# Extract features for every clip directly inside audio_for_test.
# NOTE: this rebinds files/features/labels, replacing the training results.
files, features, labels = parse_audio_files('audio_for_test', ['.'])

# Write one CSV row per clip: file name, integer label, then the feature values.
# The context manager closes the file even if a write fails part-way through.
with open('test_features.csv', 'w') as fp:
    for file, label, feature in zip(files, labels, features):
        fp.write('%s,%d,%s\n'  % (file, label, ','.join(['%f' % f for f in feature])))

==DIR audio_for_test
====DIR 0 .
audio_for_test/./60591-2-0-4.wav 2 (193,)
audio_for_test/./60591-2-0-7.wav 2 (193,)
audio_for_test/./74723-3-0-1.wav 3 (193,)
audio_for_test/./145577-1-0-0.wav 1 (193,)
audio_for_test/./7389-1-2-2.wav 1 (193,)
audio_for_test/./204773-3-8-0.wav 3 (193,)
