In [None]:
# Colab-only setup: mount Google Drive so the dataset stored there is reachable.
from google.colab import drive
drive.mount('/content/drive')
# Copy the label CSV into the working directory and extract the audio archive there.
# NOTE(review): later cells read clips from './train/' -- presumably the archive
# unpacks into that folder; confirm.
!cp '/content/drive/MyDrive/temporary/acou/train.csv' .
!unzip '/content/drive/MyDrive/temporary/acou/audio_train.zip'

In [2]:
from sklearn import svm

In [3]:
import librosa
from librosa.feature import mfcc
from librosa.core import load

In [10]:
import os
from tqdm import tqdm
import pickle
import pandas as pd
import numpy as np
from librosa.core import load as load_wav
from librosa import power_to_db

# Build the dataset: one dict of fname, mfccs and encoded label per audio file
def get_mfcc_features(file, dir, sr, fft_size, hop, n_mfcc, out_path='features.pickle'):
    """Compute MFCCs for every clip listed in a CSV file and pickle the result.

    Parameters
    ----------
    file : str
        Path to the CSV. Its first row is skipped and the columns are read
        as ``fname`` and ``label``.
    dir : str
        Folder that contains the audio files named in the ``fname`` column.
    sr : int or None
        Sampling rate requested from the loader for every clip. The rate the
        loader actually returns is the one used for the MFCC computation.
    fft_size : int
        Passed to the MFCC extractor as ``n_fft``.
    hop : int
        Passed to the MFCC extractor as ``hop_length``.
    n_mfcc : int
        Number of MFCC coefficients per frame.
    out_path : str, optional
        File the list of feature dicts is pickled to
        (default ``'features.pickle'``).

    Returns
    -------
    dict
        Mapping from each distinct label to its integer index; labels are
        indexed in sorted order.
    """
    dataset_info = pd.read_csv(file, skiprows=1, names=['fname', 'label'])
    file_names = list(dataset_info['fname'])
    labels = list(dataset_info['label'])
    n_files = len(file_names)

    # np.unique already returns the distinct labels sorted.
    label_encoding = {label: i for i, label in enumerate(np.unique(labels))}

    print('Number of files:', n_files)

    features = []
    for file_name, label in tqdm(zip(file_names, labels), total=n_files):
        # Request the configured rate explicitly instead of relying on the
        # loader's default, and keep the returned rate in its own name so the
        # `sr` argument is not overwritten.
        wav_data, clip_sr = load_wav(os.path.join(dir, file_name), sr=sr)

        # Keyword arguments: recent librosa releases reject positional audio.
        mfccs = mfcc(y=wav_data, sr=clip_sr, n_mfcc=n_mfcc, n_fft=fft_size,
                     hop_length=hop, fmax=clip_sr // 2)
        features.append({
            'fname': file_name,
            'mfcc': mfccs,
            'label': label_encoding[label]
        })

    # Context manager guarantees the file is flushed and closed.
    with open(out_path, 'wb') as fh:
        pickle.dump(features, fh)
    return label_encoding

In [8]:
# Folder the training clips are read from
train_folder = './train/'

# Audio analysis settings handed to get_mfcc_features
sr = 22050                         # sampling rate
fft_size, hop_length = 2048, 512   # n_fft and hop_length for the MFCC extractor
n_mfcc = 80                        # MFCC coefficients per frame

In [26]:
# Input label file and a pickle file name for the training set
csv_file, pickle_features = 'train.csv', 'train.pickle'

# Run the MFCC extraction over every clip; the returned value is the
# label -> index mapping.
labels = get_mfcc_features(
    file=csv_file,
    dir=train_folder,
    sr=sr,
    fft_size=fft_size,
    hop=hop_length,
    n_mfcc=n_mfcc,
)

Number of files: 5683
5683it [19:45,  4.79it/s]


In [12]:
# Bring an MFCC matrix to a fixed number of columns (pad short ones, trim long ones)
def mfcc_to_feature(mfcc, length_th=256):
  """Return `mfcc` with exactly `length_th` columns along axis 1.

  Matrices with at least `length_th` columns are cut to their first
  `length_th` columns. Shorter ones are extended at the end of axis 1
  using reflect padding; axis 0 is never padded.
  """
  n_cols = mfcc.shape[1]
  if n_cols >= length_th:
    return mfcc[:, :length_th]

  missing = length_th - n_cols
  return np.pad(mfcc, pad_width=[(0, 0), (0, missing)], mode='reflect')

In [13]:
# Load the per-clip feature dicts written by get_mfcc_features.
# The context manager closes the file handle once unpickling is done.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this notebook.
with open('features.pickle', 'rb') as fh:
    train_mfccs = pickle.load(fh)

In [15]:
# Fix every MFCC matrix to a common length, flatten it to one vector per clip,
# and collect the matching encoded labels in the same order.
features = [mfcc_to_feature(sample['mfcc']).flatten() for sample in train_mfccs]
labels = [sample['label'] for sample in train_mfccs]

In [None]:
# pickle.dump(features, open('/content/drive/MyDrive/temporary/acou/feats', 'wb'))
# pickle.dump(labels, open('/content/drive/MyDrive/temporary/acou/labels', 'wb')) 

In [None]:
# features = pickle.load(open('/content/drive/MyDrive/temporary/acou/feats', 'rb'))
# labels = pickle.load(open('/content/drive/MyDrive/temporary/acou/labels', 'rb'))

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the clips for evaluation; the fixed seed makes the shuffled split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [17]:
from sklearn.metrics import accuracy_score

In [22]:
# Classify the prepared data with a linear-kernel SVC (fit returns the estimator itself)
clf = svm.SVC(kernel='linear', C=0.9, tol=1e-6, random_state=42).fit(X_train, y_train)

# Score the predictions on the held-out split
y_pred = clf.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'accuracy score: {acc}')

accuracy score: 0.5617414248021109
