In [33]:
!pip install python_speech_features



In [34]:
# Mount Google Drive at /content/drive. The later cells read their archives
# and the label CSV through the relative path ./drive/MyDrive/..., so they
# assume the working directory is /content.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [35]:
import zipfile

# Archive with the pickled configuration; presumably unpacks the `pickles/`
# directory read later -- confirm against the archive layout.
# Named `pickles_zip` instead of `pickle` so the variable cannot shadow the
# standard-library `pickle` module that a later cell imports (re-running this
# cell would otherwise turn `pickle` back into a string).
pickles_zip = './drive/MyDrive/bestpickles.zip'

# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(pickles_zip, 'r') as zip_ref:
    zip_ref.extractall('./')

In [36]:
# Archive with the trained models; presumably unpacks the `models/` directory
# that `Config.model_path` points into -- confirm against the archive layout.
models = './drive/MyDrive/bestmodels.zip'

# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(models, 'r') as zip_ref:
    zip_ref.extractall('./')

In [37]:
# Archive with the evaluation audio; presumably unpacks the `test/` directory
# that is later passed to build_predictions -- confirm against the archive.
test = './drive/MyDrive/test.zip'

# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(test, 'r') as zip_ref:
    zip_ref.extractall('./')

In [38]:
import pickle
import os
import numpy as np
from tqdm import tqdm
from scipy.io import wavfile
from python_speech_features import mfcc
from keras.models import load_model
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [39]:
def build_predictions(audio_dir):
    """Classify every labelled audio file in ``audio_dir`` window by window.

    Relies on the notebook-level globals ``fn2class`` (file name -> label),
    ``classes`` (ordered label list), ``config`` (feature settings plus the
    ``min``/``max`` normalisation bounds) and ``model`` (the loaded model).

    Each file is cut into consecutive, non-overlapping windows of
    ``config.step`` samples; a trailing partial window is dropped. Every
    window is converted to MFCC features, scaled with ``config.min`` and
    ``config.max``, shaped according to ``config.mode`` and passed to
    ``model.predict``.

    Directory entries that have no label in ``fn2class`` are skipped, and
    files shorter than one window get an all-NaN probability vector; both
    cases are reported with a printed summary after the loop.

    Args:
        audio_dir: Directory containing the audio files to score.

    Returns:
        tuple: ``(y_true, y_pred, fn_prob)``. ``y_true`` and ``y_pred`` hold
        one class index per window (index of the true label in ``classes``
        and arg-max of the model output, respectively). ``fn_prob`` maps each
        scored file name to the mean of its per-window model outputs,
        flattened to 1-D.
    """
    y_true = []
    y_pred = []
    fn_prob = {}
    unlabelled = []
    too_short = []

    print('Extracting features from audio')
    for fn in tqdm(os.listdir(audio_dir)):
        # Check the label before reading the file so that stray directory
        # entries are skipped instead of aborting the run with a KeyError.
        if fn not in fn2class:
            unlabelled.append(fn)
            continue
        c = classes.index(fn2class[fn])
        rate, wav = wavfile.read(os.path.join(audio_dir, fn))
        window_probs = []

        # The `+ 1` keeps the last full window when the clip length is an
        # exact multiple of config.step (the previous bound dropped it).
        for i in range(0, wav.shape[0] - config.step + 1, config.step):
            sample = wav[i:i + config.step]
            x = mfcc(sample, rate, numcep=config.nfeat,
                     nfilt=config.nfilt, nfft=config.nfft)
            if config.mode == 'conv':
                # 'conv' mode works on the transposed feature matrix.
                x = x.T
            x = (x - config.min) / (config.max - config.min)

            if config.mode == 'conv':
                # Add leading batch and trailing channel axes.
                x = x.reshape(1, x.shape[0], x.shape[1], 1)
            elif config.mode == 'time':
                # Add a leading batch axis.
                x = np.expand_dims(x, axis=0)
            y_hat = model.predict(x)
            window_probs.append(y_hat)
            y_pred.append(np.argmax(y_hat))
            y_true.append(c)

        if window_probs:
            fn_prob[fn] = np.mean(window_probs, axis=0).flatten()
        else:
            # No full window fits: keep the file in the result, but with one
            # NaN per class instead of the length-1 NaN that averaging an
            # empty list would give (assumes the model emits one value per
            # entry of `classes` -- confirm).
            too_short.append(fn)
            fn_prob[fn] = np.full(len(classes), np.nan)

    if unlabelled:
        print('Skipped {} entries with no label in fn2class: {}'.format(
            len(unlabelled), unlabelled))
    if too_short:
        print('{} files shorter than one window (probabilities set to NaN): {}'.format(
            len(too_short), too_short))

    return y_true, y_pred, fn_prob


In [40]:
# Label mapping for the test set, read from Drive.
df = pd.read_csv('./drive/MyDrive/test_mapping.csv')

# Distinct labels as returned by np.unique; a label's position in this list
# is the class index used by build_predictions.
classes = [label for label in np.unique(df['label'])]

# File name -> label lookup.
fn2class = {fname: label for fname, label in zip(df['fname'], df['label'])}

# Location of the pickled configuration loaded in a later cell.
p_path = os.path.join('pickles', 'time.p')


In [41]:
class Config:
    """Feature-extraction settings and artefact paths for one model mode.

    Attributes set by the constructor:
        mode:       Model flavour; build_predictions branches on 'conv'/'time'.
        nfilt:      Forwarded to ``mfcc`` as ``nfilt``.
        nfeat:      Forwarded to ``mfcc`` as ``numcep``.
        nfft:       Forwarded to ``mfcc`` as ``nfft``.
        rate:       Sample rate used to derive ``step``.
        step:       Window length in samples: a tenth of ``rate``, truncated.
        model_path: ``models/<mode>.model``.
        p_path:     ``pickles/<mode>.p``.

    NOTE(review): the object unpickled later in this notebook is presumably
    an instance of this class, which is why the class must exist under this
    name before ``pickle.load`` runs -- confirm.
    """

    def __init__(self, mode='time', nfilt=26, nfeat=13, nfft=512, rate=16000):
        # Derived values first, then attributes in their original order.
        window_samples = int(rate / 10)
        model_file = mode + '.model'
        pickle_file = mode + '.p'

        self.mode = mode
        self.nfilt = nfilt
        self.nfeat = nfeat
        self.nfft = nfft
        self.rate = rate
        self.step = window_samples
        self.model_path = os.path.join('models', model_file)
        self.p_path = os.path.join('pickles', pickle_file)

# Restore the pickled configuration object from `p_path`.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(p_path, 'rb') as pickle_file:
    config = pickle.load(pickle_file)

# Load the model referenced by the config, score every clip under ./test and
# compute accuracy over the per-window predictions.
model = load_model(config.model_path)
predictions = build_predictions('test')
y_true, y_pred, fn_prob = predictions
acc_score = accuracy_score(y_true, y_pred)


Extracting features from audio


  0%|          | 0/217 [00:00<?, ?it/s]



  0%|          | 1/217 [00:02<10:12,  2.83s/it]



  1%|          | 2/217 [00:04<07:13,  2.02s/it]



  1%|▏         | 3/217 [00:05<05:59,  1.68s/it]



  2%|▏         | 4/217 [00:06<05:04,  1.43s/it]



  2%|▏         | 5/217 [00:08<05:10,  1.47s/it]



  3%|▎         | 6/217 [00:09<05:29,  1.56s/it]



  3%|▎         | 7/217 [00:11<05:57,  1.70s/it]



  4%|▎         | 8/217 [00:13<05:43,  1.64s/it]



  4%|▍         | 9/217 [00:14<05:24,  1.56s/it]



  5%|▍         | 10/217 [00:15<04:54,  1.42s/it]



  5%|▌         | 11/217 [00:17<04:40,  1.36s/it]



  6%|▌         | 12/217 [00:17<04:08,  1.21s/it]



  6%|▌         | 13/217 [00:19<04:20,  1.28s/it]



  6%|▋         | 14/217 [00:20<03:58,  1.18s/it]



  7%|▋         | 15/217 [00:21<03:59,  1.19s/it]



  7%|▋         | 16/217 [00:24<06:12,  1.86s/it]



  8%|▊         | 17/217 [00:26<05:47,  1.74s/it]



  8%|▊         | 18/217 [00:27<04:59,  1.51s/it]



  9%|▉         | 19/217 [00:28<05:01,  1.52s/it]



  9%|▉         | 20/217 [00:30<04:44,  1.45s/it]



 10%|▉         | 21/217 [00:31<04:29,  1.38s/it]



 10%|█         | 22/217 [00:32<04:08,  1.27s/it]



 11%|█         | 23/217 [00:33<04:03,  1.25s/it]



 11%|█         | 24/217 [00:34<03:57,  1.23s/it]



 12%|█▏        | 25/217 [00:36<04:33,  1.43s/it]



 12%|█▏        | 26/217 [00:37<04:20,  1.36s/it]



 12%|█▏        | 27/217 [00:39<04:24,  1.39s/it]



 13%|█▎        | 28/217 [00:40<04:26,  1.41s/it]



 13%|█▎        | 29/217 [00:42<04:11,  1.34s/it]



 14%|█▍        | 30/217 [00:43<03:52,  1.24s/it]



 14%|█▍        | 31/217 [00:44<04:06,  1.33s/it]



 15%|█▍        | 32/217 [00:45<03:57,  1.28s/it]



 15%|█▌        | 33/217 [00:46<03:43,  1.21s/it]



 16%|█▌        | 34/217 [00:48<04:13,  1.38s/it]



 16%|█▌        | 35/217 [00:50<04:15,  1.40s/it]



 17%|█▋        | 36/217 [00:51<04:08,  1.37s/it]



 17%|█▋        | 37/217 [00:52<03:47,  1.26s/it]



 18%|█▊        | 38/217 [00:53<03:56,  1.32s/it]



 18%|█▊        | 39/217 [00:55<03:58,  1.34s/it]



 18%|█▊        | 40/217 [00:56<03:52,  1.32s/it]



 19%|█▉        | 41/217 [00:57<03:53,  1.32s/it]



 19%|█▉        | 42/217 [00:59<03:58,  1.36s/it]



 20%|█▉        | 43/217 [01:00<03:30,  1.21s/it]



 20%|██        | 44/217 [01:02<04:22,  1.52s/it]



 21%|██        | 45/217 [01:04<04:35,  1.60s/it]



 21%|██        | 46/217 [01:04<03:50,  1.35s/it]



 22%|██▏       | 47/217 [01:06<04:15,  1.50s/it]



 22%|██▏       | 48/217 [01:07<03:54,  1.39s/it]



 23%|██▎       | 49/217 [01:09<03:50,  1.37s/it]



 23%|██▎       | 50/217 [01:10<03:52,  1.39s/it]



 24%|██▎       | 51/217 [01:11<03:28,  1.26s/it]



 24%|██▍       | 52/217 [01:12<03:23,  1.24s/it]



 24%|██▍       | 53/217 [01:14<03:39,  1.34s/it]



 25%|██▍       | 54/217 [01:16<03:57,  1.45s/it]



 25%|██▌       | 55/217 [01:17<03:58,  1.47s/it]



 26%|██▌       | 56/217 [01:18<03:43,  1.39s/it]



 26%|██▋       | 57/217 [01:20<03:40,  1.38s/it]



 27%|██▋       | 58/217 [01:21<03:21,  1.27s/it]



 27%|██▋       | 59/217 [01:21<02:58,  1.13s/it]



 28%|██▊       | 60/217 [01:23<03:00,  1.15s/it]



 28%|██▊       | 61/217 [01:24<02:50,  1.09s/it]



 29%|██▊       | 62/217 [01:25<02:56,  1.14s/it]



 29%|██▉       | 63/217 [01:27<03:32,  1.38s/it]



 29%|██▉       | 64/217 [01:28<03:39,  1.44s/it]



 30%|██▉       | 65/217 [01:30<03:30,  1.39s/it]



 30%|███       | 66/217 [01:31<03:10,  1.26s/it]



 31%|███       | 67/217 [01:32<03:15,  1.30s/it]



 31%|███▏      | 68/217 [01:33<02:56,  1.19s/it]



 32%|███▏      | 69/217 [01:34<02:58,  1.20s/it]



 32%|███▏      | 70/217 [01:36<03:10,  1.29s/it]



 33%|███▎      | 71/217 [01:37<03:05,  1.27s/it]



 33%|███▎      | 72/217 [01:38<02:58,  1.23s/it]



 34%|███▎      | 73/217 [01:40<03:19,  1.38s/it]



 34%|███▍      | 74/217 [01:42<03:42,  1.55s/it]



 35%|███▍      | 75/217 [01:43<03:36,  1.52s/it]



 35%|███▌      | 76/217 [01:45<03:33,  1.51s/it]



 35%|███▌      | 77/217 [01:46<03:23,  1.46s/it]



 36%|███▌      | 78/217 [01:47<03:04,  1.32s/it]



 36%|███▋      | 79/217 [01:48<02:47,  1.21s/it]



 37%|███▋      | 80/217 [01:49<02:36,  1.14s/it]



 37%|███▋      | 81/217 [01:50<02:40,  1.18s/it]



 38%|███▊      | 82/217 [01:52<03:07,  1.39s/it]



 38%|███▊      | 83/217 [01:54<03:20,  1.50s/it]



 39%|███▊      | 84/217 [01:55<03:19,  1.50s/it]



 39%|███▉      | 85/217 [01:56<03:03,  1.39s/it]



 40%|███▉      | 86/217 [01:58<03:11,  1.46s/it]



 40%|████      | 87/217 [01:59<02:58,  1.38s/it]



 41%|████      | 88/217 [02:01<03:19,  1.55s/it]



 41%|████      | 89/217 [02:03<03:32,  1.66s/it]



 41%|████▏     | 90/217 [02:06<03:57,  1.87s/it]



 42%|████▏     | 91/217 [02:09<04:59,  2.37s/it]



 42%|████▏     | 92/217 [02:12<05:13,  2.50s/it]



 43%|████▎     | 93/217 [02:14<05:07,  2.48s/it]



 43%|████▎     | 94/217 [02:16<04:37,  2.25s/it]



 44%|████▍     | 95/217 [02:17<03:43,  1.83s/it]



 44%|████▍     | 96/217 [02:18<03:33,  1.77s/it]



 45%|████▍     | 97/217 [02:20<03:33,  1.78s/it]



 45%|████▌     | 98/217 [02:22<03:16,  1.65s/it]



 46%|████▌     | 99/217 [02:23<03:13,  1.64s/it]



 46%|████▌     | 100/217 [02:25<03:00,  1.55s/it]



 47%|████▋     | 101/217 [02:26<02:59,  1.55s/it]



 47%|████▋     | 102/217 [02:28<02:54,  1.51s/it]



 47%|████▋     | 103/217 [02:29<02:40,  1.41s/it]



 48%|████▊     | 104/217 [02:30<02:21,  1.25s/it]



 48%|████▊     | 105/217 [02:31<02:35,  1.39s/it]



 49%|████▉     | 106/217 [02:33<02:43,  1.47s/it]



 49%|████▉     | 107/217 [02:34<02:34,  1.40s/it]



 50%|████▉     | 108/217 [02:36<02:37,  1.44s/it]



 50%|█████     | 109/217 [02:37<02:28,  1.38s/it]



 51%|█████     | 110/217 [02:38<02:18,  1.30s/it]



 51%|█████     | 111/217 [02:39<02:14,  1.27s/it]



 52%|█████▏    | 112/217 [02:41<02:11,  1.25s/it]



 52%|█████▏    | 113/217 [02:42<02:21,  1.36s/it]



 53%|█████▎    | 114/217 [02:43<02:10,  1.26s/it]



 53%|█████▎    | 115/217 [02:45<02:16,  1.34s/it]



 53%|█████▎    | 116/217 [02:46<02:21,  1.40s/it]



 54%|█████▍    | 117/217 [02:48<02:22,  1.43s/it]



 54%|█████▍    | 118/217 [02:49<02:08,  1.30s/it]



 55%|█████▍    | 119/217 [02:50<01:53,  1.16s/it]



 55%|█████▌    | 120/217 [02:50<01:46,  1.10s/it]



 56%|█████▌    | 121/217 [02:52<01:43,  1.08s/it]



 56%|█████▌    | 122/217 [02:53<01:39,  1.05s/it]



 57%|█████▋    | 123/217 [02:54<01:51,  1.19s/it]



 57%|█████▋    | 124/217 [02:56<02:02,  1.31s/it]



 58%|█████▊    | 125/217 [02:57<01:59,  1.30s/it]



 58%|█████▊    | 126/217 [02:58<01:53,  1.25s/it]



 59%|█████▊    | 127/217 [03:00<02:09,  1.44s/it]



 59%|█████▉    | 128/217 [03:01<01:59,  1.34s/it]



 59%|█████▉    | 129/217 [03:02<01:54,  1.30s/it]



 60%|█████▉    | 130/217 [03:03<01:50,  1.27s/it]



 60%|██████    | 131/217 [03:05<01:51,  1.29s/it]



 61%|██████    | 132/217 [03:06<01:47,  1.26s/it]



 61%|██████▏   | 133/217 [03:07<01:51,  1.33s/it]



 62%|██████▏   | 134/217 [03:08<01:43,  1.24s/it]



 62%|██████▏   | 135/217 [03:10<01:51,  1.36s/it]



 63%|██████▎   | 136/217 [03:12<02:09,  1.60s/it]



 63%|██████▎   | 137/217 [03:13<01:57,  1.46s/it]



 64%|██████▎   | 138/217 [03:15<01:54,  1.45s/it]



 64%|██████▍   | 139/217 [03:16<01:43,  1.32s/it]



 65%|██████▍   | 140/217 [03:17<01:45,  1.37s/it]



 65%|██████▍   | 141/217 [03:19<01:56,  1.54s/it]



 65%|██████▌   | 142/217 [03:21<02:04,  1.66s/it]



 66%|██████▌   | 143/217 [03:23<01:59,  1.61s/it]



 66%|██████▋   | 144/217 [03:24<01:54,  1.57s/it]



 67%|██████▋   | 145/217 [03:26<02:04,  1.72s/it]



 67%|██████▋   | 146/217 [03:28<01:59,  1.69s/it]



 68%|██████▊   | 147/217 [03:29<01:49,  1.56s/it]



 68%|██████▊   | 148/217 [03:31<01:52,  1.63s/it]



 69%|██████▊   | 149/217 [03:32<01:43,  1.52s/it]



 69%|██████▉   | 150/217 [03:34<01:52,  1.67s/it]



 70%|██████▉   | 151/217 [03:36<01:46,  1.61s/it]



 70%|███████   | 152/217 [03:38<01:51,  1.71s/it]



 71%|███████   | 153/217 [03:39<01:49,  1.71s/it]



 71%|███████   | 154/217 [03:41<01:39,  1.58s/it]



 71%|███████▏  | 155/217 [03:42<01:35,  1.54s/it]



 72%|███████▏  | 156/217 [03:44<01:40,  1.64s/it]



 72%|███████▏  | 157/217 [03:45<01:33,  1.56s/it]



 73%|███████▎  | 158/217 [03:46<01:24,  1.43s/it]



 73%|███████▎  | 159/217 [03:48<01:20,  1.39s/it]



 74%|███████▎  | 160/217 [03:49<01:13,  1.30s/it]



 74%|███████▍  | 161/217 [03:50<01:16,  1.37s/it]



 75%|███████▍  | 162/217 [03:52<01:24,  1.54s/it]



 75%|███████▌  | 163/217 [03:54<01:19,  1.47s/it]



 76%|███████▌  | 164/217 [03:55<01:16,  1.45s/it]



 76%|███████▌  | 165/217 [03:56<01:13,  1.41s/it]



 76%|███████▋  | 166/217 [03:58<01:10,  1.39s/it]



 77%|███████▋  | 167/217 [03:59<01:06,  1.33s/it]



 77%|███████▋  | 168/217 [04:00<01:02,  1.27s/it]



 78%|███████▊  | 169/217 [04:01<01:00,  1.27s/it]



 78%|███████▊  | 170/217 [04:03<01:04,  1.38s/it]



 79%|███████▉  | 171/217 [04:04<01:05,  1.43s/it]



 79%|███████▉  | 172/217 [04:06<01:01,  1.38s/it]



 80%|███████▉  | 173/217 [04:07<00:58,  1.32s/it]



 80%|████████  | 174/217 [04:08<00:51,  1.20s/it]



 81%|████████  | 175/217 [04:09<00:50,  1.21s/it]



 81%|████████  | 176/217 [04:10<00:43,  1.06s/it]



 82%|████████▏ | 177/217 [04:11<00:49,  1.23s/it]



 82%|████████▏ | 178/217 [04:12<00:45,  1.18s/it]



 82%|████████▏ | 179/217 [04:14<00:49,  1.31s/it]



 83%|████████▎ | 180/217 [04:16<00:58,  1.57s/it]



 83%|████████▎ | 181/217 [04:18<01:02,  1.73s/it]



 84%|████████▍ | 182/217 [04:20<00:58,  1.66s/it]



 84%|████████▍ | 183/217 [04:21<00:51,  1.53s/it]



 85%|████████▍ | 184/217 [04:22<00:47,  1.44s/it]



 85%|████████▌ | 185/217 [04:24<00:44,  1.38s/it]



 86%|████████▌ | 186/217 [04:25<00:44,  1.45s/it]



 86%|████████▌ | 187/217 [04:26<00:41,  1.37s/it]



 87%|████████▋ | 188/217 [04:28<00:43,  1.49s/it]



 87%|████████▋ | 189/217 [04:30<00:45,  1.63s/it]



 88%|████████▊ | 190/217 [04:31<00:41,  1.54s/it]



 88%|████████▊ | 191/217 [04:33<00:37,  1.46s/it]



 88%|████████▊ | 192/217 [04:34<00:38,  1.55s/it]



 89%|████████▉ | 193/217 [04:35<00:33,  1.41s/it]



 89%|████████▉ | 194/217 [04:37<00:30,  1.33s/it]



 90%|████████▉ | 195/217 [04:38<00:29,  1.35s/it]



 90%|█████████ | 196/217 [04:39<00:26,  1.27s/it]



 91%|█████████ | 197/217 [04:41<00:27,  1.37s/it]



 91%|█████████ | 198/217 [04:42<00:23,  1.26s/it]



 92%|█████████▏| 199/217 [04:45<00:31,  1.77s/it]



 92%|█████████▏| 200/217 [04:47<00:31,  1.83s/it]



 93%|█████████▎| 201/217 [04:48<00:27,  1.72s/it]



 93%|█████████▎| 202/217 [04:50<00:25,  1.71s/it]



 94%|█████████▎| 203/217 [04:51<00:22,  1.63s/it]



 94%|█████████▍| 204/217 [04:53<00:20,  1.59s/it]



 94%|█████████▍| 205/217 [04:54<00:18,  1.56s/it]



 95%|█████████▍| 206/217 [04:56<00:19,  1.76s/it]



 95%|█████████▌| 207/217 [04:58<00:17,  1.74s/it]



 96%|█████████▌| 208/217 [05:00<00:15,  1.73s/it]



 96%|█████████▋| 209/217 [05:01<00:12,  1.62s/it]



 97%|█████████▋| 210/217 [05:03<00:12,  1.75s/it]



 97%|█████████▋| 211/217 [05:05<00:09,  1.61s/it]



 98%|█████████▊| 212/217 [05:06<00:07,  1.43s/it]



 98%|█████████▊| 213/217 [05:07<00:05,  1.42s/it]



 99%|█████████▊| 214/217 [05:09<00:04,  1.63s/it]



 99%|█████████▉| 215/217 [05:11<00:03,  1.74s/it]



100%|█████████▉| 216/217 [05:12<00:01,  1.55s/it]



100%|██████████| 217/217 [05:14<00:00,  1.45s/it]


In [42]:
# Precision, recall and F1 over the y_true / y_pred lists currently in scope
# (the per-window lists from build_predictions when cells run in order),
# each using the 'weighted' averaging mode.
weighted_kwargs = {'y_true': y_true, 'y_pred': y_pred, 'average': 'weighted'}
precision = precision_score(**weighted_kwargs)
recall = recall_score(**weighted_kwargs)
f1 = f1_score(**weighted_kwargs)

In [43]:
# Collect each file's mean probability vector in row order and write one
# column per class into the frame.
y_probs = []
for row_idx, fname in zip(df.index, df['fname']):
    file_probs = fn_prob[fname]
    y_probs.append(file_probs)
    for class_name, prob in zip(classes, file_probs):
        df.at[row_idx, class_name] = prob

In [44]:
# File-level prediction: the class with the highest mean probability.
# Assigned straight to the frame instead of rebinding `y_pred`, so the
# per-window integer predictions returned by build_predictions stay intact
# and the metrics cell can be re-run without mixing ints with class names.
df['y_pred'] = [classes[np.argmax(y)] for y in y_probs]


In [45]:
# Persist the mapping frame, now carrying the per-class probability columns
# and `y_pred`, to predictions.csv in the current working directory.
df.to_csv('predictions.csv')

In [46]:
# Report the metrics computed in the earlier cells, one per line.
for metric_label, metric_value in (
    ("Accuracy:", acc_score),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(metric_label, metric_value)

Accuracy: 0.6320509794666037
Precision: 0.6404258571945903
Recall: 0.6320509794666037
F1 Score: 0.6338740852124337
