In [1]:
# Add project directory
import sys
sys.path.insert(0, '..')

import os

from aerosonicdb.utils import get_project_root
from aerosonicdb.data import download
from aerosonicdb.features import extract_mfccs
from aerosonicdb.features import extract_env_mfccs
from aerosonicdb.models.mfcc import logistic_regression, mlp
from aerosonicdb.models.mfcc import cnn, rnnlstm

In [2]:
# Resolve the raw-data directory relative to the project root.
# The path components are passed separately so os.path.join inserts the
# platform's own separator instead of embedding a hard-coded '/'.
root_path = get_project_root()
dataset_path = os.path.join(root_path, 'data', 'raw')

In [3]:
# Show the resolved project root and raw-data directory as a sanity check
root_path, dataset_path

(WindowsPath('C:/Users/Blake/PycharmProjects/AeroSonicDB-YPAD0523/notebooks/..'),
 'C:\\Users\\Blake\\PycharmProjects\\AeroSonicDB-YPAD0523\\notebooks\\..\\data/raw')

In [4]:
# Download the dataset into the raw-data directory.
# In the recorded run the dataset was reported as already downloaded.
download(target_path=dataset_path)

Dataset already downloaded - see C:\Users\Blake\PycharmProjects\AeroSonicDB-YPAD0523\notebooks\..\data/raw


In [5]:
# Extract and save MFCC features for the train set.
# In the recorded run the features were reported as already extracted.
extract_mfccs.save_mfccs(set_str='train')

train features already extracted.


In [5]:
# Extract and save MFCC features for the test set.
# NOTE(review): the recorded output of this cell reads "train features already
# extracted." - confirm save_mfccs reports the requested split, or re-run the cell.
extract_mfccs.save_mfccs(set_str='test')

train features already extracted.


In [5]:
# Extract features for the environment set.
# NOTE(review): the recorded output of this cell reads "train features already
# extracted." - confirm the message refers to the environment set, or re-run the cell.
extract_env_mfccs.extract_all_env_feats()

train features already extracted.


In [6]:
# Run 10-fold CV with logistic regression, then a 10-model evaluation against
# the Test and Environment sets; the results are printed as the run progresses.
# Each returned value is a tuple of Average Precision results:
# (mean in %, standard deviation in %, per-fold scores).
lr_cv, lr_test, lr_env = logistic_regression.run_cv()


Training data loaded.

Running 10-fold cross-validation...
CV results:
[0.97637696 0.99571268 0.99281047 0.99323313 0.9874602  0.99244348
 0.98116918 0.98958652 0.9971081  0.99311307]
Average Precision Score for 10-fold CV: 98.99% (0.62%)

Running 10-model evaluation against Test set...

Test data loaded.

Test evaluation results:
[0.9957370321893153, 0.9966984162541911, 0.996503044983092, 0.9964033368619089, 0.9962611110431339, 0.9963922480760298, 0.9963835592690622, 0.9969252247456627, 0.9974198435957665, 0.996344153616917]
Average Precision Score against Test set: 99.65% (0.04%)

Running 10-model evaluation against the Environment set...

Environment test data loaded.
Environment evaluation results:
[0.7937978080859566, 0.794254118738723, 0.7986706696855724, 0.7989080096260676, 0.8020987133376998, 0.7982549928545433, 0.7975446173952946, 0.7993990236860739, 0.7832176454935815, 0.7954513392711429]
Average Precision Score against Environment set: 79.62% (0.49%)


In [7]:
# Run 10-fold CV with the MLP (50 epochs), then a 10-model evaluation against
# the Test and Environment sets; the results are printed as the run progresses.
# Each returned value is a tuple of Average Precision results:
# (mean in %, standard deviation in %, per-fold scores).
mlp_cv, mlp_test, mlp_env = mlp.run_cv(epochs=50)


Training data loaded.

Running 10-fold cross-validation...
CV results:
[0.96009255 0.99582553 0.99770857 0.9996623  0.99422271 0.99749396
 0.99321098 0.998325   0.99933314 0.99622813]
Average Precision Score for 10-fold CV: 99.32% (1.12%)

Running 10-model evaluation against Test set...

Test data loaded.

Test evaluation results:
[0.9893979224070658, 0.9951699744514452, 0.99786966567994, 0.9998214298396564, 0.9997491063029667, 0.9987291662840146, 0.9992972755018208, 0.999679017823844, 0.9997861747178183, 0.9998214298396564]
Average Precision Score against Test set: 99.79% (0.32%)

Running 10-model evaluation against the Environment set...

Environment test data loaded.
Environment evaluation results:
[0.5119625377032132, 0.8390447897182393, 0.6129027612647318, 0.7713856881980936, 0.7207754244722735, 0.5493398922683471, 0.7648976726183944, 0.8355890963790086, 0.8092237702848674, 0.809648696442879]
Average Precision Score against Environment set: 72.25% (11.49%)


In [8]:
# Run 10-fold CV with the CNN (50 epochs), then a 10-model evaluation against
# the Test and Environment sets; the results are printed as the run progresses.
# Each returned value is a tuple of Average Precision results:
# (mean in %, standard deviation in %, per-fold scores).
cnn_cv, cnn_test, cnn_env = cnn.run_cv(epochs=50)


Training data loaded.

Running 10-fold cross-validation...
CV results:
[0.98886593 0.99459044 0.9912718  0.99490298 0.99264209 0.99820065
 0.98507758 0.99534357 0.99960412 0.99601983]
Average Precision Score for 10-fold CV: 99.37% (0.41%)

Running 10-model evaluation against Test set...

Test data loaded.

Test evaluation results:
[0.9949214082601454, 0.9940977811320869, 0.995132776364976, 0.9945543109360311, 0.9946245797880886, 0.9960883692290879, 0.9959264910441642, 0.9949084565782158, 0.9948713080905447, 0.9933954394674749]
Average Precision Score against Test set: 99.49% (0.07%)

Running 10-model evaluation against the Environment set...

Environment test data loaded.
Environment evaluation results:
[0.8209159236677798, 0.7625746097782835, 0.7898474530259875, 0.8362101613727897, 0.8201994513491638, 0.8277294060548548, 0.8114194196237295, 0.8374844885124119, 0.8426244841161656, 0.7947344351527386]
Average Precision Score against Environment set: 81.44% (2.40%)


In [9]:
# Logistic regression, 10-fold CV: (mean AP in %, std in %, per-fold AP scores)
lr_cv

(98.99013789048396,
 0.6223089492299467,
 array([0.97637696, 0.99571268, 0.99281047, 0.99323313, 0.9874602 ,
        0.99244348, 0.98116918, 0.98958652, 0.9971081 , 0.99311307]))

In [10]:
# MLP, 10-fold CV: (mean AP in %, std in %, per-fold AP scores)
mlp_cv

(99.32102863355918,
 1.1214359200649127,
 array([0.96009255, 0.99582553, 0.99770857, 0.9996623 , 0.99422271,
        0.99749396, 0.99321098, 0.998325  , 0.99933314, 0.99622813]))

In [11]:
# CNN, 10-fold CV: (mean AP in %, std in %, per-fold AP scores)
cnn_cv

(99.36518997299683,
 0.41248508555458235,
 array([0.98886593, 0.99459044, 0.9912718 , 0.99490298, 0.99264209,
        0.99820065, 0.98507758, 0.99534357, 0.99960412, 0.99601983]))

In [12]:
# Logistic regression on the Test set: (mean AP in %, std in %, per-model AP scores)
lr_test

(99.6506797063508,
 0.041998014218560656,
 [0.9957370321893153,
  0.9966984162541911,
  0.996503044983092,
  0.9964033368619089,
  0.9962611110431339,
  0.9963922480760298,
  0.9963835592690622,
  0.9969252247456627,
  0.9974198435957665,
  0.996344153616917])

In [13]:
# MLP on the Test set: (mean AP in %, std in %, per-model AP scores)
mlp_test

(99.7932116284823,
 0.3160607582741692,
 [0.9893979224070658,
  0.9951699744514452,
  0.99786966567994,
  0.9998214298396564,
  0.9997491063029667,
  0.9987291662840146,
  0.9992972755018208,
  0.999679017823844,
  0.9997861747178183,
  0.9998214298396564])

In [14]:
# CNN on the Test set: (mean AP in %, std in %, per-model AP scores)
cnn_test

(99.48520920890816,
 0.07484053866941984,
 [0.9949214082601454,
  0.9940977811320869,
  0.995132776364976,
  0.9945543109360311,
  0.9946245797880886,
  0.9960883692290879,
  0.9959264910441642,
  0.9949084565782158,
  0.9948713080905447,
  0.9933954394674749])

In [15]:
# Logistic regression on the Environment set: (mean AP in %, std in %, per-model AP scores)
lr_env

(79.61596938174655,
 0.49309212465328345,
 [0.7937978080859566,
  0.794254118738723,
  0.7986706696855724,
  0.7989080096260676,
  0.8020987133376998,
  0.7982549928545433,
  0.7975446173952946,
  0.7993990236860739,
  0.7832176454935815,
  0.7954513392711429])

In [16]:
# MLP on the Environment set: (mean AP in %, std in %, per-model AP scores)
mlp_env

(72.24770329350048,
 11.489128864833773,
 [0.5119625377032132,
  0.8390447897182393,
  0.6129027612647318,
  0.7713856881980936,
  0.7207754244722735,
  0.5493398922683471,
  0.7648976726183944,
  0.8355890963790086,
  0.8092237702848674,
  0.809648696442879])

In [17]:
# CNN on the Environment set: (mean AP in %, std in %, per-model AP scores)
cnn_env

(81.43739832653904,
 2.3965125456717464,
 [0.8209159236677798,
  0.7625746097782835,
  0.7898474530259875,
  0.8362101613727897,
  0.8201994513491638,
  0.8277294060548548,
  0.8114194196237295,
  0.8374844885124119,
  0.8426244841161656,
  0.7947344351527386])

In [25]:
# Zero-based index of the fold with the highest logistic-regression CV score
# (element 2 of the result tuple holds the per-fold scores).
list(lr_cv[2]).index(max(list(lr_cv[2])))

8

In [47]:
def fetch_best_model_indexes(model, cv, test, env):
    """Print the best and worst score index for each evaluation set.

    Parameters
    ----------
    model : str
        Label printed in the report header.
    cv, test, env : iterable of float
        Scores for cross-validation, the Test set and the Environment set.
        Any iterable is accepted (list, tuple, NumPy array, generator, ...);
        each one is copied into a list before it is searched.

    Returns
    -------
    None
        The report is printed to stdout; nothing is returned.

    Raises
    ------
    ValueError
        If any of the score collections is empty.

    Notes
    -----
    Indexes are zero-based. When a best or worst score occurs more than
    once, the index of its first occurrence is reported.
    """

    def _best_and_worst(scores):
        # Materialise once so inputs without an ``index`` method
        # (e.g. NumPy arrays) are handled the same way as lists.
        scores = list(scores)
        return scores.index(max(scores)), scores.index(min(scores))

    cv_best, cv_worst = _best_and_worst(cv)
    test_best, test_worst = _best_and_worst(test)
    env_best, env_worst = _best_and_worst(env)

    print(
        f'Model: {model}\n'
        f'CV: Best {cv_best}, Worst {cv_worst}\n'
        f'TEST: Best {test_best}, Worst {test_worst}\n'
        f'ENV: Best {env_best}, Worst {env_worst}'
    )

In [49]:
# Report best/worst score indexes for logistic regression
# (element 2 of each result tuple holds the individual scores).
fetch_best_model_indexes('Logistic regression', lr_cv[2], lr_test[2], lr_env[2])

Model: Logistic regression
CV: Best 8, Worst 0
TEST: Best 8, Worst 0
ENV: Best 4, Worst 8


In [50]:
# Report best/worst score indexes for the MLP
# (element 2 of each result tuple holds the individual scores).
fetch_best_model_indexes('MLP', mlp_cv[2], mlp_test[2], mlp_env[2])

Model: MLP
CV: Best 3, Worst 0
TEST: Best 3, Worst 0
ENV: Best 1, Worst 0


In [51]:
# Report best/worst score indexes for the CNN
# (element 2 of each result tuple holds the individual scores).
fetch_best_model_indexes('CNN', cnn_cv[2], cnn_test[2], cnn_env[2])

Model: CNN
CV: Best 8, Worst 6
TEST: Best 5, Worst 9
ENV: Best 8, Worst 1
