# https://www.coursera.org/learn/advanced-data-science-capstone/
# "capstone_orchestra_detection" notebook set
# for project: sound classification for the instruments of the orchestra

# model_evaluate

In [1]:

import matplotlib.pyplot as plt 
import numpy as np
from scipy.io import wavfile
from numpy.fft import fft, ifft
import scipy.signal as sig


external dependencies:
* ./footprints.json
* ./modelprediction*.csv
* ./predictionclasses.csv


In [2]:
import glob
import json

import pandas as pd

## load supervised / expected / actual data (A)

In [3]:
def get_footprints_data(path='footprints.json'):
    """Load the footprint records from a JSON file.

    Parameters
    ----------
    path : str or path-like, default 'footprints.json'
        Location of the JSON file. The default keeps the original behaviour
        of reading ``footprints.json`` from the working directory.

    Returns
    -------
    object
        Whatever ``json.load`` decodes from the file. The rest of the
        notebook iterates over it and reads the keys 'label', 'train',
        'istm' and 'note' from every item.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, encoding='utf-8') as footprints_file:
        footprints = json.load(footprints_file)

    print('footprints loaded with',len(footprints),'items')
    return footprints

In [4]:
# read the footprint records from ./footprints.json (the loader prints the item count)
footprints = get_footprints_data()

footprints loaded with 18 items


In [5]:
def get_footprints_data_df(footprints):
    """Turn the footprint records into a DataFrame of their metadata.

    Parameters
    ----------
    footprints : iterable of mapping
        Each item must provide the keys 'label', 'train', 'istm' and 'note'.
        Any other keys are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per footprint, columns ``label, train, istm, note`` in that
        order, with a fresh 0..n-1 index. For empty input an empty frame
        with these four columns is returned, so callers can filter it
        without a ``None`` check.

    Raises
    ------
    KeyError
        If a footprint lacks one of the four required keys.
    """
    columns = ['label', 'train', 'istm', 'note']
    # Build all rows first and create the frame once; concatenating
    # one-row frames inside the loop copies the data on every iteration.
    records = [{column: fp[column] for column in columns} for fp in footprints]
    return pd.DataFrame(records, columns=columns)

In [6]:
# footprint metadata as a table; the rows with train == 0 are kept as the
# expected/actual data (A) that the predictions are compared against
dffp = get_footprints_data_df(footprints)
dfa = dffp.loc[dffp['train'] == 0]
dfa

Unnamed: 0,label,train,istm,note
2,Bassoon3a_x,0,Bassoon,G3
5,Viola3a_x,0,Viola,G4
8,Cello3a_x,0,Cello,D3
11,FrHorn3a_x,0,FrHorn,C4
14,Flute3a_x,0,Flute,E4
17,Clarinet3a_x,0,Clarinet,F#3


## load istm-dummy mapping for prediction models (M)

In [7]:
# instrument (istm) to class-id mapping (M); fail early if the file layout changed
dfm = pd.read_csv('predictionclasses.csv')
assert list(dfm.columns) == ['istm', 'class', 'classn']
dfm

Unnamed: 0,istm,class,classn
0,Bassoon,0,0.0
1,Viola,1,1.0
2,Cello,2,2.0
3,FrHorn,3,3.0
4,Flute,4,4.0
5,Clarinet,5,5.0


In [8]:
# expose the class id under the column name used in the prediction files
dfm = dfm.assign(predicted_class=dfm['class'])
dfm

Unnamed: 0,istm,class,classn,predicted_class
0,Bassoon,0,0.0,0
1,Viola,1,1.0,1
2,Cello,2,2.0,2
3,FrHorn,3,3.0,3
4,Flute,4,4.0,4
5,Clarinet,5,5.0,5


## load model predictions (P)

In [9]:
# list the available prediction files; glob works on every platform,
# whereas the previous `!dir /b` shell call only runs on Windows
sorted(glob.glob('modelprediction*.csv'))

modelprediction.CORR_ON_LININTERP_MERGED_PEAKFREQS.1.csv
modelprediction.MAE_ON_MEAN_ROUNDED_PEAKFREQS.1.csv
modelprediction.MLP.1.csv
modelprediction.SVC.1.csv


In [10]:
# prediction file to evaluate in the cells below; set this to any of the
# modelprediction*.csv files listed above to evaluate a different model
evaluate = 'modelprediction.CORR_ON_LININTERP_MERGED_PEAKFREQS.1.csv'

In [11]:
# model predictions (P): keep only the sample label and the predicted class id
prediction_columns = ['test_sample', 'predicted_class']
dfp = pd.read_csv(evaluate).loc[:, prediction_columns]
dfp

Unnamed: 0,test_sample,predicted_class
0,Bassoon3a_x,4
1,Viola3a_x,2
2,Cello3a_x,1
3,FrHorn3a_x,4
4,Flute3a_x,4
5,Clarinet3a_x,5


## combining the three datasets P,M,A

In [12]:
def joindata4eval(dfp,dfm,dfa,ret):
    """Combine predictions (P), class mapping (M) and actual data (A).

    Parameters
    ----------
    dfp : pandas.DataFrame
        Predictions with the columns 'test_sample' and 'predicted_class'.
    dfm : pandas.DataFrame
        Class mapping with an 'istm' column. The class id is taken from the
        'class' column when present; otherwise the frame's index is used as
        the class id (the only behaviour of the previous implementation).
        Class ids must be unique.
    dfa : pandas.DataFrame
        Actual data with the columns 'label' (matched against
        'test_sample') and 'istm'; further columns are carried through.
    ret : {'df', 'accuracy'}
        What to return.

    Returns
    -------
    pandas.DataFrame or float
        For ``ret == 'df'``: one row per prediction, indexed by
        'test_sample', with 'predicted_class', 'predicted_istm', the
        columns of ``dfa`` ('istm' renamed to 'actual_istm') and a boolean
        'match' column.
        For ``ret == 'accuracy'``: matching rows divided by all rows,
        rounded to two decimals; ``nan`` when there are no rows.

    Raises
    ------
    ValueError
        If ``ret`` is neither 'df' nor 'accuracy'.
    """
    if ret not in ('df', 'accuracy'):
        raise ValueError("ret must be 'df' or 'accuracy', got %r" % (ret,))

    # Resolve the instrument by class *value*. Joining on dfm's positional
    # index is only correct while the CSV rows happen to be ordered by class.
    if 'class' in dfm.columns:
        istm_by_class = dfm.set_index('class')['istm']
    else:
        istm_by_class = dfm['istm']

    dfpm = dfp[['test_sample','predicted_class']].copy()
    # predicted classes without a mapping entry become NaN and never match
    dfpm['predicted_istm'] = dfpm['predicted_class'].map(istm_by_class)
    dfpm = dfpm.set_index('test_sample')

    dfa2 = dfa.rename(columns={'label':'test_sample','istm':'actual_istm'}).set_index('test_sample')

    dfpma = dfpm.join(dfa2,rsuffix='_act')
    dfpma['match'] = dfpma['predicted_istm'] == dfpma['actual_istm']

    if ret=='df': return dfpma

    # accuracy = correctly predicted samples / all predicted samples
    total = len(dfpma)
    if total == 0:
        return float('nan')
    hits = int(dfpma['match'].sum())
    return round(hits / total, 2)


In [13]:
# per-sample comparison of predicted vs. actual instrument
dfpma = joindata4eval(dfp, dfm, dfa, ret='df')
dfpma

Unnamed: 0_level_0,predicted_class,predicted_istm,train,actual_istm,note,match
test_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bassoon3a_x,4,Flute,0,Bassoon,G3,False
Viola3a_x,2,Cello,0,Viola,G4,False
Cello3a_x,1,Viola,0,Cello,D3,False
FrHorn3a_x,4,Flute,0,FrHorn,C4,False
Flute3a_x,4,Flute,0,Flute,E4,True
Clarinet3a_x,5,Clarinet,0,Clarinet,F#3,True


In [14]:
# share of test samples whose predicted instrument equals the actual one
accuracy = joindata4eval(dfp, dfm, dfa, ret='accuracy')
print('accuracy','of',evaluate,'is',accuracy)

accuracy of modelprediction.CORR_ON_LININTERP_MERGED_PEAKFREQS.1.csv is 0.33


* accuracy of modelprediction.SVC.1.csv is 0.17
* accuracy of modelprediction.CORR_ON_LININTERP_MERGED_PEAKFREQS.1.csv is 0.33
* accuracy of modelprediction.MAE_ON_MEAN_ROUNDED_PEAKFREQS.1.csv is 0.0
* accuracy of modelprediction.MLP.1.csv is 0.33

# comment
* by computing the distance between the data points of the footprints, the best accuracy that occurred in our tests was 40% (note: the runs listed above peak at 0.33, i.e. 2 of 6 test samples)

In [15]:
# closing message; reaching this cell means the notebook ran to the end
print('thank you.')

thank you.


# END OF model_evaluate document