In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

In [24]:
# Load the prediction results from disk.
pred_df = pd.read_csv('triple_predictions.csv')

# Tally the ground-truth labels: how many rows carry each value in the
# 'Actual' column (e.g. how many rows are labelled "violin").
pred_df_actual_count = Counter(pred_df['Actual'].tolist())
pred_df_actual_count

Counter({'violin': 1699,
         'piano': 1359,
         'flute': 1289,
         'clarinet': 1066,
         'trumpet': 550,
         'cello': 527,
         'voice': 497,
         'guitar_electric': 492,
         'sax_soprano': 235,
         'sax_tenor': 218,
         'sax_alto': 157,
         'piccolo': 154,
         'guitar_acoustic': 145,
         'organ': 133,
         'saxophone': 117,
         'sax_baritone': 92,
         'oboe': 84,
         'bass': 33})

In [25]:
# [prediction, truth] pairs for every row where column '0' (the prediction)
# disagrees with column 'Actual' (the truth).
incorrect = [
    [pred, actual]
    for pred, actual in zip(pred_df['0'], pred_df['Actual'])
    if pred != actual
]

# [prediction, file name] pairs for the same misclassified rows; per the
# original author's note, the file name contains the played instrument.
actual_filename = [
    [pred, file_name]
    for pred, actual, file_name in zip(pred_df['0'], pred_df['Actual'], pred_df['Filename'])
    if pred != actual
]

# Overall summary: accuracy is the share of rows that were NOT misclassified.
print('Number of incorrect predictions:', len(incorrect))
print('Accuracy:', 1 - len(incorrect) / len(pred_df))

Number of incorrect predictions: 1804
Accuracy: 0.7960890697411552


In [4]:
# Keep only the true label (second element) of each misclassified
# [prediction, truth] pair, then count misclassifications per true label.
incorrect_actual = [truth for _, truth in incorrect]
incorrect_actual_count = Counter(incorrect_actual)
incorrect_actual_count

Counter({'flute': 215,
         'clarinet': 213,
         'violin': 188,
         'trumpet': 148,
         'guitar_acoustic': 130,
         'voice': 128,
         'piano': 125,
         'guitar_electric': 120,
         'saxophone': 104,
         'sax_soprano': 100,
         'sax_tenor': 70,
         'organ': 70,
         'cello': 60,
         'sax_alto': 48,
         'sax_baritone': 27,
         'oboe': 27,
         'piccolo': 23,
         'bass': 8})

In [27]:
## Build two plain dicts that share one key order (that of pred_df_actual_count)
# Misclassification counts, re-keyed in the order of the overall label counts.
# Counter lookups yield 0 for a label that was never misclassified.
rearranged_incorrect_actual_count = {
    label: incorrect_actual_count[label]
    for label in pred_df_actual_count
}

# Overall label counts as a plain dict, keeping the Counter's own key order.
rearranged_pred_df_actual_count = dict(pred_df_actual_count)
rearranged_incorrect_actual_count

{'piccolo': 23,
 'violin': 188,
 'guitar_acoustic': 130,
 'voice': 128,
 'piano': 125,
 'bass': 8,
 'flute': 215,
 'trumpet': 148,
 'sax_tenor': 70,
 'guitar_electric': 120,
 'clarinet': 213,
 'saxophone': 104,
 'organ': 70,
 'cello': 60,
 'sax_alto': 48,
 'sax_soprano': 100,
 'sax_baritone': 27,
 'oboe': 27}

In [28]:
# Display the total row count per 'Actual' label as a plain dict, for
# side-by-side comparison with the misclassification counts.
rearranged_pred_df_actual_count

{'piccolo': 154,
 'violin': 1699,
 'guitar_acoustic': 145,
 'voice': 497,
 'piano': 1359,
 'bass': 33,
 'flute': 1289,
 'trumpet': 550,
 'sax_tenor': 218,
 'guitar_electric': 492,
 'clarinet': 1066,
 'saxophone': 117,
 'organ': 133,
 'cello': 527,
 'sax_alto': 157,
 'sax_soprano': 235,
 'sax_baritone': 92,
 'oboe': 84}

In [21]:
# Error rate per instrument: misclassified rows divided by total rows
# for that true label.
errors_dict = {
    instrument: rearranged_incorrect_actual_count[instrument] / total
    for instrument, total in rearranged_pred_df_actual_count.items()
}

# Tabulate the rates and list the worst-classified instruments first.
errors_df = (
    pd.DataFrame(errors_dict.items(), columns=['Instrument', 'Error Rate'])
    .sort_values(by='Error Rate', ascending=False)
)
errors_df

Unnamed: 0,Instrument,Error Rate
2,guitar_acoustic,0.896552
11,saxophone,0.888889
12,organ,0.526316
15,sax_soprano,0.425532
17,oboe,0.321429
8,sax_tenor,0.321101
14,sax_alto,0.305732
16,sax_baritone,0.293478
7,trumpet,0.269091
3,voice,0.257545
