In [None]:
# Mount Google Drive at /content/drive (google.colab is only available inside Colab).
# NOTE(review): the project paths below start with "drive/My Drive/", so they
# presumably rely on the working directory being /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Project root on the mounted Drive; the CSV, spectrogram and model paths used
# in this notebook are all built from it. Note the trailing slash: several
# cells append to it with plain string concatenation.
root_path= "drive/My Drive/SUTD/Year 3/CDS/Project/Team No Name/"
# Alternative project location (kept for reference):
# root_path= "drive/My Drive/SUTD ML/Team No Name/"

In [None]:
import os
import pandas as pd
import numpy as np
import keras
import pickle
from sklearn.svm import OneClassSVM
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Paths derived from root_path, plus the split/batch settings shared by later cells.
csv_path = os.path.join(root_path,'UrbanSound8K/metadata/UrbanSound8K.csv')  # metadata table read with pd.read_csv
# Directory of "<slice_file_name>.npy" spectrograms; the trailing slash matters
# because the loader classes build file paths by string concatenation.
spectrograms_path = os.path.join(root_path,'numpySpectrograms/')
test_size = 0.2  # fraction handed to train_test_split as the held-out part
batch_size = 16  # batch size passed to SpecLoaderWnn

# Using original spectrograms

In [None]:
from tqdm import tqdm

class SpecLoader():
  """Eagerly loads pre-computed spectrogram arrays stored as ``.npy`` files.

  Every entry of ``x_set`` is a file name. The array is read from
  ``spec_dir + name + ".npy"`` (plain string concatenation, so ``spec_dir``
  must already end with a path separator) and transposed before it is
  handed back.
  """

  def __init__(self, x_set, spec_dir):
    self.spec_dir = spec_dir
    self.x = x_set

  def __len__(self):
    """Number of file names held by the loader."""
    return len(self.x)

  def _load_one(self, name):
    """Read one spectrogram from disk and return it with its axes reversed."""
    path = self.spec_dir + name + ".npy"
    return np.load(path).transpose()

  def getitemlist(self):
    """Load every spectrogram (with a progress bar) into a Python list."""
    return [self._load_one(name) for name in tqdm(self.x)]

  def getitem(self):
    """Load every spectrogram (with a progress bar) into one stacked array."""
    return np.array(self.getitemlist())

class SpecLoaderWnn(keras.utils.Sequence):
  """Keras ``Sequence`` that serves spectrogram batches loaded lazily from disk.

  Only inputs are produced (no labels). File names come from ``x_set`` and
  are resolved as ``spec_dir + name + ".npy"``; each array is transposed
  before being placed in the batch.
  """

  def __init__(self, x_set, batch_size, spec_dir):
    self.spec_dir = spec_dir
    self.batch_size = batch_size
    self.x = x_set

  def __len__(self):
    # Number of batches; the last one may be partial, hence the ceil.
    n_batches = np.ceil(len(self.x) / self.batch_size)
    return int(n_batches)

  def __getitem__(self, idx):
    """Return batch ``idx`` as a single array of transposed spectrograms."""
    start = idx * self.batch_size
    stop = start + self.batch_size
    return np.array([
        np.load(self.spec_dir + name + ".npy").transpose()
        for name in self.x[start:stop]
    ])

In [None]:
# Read the metadata table and partition it by class: rows with classID < 5 are
# the "known" classes, rows with classID >= 5 are treated as "unknown".
data_df = pd.read_csv(csv_path)
data_df_known = data_df.loc[data_df["classID"] < 5]
data_df_unknown = data_df.loc[data_df["classID"] >= 5]

# Hold out `test_size` of the known-class clips. Only file names and class IDs
# are split here; the spectrograms themselves are loaded later. The fixed
# random_state keeps the split reproducible across runs.
Xtrainval, X_test, Ytrainval, y_test = train_test_split(data_df_known['slice_file_name'].tolist(), data_df_known['classID'].tolist(), test_size=test_size, random_state = 42)

In [None]:
#Xtrainval = data_df_known['slice_file_name'].tolist()
#Ytrainval = data_df_known['classID'].tolist()

# Load every training spectrogram into memory as one stacked array.
sl = SpecLoader(Xtrainval, spectrograms_path)
trainX = sl.getitem()

# The stacked array is 4-D; flatten each sample to a single feature vector so
# the result is the 2-D (n_samples, n_features) matrix the SVMs are fitted on.
nsamples, x, y, z = trainX.shape
trainX = trainX.reshape((nsamples, x*y*z))




In [None]:

# Define the outlier-detection model: a one-class SVM with a linear kernel.
modellinear = OneClassSVM(kernel="linear", verbose=True)

# Fit on the flattened known-class training spectrograms only.
modellinear.fit(trainX)

# Save the fitted model. The context manager guarantees the file is flushed
# and closed even if pickling fails part-way.
with open(root_path+"OneClassSVM Models/original_linear", 'wb') as model_file:
  pickle.dump(modellinear, model_file)

[LibSVM]

In [None]:
# One-class SVM with a polynomial kernel.
modelpoly = OneClassSVM(kernel="poly", verbose=True)

# Fit on the flattened known-class training spectrograms only.
modelpoly.fit(trainX)

# Save the fitted model. The context manager guarantees the file is flushed
# and closed even if pickling fails part-way.
with open(root_path+"OneClassSVM Models/original_poly", 'wb') as model_file:
  pickle.dump(modelpoly, model_file)

[LibSVM]

In [None]:
# One-class SVM with an RBF kernel.
modelrbf = OneClassSVM(kernel="rbf", verbose=True)

# Fit on the flattened known-class training spectrograms only.
modelrbf.fit(trainX)

# Save the fitted model. The context manager guarantees the file is flushed
# and closed even if pickling fails part-way.
with open(root_path+"OneClassSVM Models/original_rbf", 'wb') as model_file:
  pickle.dump(modelrbf, model_file)

[LibSVM]

In [None]:
# One-class SVM with a sigmoid kernel.
modelsigmoid = OneClassSVM(kernel="sigmoid", verbose=True)

# Fit on the flattened known-class training spectrograms only.
modelsigmoid.fit(trainX)

# Save the fitted model. The context manager guarantees the file is flushed
# and closed even if pickling fails part-way.
with open(root_path+"OneClassSVM Models/original_sigmoid", 'wb') as model_file:
  pickle.dump(modelsigmoid, model_file)

[LibSVM]

### Test

In [None]:
# Reload the four fitted one-class SVMs from Drive.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load model files that were produced by this notebook / a trusted source.
# Each file is opened in a `with` block so its handle is closed right away.
print("loading linear...")
with open(root_path+"OneClassSVM Models/original_linear", 'rb') as model_file:
  modellinear = pickle.load(model_file)
print("loading poly...")
with open(root_path+"OneClassSVM Models/original_poly", 'rb') as model_file:
  modelpoly = pickle.load(model_file)
print("loading rbf...")
with open(root_path+"OneClassSVM Models/original_rbf", 'rb') as model_file:
  modelrbf = pickle.load(model_file)
print("loading sigmoid...")
with open(root_path+"OneClassSVM Models/original_sigmoid", 'rb') as model_file:
  modelsigmoid = pickle.load(model_file)

loading linear...
loading poly...
loading rbf...
loading sigmoid...


In [None]:
# Build the evaluation set: a `test_size` sample of the unknown-class clips
# followed by the held-out known-class clips (X_test). The ordering matters:
# every label list below is built as "unknown part + known part" to match.
#Xtestvalunknown = data_df_unknown['slice_file_name'].tolist()
# Only the held-out part of the unknown-class split is used; the "trash" part is discarded.
X_trash, Xtestvalunknown, y_trash, y_unknown = train_test_split(data_df_unknown['slice_file_name'].tolist(), data_df_unknown['classID'].tolist(), test_size=test_size, random_state = 42)

# Uncomment to run on a small subset while debugging.
#Xtestvalunknown = Xtestvalunknown[:25]
#X_test = X_test[:25]

# At this point testX is a list of file names (it is replaced by the array further down).
testX = Xtestvalunknown + X_test

# Multi-class ground truth: -1 for every unknown clip, the real classID for known clips.
actualtestY = [-1 for i in Xtestvalunknown] + y_test
print("length of actual test y", len(actualtestY))

# Binary ground truth in the one-class SVM convention: -1 = unknown, 1 = known.
testY = [-1 for i in Xtestvalunknown]+[1 for i in X_test]
testY = np.array(testY)
print(testY.shape)
print("length of test y (1 and -1)", len(testY))

# Load the spectrograms; from here on testX is the stacked array, not the name list.
sltest = SpecLoader(testX, spectrograms_path)
testX = sltest.getitem()

# Flatten each 3-D sample to one feature vector, mirroring the training data.
nsamples, x, y, z = testX.shape
testX = testX.reshape((nsamples, x*y*z))

  0%|          | 0/1747 [00:00<?, ?it/s]

length of actual test y 1747
(1747,)
length of test y (1 and -1) 1747


100%|██████████| 1747/1747 [09:53<00:00,  2.95it/s]


In [None]:
# detect outliers in the test set
# Each predict call labels every test sample as 1 or -1 (see the printed
# predictions further down); these arrays are compared against testY below.
print("predicting linear...")
yhatLinear = modellinear.predict(testX)
print("predicting poly...")
yhatPoly = modelpoly.predict(testX)
print("predicting rbf...")
yhatRBF = modelrbf.predict(testX)
print("predicting sigmoid...")
yhatSigmoid = modelsigmoid.predict(testX)

predicting linear...
predicting poly...
predicting rbf...
predicting sigmoid...


In [None]:
# evaluate
# input: x,y - numpy of shape (n,) 
# output: percentage of hits
def evaluate(x,y):
  """Return the fraction of positions at which ``x`` and ``y`` agree.

  Both arguments are indexable sequences (e.g. 1-D numpy arrays). Elements
  are compared after conversion with ``int``, and the count of matches is
  divided by ``len(y)``.
  """
  matches = sum(1 for pos in range(len(y)) if int(x[pos]) == int(y[pos]))
  return matches/len(y)

def evaluate2(predicted, expected):
    """Print overall accuracy, per-class accuracy and three averaged F1 scores.

    ``predicted`` and ``expected`` are equally long, indexable label
    sequences. Per-class accuracy is reported for every label that occurs in
    ``expected``, in order of first appearance. Nothing is returned.
    """
    overall = np.mean(np.array(predicted) == np.array(expected))
    print("Overall accuracy: {}".format(overall))

    # label -> [hits, total]
    per_class = {}
    for pos in range(len(expected)):
        label = expected[pos]
        tally = per_class.setdefault(label, [0, 0])
        tally[1] += 1
        if label == predicted[pos]:
            tally[0] += 1

    for label, (hits, total) in per_class.items():
        print("Accuracy for class {}: {}".format(label, hits/total))

    for average in ("macro", "weighted", "micro"):
        score = f1_score(expected, predicted, average=average)
        print("{} f1 score: {}".format(average, score))

In [None]:
from sklearn.metrics import f1_score

# Kernel display names and their binary (1 / -1) SVM predictions, index-aligned.
kn = ["Linear","Poly","RBF","Sigmoid"]
yhats = [yhatLinear,yhatPoly,yhatRBF,yhatSigmoid]
print("Accuracy - hit rate")
print("--------------------------------")

# Known-vs-unknown hit rate of each kernel against the binary ground truth.
for kernel_name, kernel_yhat in zip(kn, yhats):
  print("{}: {}".format(kernel_name, evaluate(kernel_yhat, testY)))

Accuracy - hit rate
--------------------------------
Linear: 0.527761877504293
Poly: 0.5037206639954207
RBF: 0.46307956496851743
Sigmoid: 0.5317687464224384


In [None]:
# Load the saved model stored under "basemodel-known"; it is used below to
# classify the samples that the one-class SVMs accept as known.
# NOTE(review): this import rebinds the module-level name `keras`, which was
# the standalone `keras` package imported at the top — confirm that is intended.
from tensorflow import keras 
model = keras.models.load_model(root_path+"basemodel-known")


In [None]:
# Two-stage prediction for the linear-kernel one-class SVM: samples the SVM
# accepts as "normal" (1) are re-labelled with the class predicted by the base
# model, while rejected samples keep the label -1.
# NOTE: yhatLinear is overwritten in place, so this cell is not idempotent —
# re-run the SVM prediction cell before running it a second time.

# get index of all 1s/normal class
index_normalList = [i for i in range(len(yhatLinear)) if yhatLinear[i]==1]
print("length of index_normalList", len(index_normalList))

# File names in the same order as the rows of testX (unknown clips first).
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# `predict_classes` is deprecated (and removed in newer TensorFlow releases);
# the arg-max over the per-class scores is its documented multi-class replacement.
normal_predictions = np.argmax(model.predict(x=normalLoader, batch_size=None), axis=-1)

print("predict using one class svm:",yhatLinear[:100])
# change the value in yhatLinear: one vectorised write instead of a Python loop
yhatLinear[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatLinear)

print("\nLinear - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatLinear, average='macro'))
print("Micro:", f1_score(actualtestY, yhatLinear, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatLinear, average='weighted'))
print("None:", f1_score(actualtestY, yhatLinear, average=None))

print("\nEvaluate\n")
evaluate2(yhatLinear, actualtestY)

length of index_normalList 859
length of normalXfilenames 859
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
predict using one class svm: [ 1 -1 -1  1 -1  1 -1  1 -1 -1 -1 -1  1 -1 -1  1  1  1  1  1 -1 -1  1  1
  1 -1  1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1  1  1  1  1  1 -1  1  1  1 -1
 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1  1 -1  1  1 -1 -1
  1  1  1 -1 -1  1  1 -1  1 -1  1 -1  1  1 -1  1  1 -1  1 -1 -1 -1  1  1
 -1 -1 -1 -1]
prediction of positive classes through base model [ 4 -1 -1 ... -1  2 -1]

Linear - F1 Scores
--------------------------------
Macro: 0.4823245617287202
Micro: 0.5123068116771609
Weighted: 0.5047722601774953
None: [0.52830189 0.4040404  0.47945205 0.61691542 0.5891182 

In [None]:
# Two-stage prediction for the polynomial-kernel one-class SVM: samples the SVM
# accepts as "normal" (1) are re-labelled with the class predicted by the base
# model, while rejected samples keep the label -1.
# NOTE: yhatPoly is overwritten in place, so this cell is not idempotent —
# re-run the SVM prediction cell before running it a second time.

# get index of all 1s/normal class
index_normalList = [i for i in range(len(yhatPoly)) if yhatPoly[i]==1]
print("length of index_normalList", len(index_normalList))

# File names in the same order as the rows of testX (unknown clips first).
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# `predict_classes` is deprecated (and removed in newer TensorFlow releases);
# the arg-max over the per-class scores is its documented multi-class replacement.
normal_predictions = np.argmax(model.predict(x=normalLoader, batch_size=None), axis=-1)

print("predict using one class svm:",yhatPoly[:100])
# change the value in yhatPoly: one vectorised write instead of a Python loop
yhatPoly[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatPoly)

print("\nPoly - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatPoly, average='macro'))
print("Micro:", f1_score(actualtestY, yhatPoly, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatPoly, average='weighted'))
print("None:", f1_score(actualtestY, yhatPoly, average=None))

print("\nEvaluate\n")
evaluate2(yhatPoly, actualtestY)

length of index_normalList 897
length of normalXfilenames 897
predict using one class svm: [ 1  1 -1  1  1  1 -1  1 -1  1 -1 -1  1 -1 -1  1  1  1  1  1 -1 -1 -1  1
  1 -1  1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1  1 -1 -1  1  1  1  1  1  1 -1
 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1  1 -1 -1  1  1 -1  1  1 -1 -1
  1  1  1 -1 -1  1  1 -1  1 -1  1 -1  1  1 -1  1  1  1  1 -1  1 -1  1  1
 -1 -1 -1 -1]
prediction of positive classes through base model [ 4  4 -1 ... -1 -1 -1]

Poly - F1 Scores
--------------------------------
Macro: 0.4600097340004173
Micro: 0.48769318832283914
Weighted: 0.48250618399920786
None: [0.49327878 0.41089109 0.38888889 0.58974359 0.58866545 0.2885906 ]

Evaluate

Overall accuracy: 0.48769318832283914
Accuracy for class -1: 0.4901277584204414
Accuracy for class 2: 0.5665024630541872
Accuracy for class 4: 0.25
Accuracy for class 0: 0.39712918660287083
Accuracy for class 3: 0.706140350877193
Accuracy for class 1: 0.3783783783783784
macro f1 score: 0.4600097340004173
weig

In [None]:
# Two-stage prediction for the RBF-kernel one-class SVM: samples the SVM
# accepts as "normal" (1) are re-labelled with the class predicted by the base
# model, while rejected samples keep the label -1.
# NOTE: yhatRBF is overwritten in place, so this cell is not idempotent —
# re-run the SVM prediction cell before running it a second time.

# get index of all 1s/normal class
index_normalList = [i for i in range(len(yhatRBF)) if yhatRBF[i]==1]
print("length of index_normalList", len(index_normalList))

# File names in the same order as the rows of testX (unknown clips first).
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# `predict_classes` is deprecated (and removed in newer TensorFlow releases);
# the arg-max over the per-class scores is its documented multi-class replacement.
normal_predictions = np.argmax(model.predict(x=normalLoader, batch_size=None), axis=-1)

print("predict using one class svm:",yhatRBF[:100])
# change the value in yhatRBF: one vectorised write instead of a Python loop
yhatRBF[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatRBF)

print("\nRBF - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatRBF, average='macro'))
print("Micro:", f1_score(actualtestY, yhatRBF, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatRBF, average='weighted'))
print("None:", f1_score(actualtestY, yhatRBF, average=None))

print("\nEvaluate\n")
evaluate2(yhatRBF, actualtestY)

length of index_normalList 944
length of normalXfilenames 944
predict using one class svm: [-1  1  1  1 -1  1  1 -1  1 -1  1  1 -1 -1  1 -1 -1 -1 -1  1  1 -1  1 -1
 -1  1  1  1  1  1  1  1 -1  1  1  1  1 -1 -1  1 -1  1 -1 -1 -1  1  1  1
  1  1  1  1  1 -1  1 -1  1  1 -1  1 -1  1  1  1  1  1  1 -1  1 -1 -1  1
  1 -1 -1 -1 -1 -1 -1  1  1  1  1  1 -1 -1 -1 -1  1 -1 -1  1 -1 -1 -1 -1
  1  1  1  1]
prediction of positive classes through base model [-1  4  0 ...  3  2 -1]

RBF - F1 Scores
--------------------------------
Macro: 0.44533123971837707
Micro: 0.44934172867773325
Weighted: 0.4430046915041052
None: [0.43629808 0.58264463 0.44776119 0.66968326 0.27322404 0.26237624]

Evaluate

Overall accuracy: 0.44934172867773325
Accuracy for class -1: 0.42160278745644597
Accuracy for class 2: 0.729064039408867
Accuracy for class 4: 0.3081395348837209
Accuracy for class 0: 0.6746411483253588
Accuracy for class 3: 0.21929824561403508
Accuracy for class 1: 0.40540540540540543
macro f1 score: 0.445331

In [None]:
# Two-stage prediction for the sigmoid-kernel one-class SVM: samples the SVM
# accepts as "normal" (1) are re-labelled with the class predicted by the base
# model, while rejected samples keep the label -1.
# NOTE: yhatSigmoid is overwritten in place, so this cell is not idempotent —
# re-run the SVM prediction cell before running it a second time.

# get index of all 1s/normal class
index_normalList = [i for i in range(len(yhatSigmoid)) if yhatSigmoid[i]==1]
print("length of index_normalList", len(index_normalList))

# File names in the same order as the rows of testX (unknown clips first).
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# `predict_classes` is deprecated (and removed in newer TensorFlow releases);
# the arg-max over the per-class scores is its documented multi-class replacement.
normal_predictions = np.argmax(model.predict(x=normalLoader, batch_size=None), axis=-1)

print("predict using one class svm:",yhatSigmoid[:100])
# change the value in yhatSigmoid: one vectorised write instead of a Python loop
yhatSigmoid[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatSigmoid)

print("\nSigmoid - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatSigmoid, average='macro'))
print("Micro:", f1_score(actualtestY, yhatSigmoid, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatSigmoid, average='weighted'))
print("None:", f1_score(actualtestY, yhatSigmoid, average=None))

print("\nEvaluate\n")
evaluate2(yhatSigmoid, actualtestY)

length of index_normalList 866
length of normalXfilenames 866
predict using one class svm: [ 1 -1  1  1 -1  1 -1 -1 -1 -1 -1  1 -1 -1 -1  1 -1  1  1  1  1 -1 -1  1
  1 -1  1  1 -1 -1 -1  1  1 -1 -1 -1 -1 -1  1  1  1  1  1 -1 -1  1  1 -1
 -1 -1  1 -1 -1 -1 -1  1 -1  1 -1 -1 -1  1  1 -1 -1 -1  1 -1  1  1 -1 -1
  1 -1 -1 -1 -1  1 -1  1  1 -1  1  1  1 -1 -1 -1  1 -1  1 -1 -1 -1 -1  1
  1 -1 -1 -1]
prediction of positive classes through base model [ 4 -1  0 ...  3  2 -1]

Sigmoid - F1 Scores
--------------------------------
Macro: 0.4875586054491046
Micro: 0.5151688609044076
Weighted: 0.5087364078889922
None: [0.5304248  0.43520782 0.47222222 0.66059226 0.53278689 0.29411765]

Evaluate

Overall accuracy: 0.5151688609044076
Accuracy for class -1: 0.5365853658536586
Accuracy for class 2: 0.7142857142857143
Accuracy for class 4: 0.23255813953488372
Accuracy for class 0: 0.4258373205741627
Accuracy for class 3: 0.5701754385964912
Accuracy for class 1: 0.4594594594594595
macro f1 score: 0.487558

# Using features extracted from base model

In [None]:
import numpy as np
import pandas as pd

import json
import keras
import tensorflow
from keras import layers
from keras.models import Sequential
from keras.applications import DenseNet201
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score

from keras.models import load_model

In [None]:
# Load the saved base model again (same "basemodel-known" directory as before)
# and print its layer summary so the feature layer can be picked by name.
model_save_path = os.path.join(root_path,"basemodel-known")
# load model
model = load_model(model_save_path)
# summarize model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet201 (Functional)     (None, None, None, 1920)  18321984  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1920)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 9605      
Total params: 18,331,589
Trainable params: 18,102,533
Non-trainable params: 229,056
_________________________________________________________________


In [None]:
# List the top-level layer names; "global_average_pooling2d_1" is the layer
# whose output is used as the feature vector in the cells below.
layer_names = [layer.name for layer in model.layers]
print(layer_names) #['densenet201', 'global_average_pooling2d_1', 'dense_1']

['densenet201', 'global_average_pooling2d_1', 'dense_1']


## Try using one training instance

In [None]:
# Load a single spectrogram to sanity-check the shapes before running the whole set.
onedatanp = np.load(spectrograms_path + "344-3-0-0.wav.npy")
print(onedatanp.shape)

# Reverse the axes and wrap the sample in a batch of one -> 4 dimensions (nsamples,x,y,z)
finalonedatanp = np.array([onedatanp.transpose()])

print(finalonedatanp.shape)

(3, 128, 250)
(1, 250, 128, 3)


In [None]:
from keras import Model

# Build a truncated model that stops at the global-average-pooling layer, so
# predict() returns that layer's output instead of the class scores.
model_output = model.get_layer("global_average_pooling2d_1").output
m = Model(inputs=model.input, outputs=model_output)
# Run the single-sample batch through it and check the feature shape.
finaloutput = m.predict(finalonedatanp)
print(finaloutput.shape)

(1, 1920)


In [None]:
# NOTE(review): `finaltrainingset` is only assigned in the feature-extraction
# loop further down, so on a fresh kernel (Restart & Run All) this cell raises
# NameError. It looks like a leftover scratch cell run out of order — confirm
# and move it below that loop or remove it.
print(np.array(finaltrainingset).shape)

(128, 1920)


In [None]:
np1 = np.array([[[1,2],[2,3,4,5]],[[3,4],[3,4,5,6]]])
print(np1.shape)
np2 = np.array([[[1,2],[2,3,4,5]],[[3,4],[3,4,5,6]]])
np1=np.concatenate((np1,np2),axis=0)
print(np1.shape)

(2, 2)
(4, 2)


In [None]:
# Scratch check: np.array(...) yields a numpy.ndarray instance.
print(type(np.array([2,3])))

<class 'numpy.ndarray'>


## All training instances

In [None]:
# Feature dimensionality seen by the one-class SVM:
# original, shape into OneClassSVM: (3543, 96000)
# after NN, shape into OneClassSVM: (3543, 1920)

# Same value as the batch_size set in the configuration cell near the top;
# re-declared so this section can be run on its own.
batch_size = 16

In [None]:
class SpecLoaderWnn(keras.utils.Sequence):
  """Keras ``Sequence`` that serves spectrogram batches loaded lazily from disk.

  This redefinition replaces the class of the same name defined earlier in the
  notebook and adds an explicit ``getitem`` method alongside ``__getitem__``.
  Only inputs are produced (no labels).

  Args:
    x_set: sequence of spectrogram file names (without the ``.npy`` suffix).
    batch_size: number of files per batch.
    spec_dir: directory prefix; it is joined by plain string concatenation,
      so it must already end with a path separator.
  """

  def __init__(self, x_set, batch_size, spec_dir):
    self.x = x_set
    self.batch_size = batch_size
    self.spec_dir = spec_dir

  def __len__(self):
    """Number of batches; the last batch may be partial, hence the ceil."""
    return int(np.ceil(len(self.x) / self.batch_size))

  def __getitem__(self, idx):
    """Load batch ``idx`` and return it as one array of transposed spectrograms."""
    batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]

    batchSpecs = []
    for fileName in batch_x:
      spec = np.load(self.spec_dir + fileName + ".npy")
      batchSpecs.append(spec.transpose())
    return np.array(batchSpecs)

  def getitem(self,idx):
    """Explicit-call alias of ``__getitem__``.

    Delegates instead of duplicating the loading loop so the two entry points
    cannot drift apart.
    """
    return self[idx]

In [None]:
#prepare data
from sklearn.model_selection import train_test_split

# Same preparation as in the raw-spectrogram section: read the metadata and
# partition it into known (classID < 5) and unknown (classID >= 5) rows.
data_df = pd.read_csv(csv_path)
data_df_known = data_df.loc[data_df["classID"] < 5]
data_df_unknown = data_df.loc[data_df["classID"] >= 5]

# Identical arguments and random_state as the earlier split, so the same
# train/test partition of the known-class clips is reproduced.
Xtrainval, X_test, Ytrainval, y_test = train_test_split(data_df_known['slice_file_name'].tolist(), data_df_known['classID'].tolist(), test_size=test_size, random_state = 42)

In [None]:
#Xtrainval = data_df_known['slice_file_name'].tolist()
#Ytrainval = data_df_known['classID'].tolist()

# SpecLoaderWnn takes (x_set, batch_size, spec_dir) and yields inputs only.
# Passing Ytrainval as an extra positional argument does not match that
# signature and raises TypeError, so the labels are not handed to the loader.
train_loader = SpecLoaderWnn(Xtrainval, batch_size, spectrograms_path)


In [None]:
from keras import Model
from tqdm import tqdm
# Build the feature extractor once, outside the loop: it maps the base model's
# input to the output of its global-average-pooling layer. Re-creating the
# Keras Model on every iteration is loop-invariant work that only slows each
# batch down.
model_output = model.get_layer("global_average_pooling2d_1").output
m = Model(inputs=model.input, outputs=model_output)

# Run every batch through the extractor and collect the per-batch features.
batch_features = []
for ind in tqdm(range(len(train_loader))):
  finaloutput = m.predict(train_loader.getitem(ind))
  if ind==0:
    # Show the feature shape of the first batch as a sanity check.
    print(finaloutput.shape)
  batch_features.append(finaloutput)

# Concatenate once at the end instead of growing the array batch by batch
# (which re-copies all earlier rows every time). As before, the result stays
# None when the loader produced no batches.
finaltrainingset = np.concatenate(batch_features, axis=0) if batch_features else None





  0%|          | 0/222 [00:00<?, ?it/s][A[A[A[A







  0%|          | 1/222 [00:05<18:54,  5.13s/it][A[A[A[A

(16, 1920)






  1%|          | 2/222 [00:10<18:49,  5.13s/it][A[A[A[A







  1%|▏         | 3/222 [00:15<18:33,  5.09s/it][A[A[A[A







  2%|▏         | 4/222 [00:20<18:23,  5.06s/it][A[A[A[A







  2%|▏         | 5/222 [00:25<18:10,  5.02s/it][A[A[A[A







  3%|▎         | 6/222 [00:30<18:03,  5.02s/it][A[A[A[A







  3%|▎         | 7/222 [00:36<19:25,  5.42s/it][A[A[A[A







  4%|▎         | 8/222 [00:41<18:58,  5.32s/it][A[A[A[A







  4%|▍         | 9/222 [00:46<18:33,  5.23s/it][A[A[A[A







  5%|▍         | 10/222 [00:51<18:19,  5.19s/it][A[A[A[A







  5%|▍         | 11/222 [00:56<18:08,  5.16s/it][A[A[A[A







  5%|▌         | 12/222 [01:01<17:55,  5.12s/it][A[A[A[A







  6%|▌         | 13/222 [01:06<17:44,  5.09s/it][A[A[A[A







  6%|▋         | 14/222 [01:11<17:40,  5.10s/it][A[A[A[A







  7%|▋         | 15/222 [01:17<18:11,  5.27s/it][A[A[A[A







  7%|▋         | 16/222 [01:23<19:07,  5.57s/it][A[A[A[A







  8%|▊         | 17/222 [01:28<18:25,  5.39s/it][A[A[A[A







  8%|▊         | 18/222 [01:33<17:46,  5.23s/it][A[A[A[A







  9%|▊         | 19/222 [01:38<17:19,  5.12s/it][A[A[A[A







  9%|▉         | 20/222 [01:43<17:01,  5.06s/it][A[A[A[A







  9%|▉         | 21/222 [01:48<16:50,  5.03s/it][A[A[A[A







 10%|▉         | 22/222 [01:53<16:39,  5.00s/it][A[A[A[A







 10%|█         | 23/222 [01:58<16:48,  5.07s/it][A[A[A[A







 11%|█         | 24/222 [02:05<18:03,  5.47s/it][A[A[A[A







 11%|█▏        | 25/222 [02:10<17:37,  5.37s/it][A[A[A[A







 12%|█▏        | 26/222 [02:15<17:17,  5.29s/it][A[A[A[A







 12%|█▏        | 27/222 [02:20<16:59,  5.23s/it][A[A[A[A







 13%|█▎        | 28/222 [02:25<16:50,  5.21s/it][A[A[A[A







 13%|█▎        | 29/222 [02:30<16:45,  5.21s/it][A[A[A[A







 14%|█▎        | 30/222 [02:36<16:47,  5.25s/it][A[A[A[A







 14%|█▍        | 31/222 [02:41<16:39,  5.23s/it][A[A[A[A







 14%|█▍        | 32/222 [02:47<17:38,  5.57s/it][A[A[A[A







 15%|█▍        | 33/222 [02:52<17:05,  5.42s/it][A[A[A[A







 15%|█▌        | 34/222 [02:57<16:42,  5.33s/it][A[A[A[A







 16%|█▌        | 35/222 [03:02<16:19,  5.24s/it][A[A[A[A







 16%|█▌        | 36/222 [03:08<16:10,  5.22s/it][A[A[A[A







 17%|█▋        | 37/222 [03:13<16:15,  5.27s/it][A[A[A[A







 17%|█▋        | 38/222 [03:18<16:03,  5.24s/it][A[A[A[A







 18%|█▊        | 39/222 [03:23<15:46,  5.17s/it][A[A[A[A







 18%|█▊        | 40/222 [03:28<15:44,  5.19s/it][A[A[A[A







 18%|█▊        | 41/222 [03:35<16:39,  5.52s/it][A[A[A[A







 19%|█▉        | 42/222 [03:40<15:58,  5.32s/it][A[A[A[A







 19%|█▉        | 43/222 [03:44<15:23,  5.16s/it][A[A[A[A







 20%|█▉        | 44/222 [03:49<14:56,  5.04s/it][A[A[A[A







 20%|██        | 45/222 [03:54<14:35,  4.94s/it][A[A[A[A







 21%|██        | 46/222 [03:59<14:27,  4.93s/it][A[A[A[A







 21%|██        | 47/222 [04:04<14:24,  4.94s/it][A[A[A[A







 22%|██▏       | 48/222 [04:09<14:22,  4.96s/it][A[A[A[A







 22%|██▏       | 49/222 [04:15<15:27,  5.36s/it][A[A[A[A







 23%|██▎       | 50/222 [04:20<14:59,  5.23s/it][A[A[A[A







 23%|██▎       | 51/222 [04:25<14:30,  5.09s/it][A[A[A[A







 23%|██▎       | 52/222 [04:29<14:08,  4.99s/it][A[A[A[A







 24%|██▍       | 53/222 [04:34<13:57,  4.95s/it][A[A[A[A







 24%|██▍       | 54/222 [04:39<13:48,  4.93s/it][A[A[A[A







 25%|██▍       | 55/222 [04:44<13:52,  4.99s/it][A[A[A[A







 25%|██▌       | 56/222 [04:49<13:59,  5.05s/it][A[A[A[A







 26%|██▌       | 57/222 [04:55<14:04,  5.12s/it][A[A[A[A







 26%|██▌       | 58/222 [05:01<14:49,  5.43s/it][A[A[A[A







 27%|██▋       | 59/222 [05:06<14:27,  5.32s/it][A[A[A[A







 27%|██▋       | 60/222 [05:11<14:06,  5.22s/it][A[A[A[A







 27%|██▋       | 61/222 [05:16<13:49,  5.15s/it][A[A[A[A







 28%|██▊       | 62/222 [05:21<13:31,  5.07s/it][A[A[A[A







 28%|██▊       | 63/222 [05:26<13:23,  5.06s/it][A[A[A[A







 29%|██▉       | 64/222 [05:31<13:11,  5.01s/it][A[A[A[A







 29%|██▉       | 65/222 [05:36<13:19,  5.10s/it][A[A[A[A







 30%|██▉       | 66/222 [05:42<14:06,  5.43s/it][A[A[A[A







 30%|███       | 67/222 [05:47<13:40,  5.29s/it][A[A[A[A







 31%|███       | 68/222 [05:52<13:19,  5.19s/it][A[A[A[A







 31%|███       | 69/222 [05:57<13:02,  5.11s/it][A[A[A[A







 32%|███▏      | 70/222 [06:02<12:48,  5.06s/it][A[A[A[A







 32%|███▏      | 71/222 [06:07<12:37,  5.02s/it][A[A[A[A







 32%|███▏      | 72/222 [06:12<12:36,  5.04s/it][A[A[A[A







 33%|███▎      | 73/222 [06:17<12:26,  5.01s/it][A[A[A[A







 33%|███▎      | 74/222 [06:23<13:14,  5.37s/it][A[A[A[A







 34%|███▍      | 75/222 [06:28<13:00,  5.31s/it][A[A[A[A







 34%|███▍      | 76/222 [06:33<12:42,  5.22s/it][A[A[A[A







 35%|███▍      | 77/222 [06:38<12:31,  5.18s/it][A[A[A[A







 35%|███▌      | 78/222 [06:43<12:16,  5.11s/it][A[A[A[A







 36%|███▌      | 79/222 [06:48<12:09,  5.10s/it][A[A[A[A







 36%|███▌      | 80/222 [06:55<13:01,  5.50s/it][A[A[A[A







 36%|███▋      | 81/222 [07:00<12:58,  5.52s/it][A[A[A[A







 37%|███▋      | 82/222 [07:06<12:38,  5.41s/it][A[A[A[A







 37%|███▋      | 83/222 [07:12<13:10,  5.69s/it][A[A[A[A







 38%|███▊      | 84/222 [07:17<12:38,  5.50s/it][A[A[A[A







 38%|███▊      | 85/222 [07:22<12:09,  5.32s/it][A[A[A[A







 39%|███▊      | 86/222 [07:27<11:46,  5.19s/it][A[A[A[A







 39%|███▉      | 87/222 [07:32<11:33,  5.14s/it][A[A[A[A







 40%|███▉      | 88/222 [07:37<11:18,  5.06s/it][A[A[A[A







 40%|████      | 89/222 [07:42<11:05,  5.01s/it][A[A[A[A







 41%|████      | 90/222 [07:47<11:08,  5.07s/it][A[A[A[A







 41%|████      | 91/222 [07:53<11:46,  5.39s/it][A[A[A[A







 41%|████▏     | 92/222 [07:58<11:24,  5.26s/it][A[A[A[A







 42%|████▏     | 93/222 [08:03<11:07,  5.18s/it][A[A[A[A







 42%|████▏     | 94/222 [08:08<10:55,  5.12s/it][A[A[A[A







 43%|████▎     | 95/222 [08:13<10:45,  5.09s/it][A[A[A[A







 43%|████▎     | 96/222 [08:18<10:39,  5.08s/it][A[A[A[A







 44%|████▎     | 97/222 [08:23<10:31,  5.05s/it][A[A[A[A







 44%|████▍     | 98/222 [08:29<10:45,  5.20s/it][A[A[A[A







 45%|████▍     | 99/222 [08:34<10:40,  5.21s/it][A[A[A[A







 45%|████▌     | 100/222 [08:40<11:13,  5.52s/it][A[A[A[A







 45%|████▌     | 101/222 [08:45<10:49,  5.37s/it][A[A[A[A







 46%|████▌     | 102/222 [08:50<10:43,  5.36s/it][A[A[A[A







 46%|████▋     | 103/222 [08:55<10:16,  5.18s/it][A[A[A[A







 47%|████▋     | 104/222 [09:00<09:54,  5.04s/it][A[A[A[A







 47%|████▋     | 105/222 [09:05<09:45,  5.01s/it][A[A[A[A







 48%|████▊     | 106/222 [09:10<09:32,  4.94s/it][A[A[A[A







 48%|████▊     | 107/222 [09:14<09:15,  4.83s/it][A[A[A[A







 49%|████▊     | 108/222 [09:20<09:53,  5.20s/it][A[A[A[A







 49%|████▉     | 109/222 [09:25<09:32,  5.06s/it][A[A[A[A







 50%|████▉     | 110/222 [09:30<09:22,  5.02s/it][A[A[A[A







 50%|█████     | 111/222 [09:35<09:13,  4.98s/it][A[A[A[A







 50%|█████     | 112/222 [09:39<08:56,  4.88s/it][A[A[A[A







 51%|█████     | 113/222 [09:44<08:50,  4.87s/it][A[A[A[A







 51%|█████▏    | 114/222 [09:49<08:40,  4.82s/it][A[A[A[A







 52%|█████▏    | 115/222 [09:54<08:30,  4.77s/it][A[A[A[A







 52%|█████▏    | 116/222 [09:59<08:37,  4.88s/it][A[A[A[A







 53%|█████▎    | 117/222 [10:05<09:07,  5.22s/it][A[A[A[A







 53%|█████▎    | 118/222 [10:09<08:48,  5.08s/it][A[A[A[A







 54%|█████▎    | 119/222 [10:14<08:40,  5.05s/it][A[A[A[A







 54%|█████▍    | 120/222 [10:19<08:28,  4.99s/it][A[A[A[A







 55%|█████▍    | 121/222 [10:24<08:11,  4.87s/it][A[A[A[A







 55%|█████▍    | 122/222 [10:29<08:05,  4.85s/it][A[A[A[A







 55%|█████▌    | 123/222 [10:33<07:54,  4.80s/it][A[A[A[A







 56%|█████▌    | 124/222 [10:38<07:45,  4.75s/it][A[A[A[A







 56%|█████▋    | 125/222 [10:44<08:23,  5.19s/it][A[A[A[A







 57%|█████▋    | 126/222 [10:49<08:09,  5.10s/it][A[A[A[A







 57%|█████▋    | 127/222 [10:54<07:54,  4.99s/it][A[A[A[A







 58%|█████▊    | 128/222 [10:59<07:45,  4.96s/it][A[A[A[A







 58%|█████▊    | 129/222 [11:04<07:36,  4.91s/it][A[A[A[A







 59%|█████▊    | 130/222 [11:08<07:27,  4.86s/it][A[A[A[A







 59%|█████▉    | 131/222 [11:13<07:23,  4.88s/it][A[A[A[A







 59%|█████▉    | 132/222 [11:18<07:17,  4.86s/it][A[A[A[A







 60%|█████▉    | 133/222 [11:24<07:41,  5.18s/it][A[A[A[A







 60%|██████    | 134/222 [11:29<07:29,  5.10s/it][A[A[A[A







 61%|██████    | 135/222 [11:34<07:16,  5.02s/it][A[A[A[A







 61%|██████▏   | 136/222 [11:38<07:01,  4.90s/it][A[A[A[A







 62%|██████▏   | 137/222 [11:43<06:58,  4.92s/it][A[A[A[A







 62%|██████▏   | 138/222 [11:48<06:46,  4.84s/it][A[A[A[A







 63%|██████▎   | 139/222 [11:53<06:37,  4.79s/it][A[A[A[A







 63%|██████▎   | 140/222 [11:58<06:40,  4.89s/it][A[A[A[A







 64%|██████▎   | 141/222 [12:03<06:35,  4.88s/it][A[A[A[A







 64%|██████▍   | 142/222 [12:09<07:01,  5.27s/it][A[A[A[A







 64%|██████▍   | 143/222 [12:14<06:50,  5.20s/it][A[A[A[A







 65%|██████▍   | 144/222 [12:19<06:35,  5.07s/it][A[A[A[A







 65%|██████▌   | 145/222 [12:23<06:19,  4.93s/it][A[A[A[A







 66%|██████▌   | 146/222 [12:28<06:13,  4.91s/it][A[A[A[A







 66%|██████▌   | 147/222 [12:33<06:03,  4.85s/it][A[A[A[A







 67%|██████▋   | 148/222 [12:37<05:55,  4.80s/it][A[A[A[A







 67%|██████▋   | 149/222 [12:42<05:51,  4.82s/it][A[A[A[A







 68%|██████▊   | 150/222 [12:48<06:13,  5.19s/it][A[A[A[A







 68%|██████▊   | 151/222 [12:53<06:05,  5.15s/it][A[A[A[A







 68%|██████▊   | 152/222 [12:58<05:55,  5.08s/it][A[A[A[A







 69%|██████▉   | 153/222 [13:03<05:41,  4.95s/it][A[A[A[A







 69%|██████▉   | 154/222 [13:08<05:37,  4.96s/it][A[A[A[A







 70%|██████▉   | 155/222 [13:13<05:27,  4.89s/it][A[A[A[A







 70%|███████   | 156/222 [13:17<05:18,  4.82s/it][A[A[A[A







 71%|███████   | 157/222 [13:23<05:20,  4.93s/it][A[A[A[A







 71%|███████   | 158/222 [13:27<05:15,  4.93s/it][A[A[A[A







 72%|███████▏  | 159/222 [13:34<05:34,  5.31s/it][A[A[A[A







 72%|███████▏  | 160/222 [13:39<05:24,  5.23s/it][A[A[A[A







 73%|███████▎  | 161/222 [13:43<05:11,  5.11s/it][A[A[A[A







 73%|███████▎  | 162/222 [13:48<04:58,  4.98s/it][A[A[A[A







 73%|███████▎  | 163/222 [13:53<04:52,  4.96s/it][A[A[A[A







 74%|███████▍  | 164/222 [13:58<04:43,  4.89s/it][A[A[A[A







 74%|███████▍  | 165/222 [14:02<04:34,  4.81s/it][A[A[A[A







 75%|███████▍  | 166/222 [14:07<04:30,  4.84s/it][A[A[A[A







 75%|███████▌  | 167/222 [14:14<04:48,  5.25s/it][A[A[A[A







 76%|███████▌  | 168/222 [14:19<04:38,  5.16s/it][A[A[A[A







 76%|███████▌  | 169/222 [14:23<04:27,  5.05s/it][A[A[A[A







 77%|███████▋  | 170/222 [14:28<04:17,  4.94s/it][A[A[A[A







 77%|███████▋  | 171/222 [14:33<04:12,  4.95s/it][A[A[A[A







 77%|███████▋  | 172/222 [14:38<04:04,  4.88s/it][A[A[A[A







 78%|███████▊  | 173/222 [14:42<03:55,  4.81s/it][A[A[A[A







 78%|███████▊  | 174/222 [14:47<03:56,  4.92s/it][A[A[A[A







 79%|███████▉  | 175/222 [14:52<03:50,  4.91s/it][A[A[A[A







 79%|███████▉  | 176/222 [14:58<04:00,  5.22s/it][A[A[A[A







 80%|███████▉  | 177/222 [15:03<03:52,  5.16s/it][A[A[A[A







 80%|████████  | 178/222 [15:08<03:42,  5.06s/it][A[A[A[A







 81%|████████  | 179/222 [15:13<03:33,  4.96s/it][A[A[A[A







 81%|████████  | 180/222 [15:18<03:28,  4.96s/it][A[A[A[A







 82%|████████▏ | 181/222 [15:23<03:20,  4.90s/it][A[A[A[A







 82%|████████▏ | 182/222 [15:27<03:13,  4.84s/it][A[A[A[A







 82%|████████▏ | 183/222 [15:32<03:12,  4.94s/it][A[A[A[A







 83%|████████▎ | 184/222 [15:39<03:22,  5.32s/it][A[A[A[A







 83%|████████▎ | 185/222 [15:43<03:10,  5.14s/it][A[A[A[A







 84%|████████▍ | 186/222 [15:48<03:02,  5.07s/it][A[A[A[A







 84%|████████▍ | 187/222 [15:53<02:55,  5.00s/it][A[A[A[A







 85%|████████▍ | 188/222 [15:58<02:46,  4.90s/it][A[A[A[A







 85%|████████▌ | 189/222 [16:03<02:41,  4.89s/it][A[A[A[A







 86%|████████▌ | 190/222 [16:07<02:35,  4.86s/it][A[A[A[A







 86%|████████▌ | 191/222 [16:12<02:28,  4.79s/it][A[A[A[A







 86%|████████▋ | 192/222 [16:18<02:36,  5.23s/it][A[A[A[A







 87%|████████▋ | 193/222 [16:23<02:28,  5.11s/it][A[A[A[A







 87%|████████▋ | 194/222 [16:28<02:19,  4.99s/it][A[A[A[A







 88%|████████▊ | 195/222 [16:33<02:13,  4.96s/it][A[A[A[A







 88%|████████▊ | 196/222 [16:38<02:07,  4.90s/it][A[A[A[A







 89%|████████▊ | 197/222 [16:42<02:00,  4.81s/it][A[A[A[A







 89%|████████▉ | 198/222 [16:47<01:56,  4.84s/it][A[A[A[A







 90%|████████▉ | 199/222 [16:52<01:50,  4.82s/it][A[A[A[A







 90%|█████████ | 200/222 [16:56<01:44,  4.74s/it][A[A[A[A







 91%|█████████ | 201/222 [17:03<01:49,  5.22s/it][A[A[A[A







 91%|█████████ | 202/222 [17:08<01:42,  5.11s/it][A[A[A[A







 91%|█████████▏| 203/222 [17:12<01:35,  5.01s/it][A[A[A[A







 92%|█████████▏| 204/222 [17:17<01:28,  4.90s/it][A[A[A[A







 92%|█████████▏| 205/222 [17:22<01:22,  4.85s/it][A[A[A[A







 93%|█████████▎| 206/222 [17:26<01:16,  4.76s/it][A[A[A[A







 93%|█████████▎| 207/222 [17:31<01:10,  4.68s/it][A[A[A[A







 94%|█████████▎| 208/222 [17:35<01:05,  4.68s/it][A[A[A[A







 94%|█████████▍| 209/222 [17:41<01:05,  5.05s/it][A[A[A[A







 95%|█████████▍| 210/222 [17:46<01:00,  5.03s/it][A[A[A[A







 95%|█████████▌| 211/222 [17:51<00:54,  4.97s/it][A[A[A[A







 95%|█████████▌| 212/222 [17:56<00:48,  4.86s/it][A[A[A[A







 96%|█████████▌| 213/222 [18:01<00:43,  4.88s/it][A[A[A[A







 96%|█████████▋| 214/222 [18:05<00:38,  4.84s/it][A[A[A[A







 97%|█████████▋| 215/222 [18:10<00:33,  4.79s/it][A[A[A[A







 97%|█████████▋| 216/222 [18:15<00:29,  4.90s/it][A[A[A[A







 98%|█████████▊| 217/222 [18:20<00:24,  4.86s/it][A[A[A[A







 98%|█████████▊| 218/222 [18:26<00:20,  5.21s/it][A[A[A[A







 99%|█████████▊| 219/222 [18:31<00:15,  5.13s/it][A[A[A[A







 99%|█████████▉| 220/222 [18:36<00:10,  5.02s/it][A[A[A[A







100%|█████████▉| 221/222 [18:41<00:04,  4.93s/it][A[A[A[A







100%|██████████| 222/222 [18:44<00:00,  5.07s/it]


In [None]:
# Sanity check on the extracted training features; the inline note records the
# shape the authors expected: (3543, 1920).
print(finaltrainingset.shape) #(3543, 1920)




In [None]:
from sklearn.svm import OneClassSVM
# Define the outlier-detection model: a one-class SVM with a linear kernel.
# Every other hyperparameter is left at its default (the fitted repr printed
# below this cell shows nu=0.5, gamma='scale').
# NOTE(review): nu=0.5 lets up to about half of the training clips fall outside
# the learned boundary, which presumably limits known-class recall - confirm/tune.
modellinear = OneClassSVM(kernel="linear", verbose=True)

# Fit on the extracted training features only; no labels are passed.
modellinear.fit(finaltrainingset)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='linear',
            max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=True)

In [None]:
# One-class SVM with a polynomial kernel; the fitted repr below shows the
# defaults in effect (degree=3, coef0=0.0, nu=0.5, gamma='scale').
modelpoly = OneClassSVM(kernel="poly", verbose=True)

# Fit on the extracted training features only; no labels are passed.
modelpoly.fit(finaltrainingset)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='poly',
            max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=True)

In [None]:
# One-class SVM with an RBF kernel; the fitted repr below shows the defaults in
# effect (gamma='scale', nu=0.5).
modelrbf = OneClassSVM(kernel="rbf", verbose=True)

# Fit on the extracted training features only; no labels are passed.
modelrbf.fit(finaltrainingset)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
            max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=True)

In [None]:
# One-class SVM with a sigmoid kernel; the fitted repr below shows the defaults
# in effect (coef0=0.0, gamma='scale', nu=0.5).
modelsigmoid = OneClassSVM(kernel="sigmoid", verbose=True)

# Fit on the extracted training features only; no labels are passed.
modelsigmoid.fit(finaltrainingset)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale',
            kernel='sigmoid', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
            verbose=True)

In [None]:
import pickle

# Persist the four fitted one-class SVMs under "<root_path>/OneClassSVM Models/",
# one pickle per kernel, named "basemodel_<kernel>".
kn = ["linear","poly","rbf","sigmoid"]
models = [modellinear,modelpoly,modelrbf,modelsigmoid]

# save model
for kernel_name, fitted_model in zip(kn, models):
  save_filename = os.path.join(root_path,"OneClassSVM Models/basemodel_"+kernel_name)
  # `with` closes (and flushes) the file even if pickling fails; the previous
  # bare open() left the handle to the garbage collector.
  with open(save_filename, 'wb') as model_file:
    pickle.dump(fitted_model, model_file)

### Test

In [None]:
# load in the models
# Each file is opened in a `with` block so the handle is closed right after
# unpickling (the previous bare open() calls were never closed).
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load files that were written by the save cell of this notebook.
print("loading linear...")
with open(root_path+"OneClassSVM Models/basemodel_linear", 'rb') as model_file:
  modellinear = pickle.load(model_file)
print("loading poly...")
with open(root_path+"OneClassSVM Models/basemodel_poly", 'rb') as model_file:
  modelpoly = pickle.load(model_file)
print("loading rbf...")
with open(root_path+"OneClassSVM Models/basemodel_rbf", 'rb') as model_file:
  modelrbf = pickle.load(model_file)
print("loading sigmoid...")
with open(root_path+"OneClassSVM Models/basemodel_sigmoid", 'rb') as model_file:
  modelsigmoid = pickle.load(model_file)

loading linear...
loading poly...
loading rbf...
loading sigmoid...


In [None]:
# Hold out a test_size fraction of the unknown-class clips (same random_state
# as the known-class split); the "train" side of this split is not used here.
# To evaluate on every unknown clip instead, use:
#Xtestvalunknown = data_df_unknown['slice_file_name'].tolist()
X_trash, Xtestvalunknown, y_trash, y_unknown = train_test_split(data_df_unknown['slice_file_name'].tolist(), data_df_unknown['classID'].tolist(), test_size=test_size, random_state = 42)

# For a quick smoke test, truncate both lists first:
#Xtestvalunknown = Xtestvalunknown[:25]
#X_test = X_test[:25]

# File names of the combined test set: unknown clips first, then known clips.
testX = Xtestvalunknown + X_test

# Multi-class ground truth: -1 for every unknown clip, the real classID for
# known clips. (This statement and its print used to appear twice.)
actualtestY = [-1] * len(Xtestvalunknown) + y_test
print("length of actual test y", len(actualtestY))

# Binary ground truth in the one-class SVM convention: -1 outlier, +1 inlier.
testY = np.array([-1] * len(Xtestvalunknown) + [1] * len(X_test))
print(testY.shape)

length of actual test y 1747
length of actual test y 1747
(1747,)


In [None]:
from keras import Model
from tqdm import tqdm

# Run every test batch through the base network and keep the activations of its
# "global_average_pooling2d_1" layer as the feature vectors for the SVMs.
test_loader = SpecLoaderWnn(testX, batch_size, spectrograms_path)

# Build the truncated feature-extraction model once; it does not depend on the
# batch, so re-creating it inside the loop only wasted time.
model_output = model.get_layer("global_average_pooling2d_1").output
m = Model(inputs=model.input, outputs=model_output)

# Collect per-batch outputs and concatenate once at the end (concatenating
# inside the loop re-copies everything gathered so far on every iteration).
batch_features = []
for ind in tqdm(range(len(test_loader))):
  # NOTE(review): this calls test_loader.getitem(ind); the SpecLoaderWnn
  # definitions visible in this notebook only define __getitem__ - confirm
  # which class definition is live when this cell runs.
  finaloutput = m.predict(test_loader.getitem(ind))
  if ind==0:
    print(finaloutput.shape)
  batch_features.append(finaloutput)

# Stays None when the loader yields no batches, as before.
finaltestset = np.concatenate(batch_features, axis=0) if batch_features else None

  1%|          | 1/110 [00:05<09:21,  5.16s/it]

(16, 1920)


  3%|▎         | 3/110 [00:16<09:32,  5.35s/it]



  4%|▎         | 4/110 [00:21<09:16,  5.25s/it]



  5%|▍         | 5/110 [00:25<09:01,  5.15s/it]



  5%|▌         | 6/110 [00:31<08:51,  5.11s/it]



  6%|▋         | 7/110 [00:35<08:39,  5.05s/it]



  7%|▋         | 8/110 [00:40<08:30,  5.01s/it]



  8%|▊         | 9/110 [00:46<08:53,  5.28s/it]



  9%|▉         | 10/110 [00:51<08:41,  5.22s/it]



 10%|█         | 11/110 [00:56<08:30,  5.16s/it]



 11%|█         | 12/110 [01:01<08:19,  5.10s/it]



 12%|█▏        | 13/110 [01:06<08:08,  5.04s/it]



 13%|█▎        | 14/110 [01:11<08:01,  5.02s/it]



 14%|█▎        | 15/110 [01:17<08:22,  5.29s/it]



 15%|█▍        | 16/110 [01:22<08:20,  5.32s/it]



 15%|█▌        | 17/110 [01:28<08:15,  5.33s/it]



 16%|█▋        | 18/110 [01:33<08:06,  5.29s/it]



 17%|█▋        | 19/110 [01:38<07:57,  5.25s/it]



 18%|█▊        | 20/110 [01:43<07:50,  5.23s/it]



 19%|█▉        | 21/110 [01:49<08:07,  5.48s/it]



 20%|██        | 22/110 [01:55<07:52,  5.37s/it]



 21%|██        | 23/110 [02:00<07:38,  5.27s/it]



 22%|██▏       | 24/110 [02:05<07:26,  5.19s/it]



 23%|██▎       | 25/110 [02:10<07:15,  5.12s/it]



 24%|██▎       | 26/110 [02:15<07:07,  5.09s/it]



 25%|██▍       | 27/110 [02:19<06:55,  5.01s/it]



 25%|██▌       | 28/110 [02:26<07:23,  5.40s/it]



 26%|██▋       | 29/110 [02:31<07:17,  5.40s/it]



 27%|██▋       | 30/110 [02:36<07:10,  5.38s/it]



 28%|██▊       | 31/110 [02:42<07:11,  5.47s/it]



 29%|██▉       | 32/110 [02:47<06:57,  5.36s/it]



 30%|███       | 33/110 [02:53<06:52,  5.36s/it]



 31%|███       | 34/110 [02:59<07:03,  5.57s/it]



 32%|███▏      | 35/110 [03:04<06:48,  5.44s/it]



 33%|███▎      | 36/110 [03:09<06:41,  5.43s/it]



 34%|███▎      | 37/110 [03:15<06:39,  5.47s/it]



 35%|███▍      | 38/110 [03:20<06:27,  5.38s/it]



 35%|███▌      | 39/110 [03:25<06:18,  5.34s/it]



 36%|███▋      | 40/110 [03:32<06:35,  5.65s/it]



 37%|███▋      | 41/110 [03:37<06:23,  5.55s/it]



 38%|███▊      | 42/110 [03:42<06:10,  5.45s/it]



 39%|███▉      | 43/110 [03:47<05:57,  5.34s/it]



 40%|████      | 44/110 [03:52<05:47,  5.27s/it]



 41%|████      | 45/110 [03:57<05:40,  5.24s/it]



 42%|████▏     | 46/110 [04:04<05:57,  5.59s/it]



 43%|████▎     | 47/110 [04:09<05:50,  5.57s/it]



 44%|████▎     | 48/110 [04:15<05:43,  5.54s/it]



 45%|████▍     | 49/110 [04:20<05:29,  5.41s/it]



 45%|████▌     | 50/110 [04:25<05:21,  5.37s/it]



 46%|████▋     | 51/110 [04:30<05:10,  5.26s/it]



 47%|████▋     | 52/110 [04:36<05:22,  5.56s/it]



 48%|████▊     | 53/110 [04:42<05:14,  5.52s/it]



 49%|████▉     | 54/110 [04:47<05:05,  5.45s/it]



 50%|█████     | 55/110 [04:52<04:56,  5.39s/it]



 51%|█████     | 56/110 [04:58<04:47,  5.33s/it]



 52%|█████▏    | 57/110 [05:03<04:39,  5.28s/it]



 53%|█████▎    | 58/110 [05:09<04:48,  5.55s/it]



 54%|█████▎    | 59/110 [05:14<04:40,  5.50s/it]



 55%|█████▍    | 60/110 [05:20<04:32,  5.45s/it]



 55%|█████▌    | 61/110 [05:25<04:24,  5.40s/it]



 56%|█████▋    | 62/110 [05:30<04:16,  5.34s/it]



 57%|█████▋    | 63/110 [05:35<04:08,  5.30s/it]



 58%|█████▊    | 64/110 [05:41<04:02,  5.27s/it]



 59%|█████▉    | 65/110 [05:47<04:09,  5.55s/it]



 60%|██████    | 66/110 [05:52<04:02,  5.52s/it]



 61%|██████    | 67/110 [05:57<03:53,  5.44s/it]



 62%|██████▏   | 68/110 [06:03<03:49,  5.46s/it]



 63%|██████▎   | 69/110 [06:08<03:41,  5.41s/it]



 64%|██████▎   | 70/110 [06:14<03:36,  5.41s/it]



 65%|██████▍   | 71/110 [06:20<03:43,  5.74s/it]



 65%|██████▌   | 72/110 [06:26<03:35,  5.68s/it]



 66%|██████▋   | 73/110 [06:32<03:31,  5.71s/it]



 67%|██████▋   | 74/110 [06:37<03:26,  5.72s/it]



 68%|██████▊   | 75/110 [06:43<03:20,  5.72s/it]



 69%|██████▉   | 76/110 [06:48<03:11,  5.64s/it]



 70%|███████   | 77/110 [06:55<03:18,  6.02s/it]



 71%|███████   | 78/110 [07:01<03:07,  5.87s/it]



 72%|███████▏  | 79/110 [07:06<02:55,  5.65s/it]



 73%|███████▎  | 80/110 [07:11<02:44,  5.50s/it]



 74%|███████▎  | 81/110 [07:16<02:35,  5.36s/it]



 75%|███████▍  | 82/110 [07:21<02:28,  5.30s/it]



 75%|███████▌  | 83/110 [07:27<02:29,  5.55s/it]



 76%|███████▋  | 84/110 [07:33<02:22,  5.48s/it]



 77%|███████▋  | 85/110 [07:38<02:13,  5.35s/it]



 78%|███████▊  | 86/110 [07:43<02:06,  5.28s/it]



 79%|███████▉  | 87/110 [07:48<02:01,  5.28s/it]



 80%|████████  | 88/110 [07:53<01:54,  5.20s/it]



 81%|████████  | 89/110 [08:00<02:02,  5.82s/it]



 82%|████████▏ | 90/110 [08:06<01:55,  5.80s/it]



 83%|████████▎ | 91/110 [08:11<01:46,  5.61s/it]



 84%|████████▎ | 92/110 [08:16<01:37,  5.44s/it]



 85%|████████▍ | 93/110 [08:22<01:30,  5.35s/it]



 85%|████████▌ | 94/110 [08:27<01:24,  5.28s/it]



 86%|████████▋ | 95/110 [08:33<01:24,  5.63s/it]



 87%|████████▋ | 96/110 [08:38<01:17,  5.53s/it]



 88%|████████▊ | 97/110 [08:44<01:10,  5.44s/it]



 89%|████████▉ | 98/110 [08:49<01:04,  5.35s/it]



 90%|█████████ | 99/110 [08:54<00:58,  5.31s/it]



 91%|█████████ | 100/110 [08:59<00:53,  5.32s/it]



 92%|█████████▏| 101/110 [09:05<00:48,  5.40s/it]



 93%|█████████▎| 102/110 [09:12<00:46,  5.82s/it]



 94%|█████████▎| 103/110 [09:17<00:39,  5.68s/it]



 95%|█████████▍| 104/110 [09:23<00:34,  5.69s/it]



 95%|█████████▌| 105/110 [09:28<00:28,  5.63s/it]



 96%|█████████▋| 106/110 [09:34<00:22,  5.57s/it]



 97%|█████████▋| 107/110 [09:39<00:16,  5.44s/it]



 98%|█████████▊| 108/110 [09:45<00:11,  5.67s/it]



 99%|█████████▉| 109/110 [09:50<00:05,  5.54s/it]



100%|██████████| 110/110 [09:54<00:00,  5.40s/it]


In [None]:
# Sanity check: one feature row per test clip (the recorded run printed (1747, 1920)).
print(finaltestset.shape) #(1747, 1920)

(1747, 1920)


In [None]:
# detect outliers in the test set
# Each yhat* array holds one decision per row of finaltestset; the values
# printed further down are -1 and 1, compared against testY (-1 unknown, 1 known).
print("predicting linear...")
yhatLinear = modellinear.predict(finaltestset)
print("predicting poly...")
yhatPoly = modelpoly.predict(finaltestset)
print("predicting rbf...")
yhatRBF = modelrbf.predict(finaltestset)
print("predicting sigmoid...")
yhatSigmoid = modelsigmoid.predict(finaltestset)

predicting linear...
predicting poly...
predicting rbf...
predicting sigmoid...


In [None]:
# Sanity check: one prediction per test clip (the recorded run printed (1747,)).
print(yhatLinear.shape)

(1747,)


In [None]:
def evaluate(x,y):
  """Return the hit rate between two label sequences.

  x, y: indexable sequences (e.g. numpy arrays of shape (n,)); x must be at
  least as long as y. Elements are compared after int() conversion.
  Returns the fraction (0.0-1.0) of positions in y where x agrees with y.
  Raises ZeroDivisionError when y is empty.
  """
  total = len(y)
  matches = sum(1 for pos in range(total) if int(x[pos]) == int(y[pos]))
  return matches/total

def evaluate2(predicted, expected):
    """Print overall accuracy, per-class accuracy and F1 scores.

    predicted, expected: equally long, indexable label sequences.
    Per-class accuracy is reported for each label found in `expected`, in
    order of first appearance, as (correct predictions) / (occurrences).
    F1 is printed with macro, weighted and micro averaging. Returns None.
    """
    overall = np.mean(np.array(predicted) == np.array(expected))
    print("Overall accuracy: {}".format(overall))

    # label -> [number predicted correctly, number of occurrences]
    per_class = {}
    for pos in range(len(expected)):
        label = expected[pos]
        tally = per_class.setdefault(label, [0, 0])
        tally[1] += 1
        if label == predicted[pos]:
            tally[0] += 1

    for label, (correct, seen) in per_class.items():
        print("Accuracy for class {}: {}".format(label, correct/seen))

    for average in ("macro", "weighted", "micro"):
        score = f1_score(expected, predicted, average=average)
        print("{} f1 score: {}".format(average, score))

In [None]:
# Known-vs-unknown hit rate of each kernel against the binary labels in testY.
kn = ["Linear","Poly","RBF","Sigmoid"]
yhats = [yhatLinear,yhatPoly,yhatRBF,yhatSigmoid]
print("Accuracy - hit rate")
print("--------------------------------")

for kernel_label, kernel_pred in zip(kn, yhats):
  hit_rate = evaluate(kernel_pred, testY)
  print("{}: {}".format(kernel_label, hit_rate))


Accuracy - hit rate
--------------------------------
Linear: 0.683457355466514
Poly: 0.6886090440755581
RBF: 0.3331425300515169
Sigmoid: 0.6794504865483686


In [None]:
# Two-stage prediction for the linear-kernel SVM: the one-class SVM decides
# known (+1) vs unknown (-1), then the base classifier assigns a class to every
# clip the SVM accepted as known.
#
# The combined result is written to a copy. Overwriting yhatLinear in place made
# this cell non-repeatable: class id 1 is indistinguishable from the SVM's +1
# marker, so a second run would only re-select clips already labelled class 1.
hybridLinear = yhatLinear.copy()

# get index of all 1s/normal class
index_normalList = np.flatnonzero(yhatLinear == 1).tolist()
print("length of index_normalList", len(index_normalList))

testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; np.argmax(model.predict(...), axis=-1) is the usual replacement -
# confirm against the installed version before changing.
normal_predictions = model.predict_classes(x=normalLoader, batch_size=None)

print("predict using one class svm:",yhatLinear[:100])
# Replace each +1 with the class predicted by the base model.
for index_yhat, predClass in zip(index_normalList, normal_predictions):
  hybridLinear[index_yhat] = predClass

print("prediction of positive classes through base model", hybridLinear)

print("\nLinear - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, hybridLinear, average='macro'))
print("Micro:", f1_score(actualtestY, hybridLinear, average='micro'))
print("Weighted:", f1_score(actualtestY, hybridLinear, average='weighted'))
print("None:", f1_score(actualtestY, hybridLinear, average=None))

print("\nEvaluate\n")
evaluate2(hybridLinear, actualtestY)

length of index_normalList 505
length of normalXfilenames 505
predict using one class svm: [-1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1
 -1 -1 -1 -1 -1 -1 -1  1 -1  1 -1 -1  1 -1 -1  1 -1 -1 -1 -1 -1 -1  1 -1
 -1 -1 -1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1]
prediction of positive classes through base model [-1  4 -1 ... -1 -1 -1]

Linear - F1 Scores
--------------------------------
Macro: 0.6233993776581975
Micro: 0.67830566685747
Weighted: 0.6598884054226275
None: [0.73704232 0.53246753 0.67241379 0.59333333 0.52601156 0.67912773]

Evaluate

Overall accuracy: 0.67830566685747
Accuracy for class -1: 0.9001161440185831
Accuracy for class 2: 0.43842364532019706
Accuracy for class 4: 0.6337209302325582
Accuracy for class 0: 0.3923444976076555
Accuracy for class 3: 0.3991228070175439
Accuracy for class 1: 0.527027027027027
macro f1 score: 0.623399377658

In [None]:
# Two-stage prediction for the polynomial-kernel SVM: the one-class SVM decides
# known (+1) vs unknown (-1), then the base classifier assigns a class to every
# clip the SVM accepted as known.
#
# The combined result is written to a copy. Overwriting yhatPoly in place made
# this cell non-repeatable: class id 1 is indistinguishable from the SVM's +1
# marker, so a second run would only re-select clips already labelled class 1.
hybridPoly = yhatPoly.copy()

# get index of all 1s/normal class
index_normalList = np.flatnonzero(yhatPoly == 1).tolist()
print("length of index_normalList", len(index_normalList))

testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; np.argmax(model.predict(...), axis=-1) is the usual replacement -
# confirm against the installed version before changing.
normal_predictions = model.predict_classes(x=normalLoader, batch_size=None)

print("predict using one class svm:",yhatPoly[:100])
# Replace each +1 with the class predicted by the base model.
for index_yhat, predClass in zip(index_normalList, normal_predictions):
  hybridPoly[index_yhat] = predClass

print("prediction of positive classes through base model", hybridPoly)

print("\nPoly - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, hybridPoly, average='macro'))
print("Micro:", f1_score(actualtestY, hybridPoly, average='micro'))
print("Weighted:", f1_score(actualtestY, hybridPoly, average='weighted'))
print("None:", f1_score(actualtestY, hybridPoly, average=None))

print("\nEvaluate\n")
evaluate2(hybridPoly, actualtestY)

length of index_normalList 482
length of normalXfilenames 482
predict using one class svm: [-1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1
 -1 -1 -1 -1 -1 -1 -1  1 -1  1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1
 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1]
prediction of positive classes through base model [-1  4 -1 ... -1 -1 -1]

Poly - F1 Scores
--------------------------------
Macro: 0.6267071185604906
Micro: 0.6846021751574127
Weighted: 0.6653804941979486
None: [0.74412041 0.57413249 0.67241379 0.62987013 0.50154799 0.63815789]

Evaluate

Overall accuracy: 0.6846021751574127
Accuracy for class -1: 0.9186991869918699
Accuracy for class 2: 0.47783251231527096
Accuracy for class 4: 0.563953488372093
Accuracy for class 0: 0.4354066985645933
Accuracy for class 3: 0.35526315789473684
Accuracy for class 1: 0.527027027027027
macro f1 score: 0.6267071185

In [None]:
# Two-stage prediction for the RBF-kernel SVM: the one-class SVM decides
# known (+1) vs unknown (-1), then the base classifier assigns a class to every
# clip the SVM accepted as known.
#
# The combined result is written to a copy. Overwriting yhatRBF in place made
# this cell non-repeatable: class id 1 is indistinguishable from the SVM's +1
# marker, so a second run would only re-select clips already labelled class 1.
hybridRBF = yhatRBF.copy()

# get index of all 1s/normal class
index_normalList = np.flatnonzero(yhatRBF == 1).tolist()
print("length of index_normalList", len(index_normalList))

testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; np.argmax(model.predict(...), axis=-1) is the usual replacement -
# confirm against the installed version before changing.
normal_predictions = model.predict_classes(x=normalLoader, batch_size=None)

print("predict using one class svm:",yhatRBF[:100])
# Replace each +1 with the class predicted by the base model.
for index_yhat, predClass in zip(index_normalList, normal_predictions):
  hybridRBF[index_yhat] = predClass

print("prediction of positive classes through base model", hybridRBF)

print("\nRBF - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, hybridRBF, average='macro'))
print("Micro:", f1_score(actualtestY, hybridRBF, average='micro'))
print("Weighted:", f1_score(actualtestY, hybridRBF, average='weighted'))
print("None:", f1_score(actualtestY, hybridRBF, average=None))

print("\nEvaluate\n")
evaluate2(hybridRBF, actualtestY)

length of index_normalList 1217
length of normalXfilenames 1217
predict using one class svm: [ 1 -1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1 -1  1  1 -1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1 -1  1  1 -1  1
  1  1  1  1  1  1  1 -1  1 -1  1  1 -1  1  1  1  1  1  1  1  1  1 -1 -1
  1  1  1 -1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1
  1  1  1  1]
prediction of positive classes through base model [ 4 -1  0 ...  3  2 -1]

RBF - F1 Scores
--------------------------------
Macro: 0.3466274763647119
Micro: 0.3113909559244419
Weighted: 0.288500974605474
None: [0.16247304 0.41532258 0.20437956 0.50731707 0.41577061 0.37450199]

Evaluate

Overall accuracy: 0.3113909559244419
Accuracy for class -1: 0.13124274099883856
Accuracy for class 2: 0.5123152709359606
Accuracy for class 4: 0.5465116279069767
Accuracy for class 0: 0.49282296650717705
Accuracy for class 3: 0.5087719298245614
Accuracy for class 1: 0.1891891891891892
macro f1 score: 0.34662747

In [None]:
# Two-stage prediction for the sigmoid-kernel SVM: the one-class SVM decides
# known (+1) vs unknown (-1), then the base classifier assigns a class to every
# clip the SVM accepted as known.
#
# The combined result is written to a copy. Overwriting yhatSigmoid in place
# made this cell non-repeatable: class id 1 is indistinguishable from the SVM's
# +1 marker, so a second run would only re-select clips already labelled class 1.
hybridSigmoid = yhatSigmoid.copy()

# get index of all 1s/normal class
index_normalList = np.flatnonzero(yhatSigmoid == 1).tolist()
print("length of index_normalList", len(index_normalList))

testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# NOTE(review): predict_classes is not available in newer Keras/TensorFlow
# releases; np.argmax(model.predict(...), axis=-1) is the usual replacement -
# confirm against the installed version before changing.
normal_predictions = model.predict_classes(x=normalLoader, batch_size=None)

print("predict using one class svm:",yhatSigmoid[:100])
# Replace each +1 with the class predicted by the base model.
for index_yhat, predClass in zip(index_normalList, normal_predictions):
  hybridSigmoid[index_yhat] = predClass

print("prediction of positive classes through base model", hybridSigmoid)

print("\nSigmoid - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, hybridSigmoid, average='macro'))
print("Micro:", f1_score(actualtestY, hybridSigmoid, average='micro'))
print("Weighted:", f1_score(actualtestY, hybridSigmoid, average='weighted'))
print("None:", f1_score(actualtestY, hybridSigmoid, average=None))

print("\nEvaluate\n")
evaluate2(hybridSigmoid, actualtestY)

length of index_normalList 504
length of normalXfilenames 504
predict using one class svm: [-1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1
 -1 -1 -1 -1 -1 -1 -1  1 -1  1 -1 -1  1 -1 -1  1 -1 -1 -1 -1 -1 -1  1 -1
 -1 -1 -1  1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1]
prediction of positive classes through base model [-1  4 -1 ... -1 -1 -1]

Sigmoid - F1 Scores
--------------------------------
Macro: 0.6178692444865497
Micro: 0.6742987979393246
Weighted: 0.6546729644028844
None: [0.7338403  0.50331126 0.67241379 0.56462585 0.53977273 0.69325153]

Evaluate

Overall accuracy: 0.6742987979393246
Accuracy for class -1: 0.8966318234610917
Accuracy for class 2: 0.4088669950738916
Accuracy for class 4: 0.6569767441860465
Accuracy for class 0: 0.36363636363636365
Accuracy for class 3: 0.4166666666666667
Accuracy for class 1: 0.527027027027027
macro f1 score: 0.6178692

# Using features extracted from VGGish

In [None]:
from tqdm import tqdm

class SpecLoader():
  """Eager loader for arrays stored as ``<spec_dir><file name>.npy``.

  The directory prefix and the file name are joined by plain string
  concatenation, so ``spec_dir`` must already end with a path separator.
  """

  def __init__(self, x_set, spec_dir):
    # x_set: sequence of file names (".npy" is appended when loading).
    # spec_dir: path prefix placed directly in front of each file name.
    self.spec_dir = spec_dir
    self.x = x_set

  def __len__(self):
    """Number of file names held by this loader."""
    return len(self.x)

  def getitemlist(self):
    """Load every file and return the transposed arrays as a Python list.

    Progress is reported through a tqdm bar over the file names.
    """
    return [
        np.load(self.spec_dir + fileName + ".npy").transpose()
        for fileName in tqdm(self.x)
    ]

  def getitem(self):
    """Load every file and return the transposed arrays as one np.ndarray."""
    return np.array(self.getitemlist())

class SpecLoaderWnn(keras.utils.Sequence):
  """Keras Sequence yielding batches of transposed arrays loaded from .npy files.

  Only inputs are produced (no labels). Files are read from
  ``<spec_dir><file name>.npy`` using plain string concatenation.
  """

  def __init__(self, x_set, batch_size, spec_dir):
    self.spec_dir = spec_dir
    self.batch_size = batch_size
    self.x = x_set

  def __len__(self):
    """Number of batches; the final batch may hold fewer than batch_size items."""
    n_batches = np.ceil(len(self.x) / self.batch_size)
    return int(n_batches)

  def __getitem__(self, idx):
    """Load batch number ``idx`` and return it as a single np.ndarray."""
    first = idx * self.batch_size
    last = (idx + 1) * self.batch_size
    return np.array([
        np.load(self.spec_dir + fileName + ".npy").transpose()
        for fileName in self.x[first:last]
    ])

In [None]:
# Feature sizes fed to OneClassSVM, as recorded by the authors:
#   original spectrograms: (3543, 96000)
#   VGGish embeddings:     (3543, 128)

# Same value as the setting at the top of the notebook; presumably repeated so
# this section can be run on its own.
batch_size = 16

In [None]:
from sklearn.model_selection import train_test_split

# Path prefix of the stored embedding arrays (read as "<prefix><file name>.npy").
vggfeatures_path = os.path.join(root_path,"embeddings/")

data_df = pd.read_csv(csv_path)
# classID 0-4 are treated as the known classes, 5 and above as unknown.
data_df_known = data_df.loc[data_df["classID"] < 5]
data_df_unknown = data_df.loc[data_df["classID"] >= 5]

# Same call (test_size, random_state=42) as the split at the top of the notebook.
Xtrainval, X_test, Ytrainval, y_test = train_test_split(data_df_known['slice_file_name'].tolist(), data_df_known['classID'].tolist(), test_size=test_size, random_state = 42)


In [None]:
# Alternative kept by the authors: use every known-class clip instead of the
# train part of the split.
#Xtrainval = data_df_known['slice_file_name'].tolist()
#Ytrainval = data_df_known['classID'].tolist()

# Load the stored array of every training clip from vggfeatures_path.
slvgg = SpecLoader(Xtrainval, vggfeatures_path)
trainX = slvgg.getitem()

# The recorded run printed (3543, 128, 1).
print(trainX.shape)



  0%|          | 0/3543 [00:00<?, ?it/s][A
  2%|▏         | 56/3543 [00:00<00:06, 558.09it/s][A
  3%|▎         | 110/3543 [00:00<00:06, 552.13it/s][A
  5%|▍         | 174/3543 [00:00<00:05, 575.68it/s][A
  7%|▋         | 235/3543 [00:00<00:05, 583.86it/s][A
  8%|▊         | 295/3543 [00:00<00:05, 586.17it/s][A
 10%|▉         | 351/3543 [00:00<00:05, 575.86it/s][A
 11%|█▏        | 406/3543 [00:00<00:05, 565.29it/s][A
 13%|█▎        | 466/3543 [00:00<00:05, 573.35it/s][A
 13%|█▎        | 466/3543 [00:16<00:05, 573.35it/s][A
 14%|█▍        | 497/3543 [00:16<07:50,  6.47it/s] [A
 14%|█▍        | 498/3543 [00:17<14:21,  3.53it/s][A
 14%|█▍        | 499/3543 [00:17<19:55,  2.55it/s][A
 14%|█▍        | 500/3543 [00:18<21:22,  2.37it/s][A
 14%|█▍        | 501/3543 [00:18<22:09,  2.29it/s][A
 14%|█▍        | 502/3543 [00:19<24:50,  2.04it/s][A
 14%|█▍        | 503/3543 [00:19<25:32,  1.98it/s][A
 14%|█▍        | 504/3543 [00:20<25:57,  1.95it/s][A
 14%|█▍        | 505/3543 [0

(3543, 128, 1)





In [None]:
# Flatten each sample to a single feature vector: (n, 128, 1) -> (n, 128).
# The row width is computed from whatever trailing dimensions are present, so
# re-running this cell on the already flattened array is a no-op rather than a
# "not enough values to unpack" error.
nsamples = trainX.shape[0]
trainX = trainX.reshape((nsamples, int(np.prod(trainX.shape[1:]))))

In [None]:
from sklearn.svm import OneClassSVM
# Define the outlier-detection model: a one-class SVM with a linear kernel.
# Every other hyperparameter is left at its default (the fitted repr printed
# below this cell shows nu=0.5, gamma='scale').
# NOTE(review): nu=0.5 lets up to about half of the training clips fall outside
# the learned boundary, which presumably limits known-class recall - confirm/tune.
modellinear = OneClassSVM(kernel="linear", verbose=True)

# Fit on the flattened embedding features only; no labels are passed.
modellinear.fit(trainX)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='linear',
            max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=True)

In [None]:
# One-class SVM with a polynomial kernel; the fitted repr below shows the
# defaults in effect (degree=3, coef0=0.0, nu=0.5, gamma='scale').
modelpoly = OneClassSVM(kernel="poly", verbose=True)

# Fit on the flattened embedding features only; no labels are passed.
modelpoly.fit(trainX)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='poly',
            max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=True)

In [None]:
# One-class SVM with an RBF kernel; the fitted repr below shows the defaults in
# effect (gamma='scale', nu=0.5).
modelrbf = OneClassSVM(kernel="rbf", verbose=True)

# Fit on the flattened embedding features only; no labels are passed.
modelrbf.fit(trainX)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
            max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=True)

In [None]:
# One-class SVM with a sigmoid kernel; the fitted repr below shows the defaults
# in effect (coef0=0.0, gamma='scale', nu=0.5).
modelsigmoid = OneClassSVM(kernel="sigmoid", verbose=True)

# Fit on the flattened embedding features only; no labels are passed.
modelsigmoid.fit(trainX)

[LibSVM]

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='scale',
            kernel='sigmoid', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
            verbose=True)

In [None]:
import pickle

# Persist the four fitted one-class SVMs under "<root_path>/OneClassSVM Models/",
# one pickle per kernel, named "vgg_<kernel>".
kn = ["linear","poly","rbf","sigmoid"]
models = [modellinear,modelpoly,modelrbf,modelsigmoid]

# save model
for kernel_name, fitted_model in zip(kn, models):
  save_filename = os.path.join(root_path,"OneClassSVM Models/vgg_"+kernel_name)
  # `with` closes (and flushes) the file even if pickling fails; the previous
  # bare open() left the handle to the garbage collector.
  with open(save_filename, 'wb') as model_file:
    pickle.dump(fitted_model, model_file)

## Test

In [None]:
# load in the models
# Each file is opened in a `with` block so the handle is closed right after
# unpickling (the previous bare open() calls were never closed).
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load files that were written by the save cell of this notebook.
print("loading linear...")
with open(root_path+"OneClassSVM Models/vgg_linear", 'rb') as model_file:
  modellinear = pickle.load(model_file)
print("loading poly...")
with open(root_path+"OneClassSVM Models/vgg_poly", 'rb') as model_file:
  modelpoly = pickle.load(model_file)
print("loading rbf...")
with open(root_path+"OneClassSVM Models/vgg_rbf", 'rb') as model_file:
  modelrbf = pickle.load(model_file)
print("loading sigmoid...")
with open(root_path+"OneClassSVM Models/vgg_sigmoid", 'rb') as model_file:
  modelsigmoid = pickle.load(model_file)

loading linear...
loading poly...
loading rbf...
loading sigmoid...


In [None]:
# Hold out a test_size fraction of the unknown-class clips; only the "test"
# side of the split is used below. (Debug variants: take all unknown clips via
# data_df_unknown['slice_file_name'].tolist(), or truncate both lists to [:25].)
X_trash, Xtestvalunknown, y_trash, y_unknown = train_test_split(
    data_df_unknown['slice_file_name'].tolist(),
    data_df_unknown['classID'].tolist(),
    test_size=test_size,
    random_state = 42,
)

# File names of the combined test set: unknown clips first, then known clips.
testX = Xtestvalunknown + X_test

# Multi-class ground truth: -1 for unknown clips, real classID for known clips.
actualtestY = [-1] * len(Xtestvalunknown) + y_test
print("length of actual test y", len(actualtestY))

# Binary ground truth: -1 for unknown clips, +1 for known clips.
testY = np.array([-1] * len(Xtestvalunknown) + [1] * len(X_test))
print(testY.shape)

# From here on testX holds the loaded arrays instead of the file names.
sltest = SpecLoader(testX, vggfeatures_path)
testX = sltest.getitem()

# Flatten each sample to one feature vector.
nsamples, x, y = testX.shape
testX = testX.reshape((nsamples, x*y))

  0%|          | 0/1747 [00:00<?, ?it/s]

length of actual test y 1747
(1747,)


100%|██████████| 1747/1747 [08:43<00:00,  3.34it/s]


In [None]:
# detect outliers in the test set
# Each yhat* array holds one decision per row of testX; they are compared
# against testY (-1 unknown, 1 known) in the accuracy cell below.
yhatLinear = modellinear.predict(testX)
yhatPoly = modelpoly.predict(testX)
yhatRBF = modelrbf.predict(testX)
yhatSigmoid = modelsigmoid.predict(testX)

In [None]:
def evaluate(x,y):
  """Return the hit rate between two label sequences.

  x, y: indexable sequences (e.g. numpy arrays of shape (n,)); x must be at
  least as long as y. Elements are compared after int() conversion.
  Returns the fraction (0.0-1.0) of positions in y where x agrees with y.
  Raises ZeroDivisionError when y is empty.
  """
  total = len(y)
  matches = sum(1 for pos in range(total) if int(x[pos]) == int(y[pos]))
  return matches/total

def evaluate2(predicted, expected):
    """Print overall accuracy, per-class accuracy and F1 scores.

    predicted / expected are equally long, indexable sequences of class labels.
    Per-class accuracy is grouped by the expected label and printed in the order
    in which each label first appears in expected. The macro, weighted and micro
    F1 scores are printed last. Nothing is returned.
    """
    overall = np.mean(np.array(predicted) == np.array(expected))
    print("Overall accuracy: {}".format(overall))

    # label -> [number predicted correctly, number of occurrences]
    per_class = {}
    for pos, label in enumerate(expected):
        tally = per_class.setdefault(label, [0, 0])
        tally[1] += 1
        if label == predicted[pos]:
            tally[0] += 1

    for label, (correct, seen) in per_class.items():
        print("Accuracy for class {}: {}".format(label, correct/seen))

    for average in ["macro", "weighted", "micro"]:
        score = f1_score(expected, predicted, average=average)
        print("{} f1 score: {}".format(average, score))

In [None]:
# Binary hit rate of each one-class SVM against the known/unknown labels in testY.
# kn and yhats are paired by position.
kn = ["Linear","Poly","RBF","Sigmoid"]
yhats = [yhatLinear,yhatPoly,yhatRBF,yhatSigmoid]
print("Accuracy - hit rate")
print("--------------------------------")

for kernel_name, kernel_pred in zip(kn, yhats):
  hit_rate = evaluate(kernel_pred, testY)
  print("{}: {}".format(kernel_name, hit_rate))


Accuracy - hit rate
--------------------------------
Linear: 0.5031482541499713
Poly: 0.5054378935317687
RBF: 0.5151688609044076
Sigmoid: 0.4968517458500286


In [None]:
# Stage 2 for the linear-kernel SVM: every clip the one-class SVM accepted (+1)
# is classified by `model`, and its +1 in yhatLinear is replaced by that class
# id. Clips the SVM rejected keep the label -1.

# yhatLinear is overwritten in place further down, so this cell is only valid
# directly after the SVM predict cell. On a second run a stored class id of 1
# could not be told apart from the SVM's +1, so stop instead of mis-scoring.
assert all(int(v) in (-1, 1) for v in yhatLinear), \
    "yhatLinear already holds class ids; re-run the OneClassSVM predict cell first"

# get index of all 1s/normal class
index_normalList = [i for i, svm_label in enumerate(yhatLinear) if svm_label == 1]
print("length of index_normalList", len(index_normalList))

# same order as the file list the SVM feature matrix was built from
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# Sequential.predict_classes() was removed from Keras in TensorFlow 2.6, so it
# is rebuilt from predict() with the same rule: argmax over a multi-unit output,
# 0.5 threshold for a single-unit output.
normal_scores = model.predict(x=normalLoader, batch_size=None)
if normal_scores.shape[-1] > 1:
  normal_predictions = normal_scores.argmax(axis=-1)
else:
  normal_predictions = (normal_scores > 0.5).astype("int32").ravel()

print("predict using one class svm:",yhatLinear[:100])
# a silent length mismatch would leave some +1 labels unreplaced
assert len(normal_predictions) == len(index_normalList), \
    "expected exactly one class prediction per SVM-accepted clip"
# change the value in yhatLinear
yhatLinear[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatLinear)

print("\nLinear - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatLinear, average='macro'))
print("Micro:", f1_score(actualtestY, yhatLinear, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatLinear, average='weighted'))
print("None:", f1_score(actualtestY, yhatLinear, average=None))

print("\nEvaluate\n")
evaluate2(yhatLinear, actualtestY)

length of index_normalList 888
length of normalXfilenames 888
predict using one class svm: [-1 -1  1  1  1  1 -1  1  1 -1  1  1 -1  1 -1 -1  1  1 -1  1 -1 -1 -1 -1
  1 -1 -1  1  1 -1  1  1 -1 -1 -1  1 -1  1 -1 -1 -1 -1 -1  1 -1  1  1 -1
  1  1 -1  1 -1  1  1  1  1  1 -1 -1 -1  1 -1 -1  1 -1 -1  1  1 -1 -1  1
 -1  1  1 -1 -1 -1 -1 -1 -1  1  1 -1  1  1  1 -1  1  1  1 -1 -1  1 -1  1
 -1  1  1  1]
prediction of positive classes through base model [-1 -1  0 ...  3 -1  0]

Linear - F1 Scores
--------------------------------
Macro: 0.49470339917454537
Micro: 0.4882655981682885
Weighted: 0.48954644290505456
None: [0.49534884 0.46698113 0.56953642 0.56521739 0.43612335 0.43501326]

Evaluate

Overall accuracy: 0.4882655981682885
Accuracy for class -1: 0.49477351916376305
Accuracy for class 2: 0.5123152709359606
Accuracy for class 4: 0.47674418604651164
Accuracy for class 0: 0.47368421052631576
Accuracy for class 3: 0.4342105263157895
Accuracy for class 1: 0.581081081081081
macro f1 score: 0.4947

In [None]:
# Stage 2 for the polynomial-kernel SVM: every clip the one-class SVM accepted
# (+1) is classified by `model`, and its +1 in yhatPoly is replaced by that
# class id. Clips the SVM rejected keep the label -1.

# yhatPoly is overwritten in place further down, so this cell is only valid
# directly after the SVM predict cell. On a second run a stored class id of 1
# could not be told apart from the SVM's +1, so stop instead of mis-scoring.
assert all(int(v) in (-1, 1) for v in yhatPoly), \
    "yhatPoly already holds class ids; re-run the OneClassSVM predict cell first"

# get index of all 1s/normal class
index_normalList = [i for i, svm_label in enumerate(yhatPoly) if svm_label == 1]
print("length of index_normalList", len(index_normalList))

# same order as the file list the SVM feature matrix was built from
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# Sequential.predict_classes() was removed from Keras in TensorFlow 2.6, so it
# is rebuilt from predict() with the same rule: argmax over a multi-unit output,
# 0.5 threshold for a single-unit output.
normal_scores = model.predict(x=normalLoader, batch_size=None)
if normal_scores.shape[-1] > 1:
  normal_predictions = normal_scores.argmax(axis=-1)
else:
  normal_predictions = (normal_scores > 0.5).astype("int32").ravel()

print("predict using one class svm:",yhatPoly[:100])
# a silent length mismatch would leave some +1 labels unreplaced
assert len(normal_predictions) == len(index_normalList), \
    "expected exactly one class prediction per SVM-accepted clip"
# change the value in yhatPoly
yhatPoly[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatPoly)

print("\nPoly - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatPoly, average='macro'))
print("Micro:", f1_score(actualtestY, yhatPoly, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatPoly, average='weighted'))
print("None:", f1_score(actualtestY, yhatPoly, average=None))

print("\nEvaluate\n")
evaluate2(yhatPoly, actualtestY)

length of index_normalList 870
length of normalXfilenames 870
predict using one class svm: [-1 -1  1  1  1  1 -1  1  1 -1  1  1 -1  1 -1 -1  1  1 -1  1 -1 -1 -1 -1
  1 -1 -1  1  1 -1  1  1 -1 -1 -1  1 -1  1 -1 -1 -1 -1 -1  1 -1 -1  1 -1
  1  1 -1  1 -1  1  1  1  1  1 -1 -1 -1  1 -1 -1  1 -1 -1  1  1 -1 -1  1
 -1  1  1 -1 -1 -1 -1 -1 -1  1  1 -1  1  1  1 -1  1  1  1 -1 -1  1 -1  1
 -1  1  1  1]
prediction of positive classes through base model [-1 -1  0 ...  3 -1  0]

Poly - F1 Scores
--------------------------------
Macro: 0.49621033019228894
Micro: 0.49170005724098453
Weighted: 0.49270120197491896
None: [0.50287687 0.4725537  0.57718121 0.5613079  0.42600897 0.43733333]

Evaluate

Overall accuracy: 0.49170005724098453
Accuracy for class -1: 0.5075493612078978
Accuracy for class 2: 0.5073891625615764
Accuracy for class 4: 0.47674418604651164
Accuracy for class 0: 0.47368421052631576
Accuracy for class 3: 0.4166666666666667
Accuracy for class 1: 0.581081081081081
macro f1 score: 0.49621

In [None]:
# Stage 2 for the RBF-kernel SVM: every clip the one-class SVM accepted (+1) is
# classified by `model`, and its +1 in yhatRBF is replaced by that class id.
# Clips the SVM rejected keep the label -1.

# yhatRBF is overwritten in place further down, so this cell is only valid
# directly after the SVM predict cell. On a second run a stored class id of 1
# could not be told apart from the SVM's +1, so stop instead of mis-scoring.
assert all(int(v) in (-1, 1) for v in yhatRBF), \
    "yhatRBF already holds class ids; re-run the OneClassSVM predict cell first"

# get index of all 1s/normal class
index_normalList = [i for i, svm_label in enumerate(yhatRBF) if svm_label == 1]
print("length of index_normalList", len(index_normalList))

# same order as the file list the SVM feature matrix was built from
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# Sequential.predict_classes() was removed from Keras in TensorFlow 2.6, so it
# is rebuilt from predict() with the same rule: argmax over a multi-unit output,
# 0.5 threshold for a single-unit output.
normal_scores = model.predict(x=normalLoader, batch_size=None)
if normal_scores.shape[-1] > 1:
  normal_predictions = normal_scores.argmax(axis=-1)
else:
  normal_predictions = (normal_scores > 0.5).astype("int32").ravel()

print("predict using one class svm:",yhatRBF[:100])
# a silent length mismatch would leave some +1 labels unreplaced
assert len(normal_predictions) == len(index_normalList), \
    "expected exactly one class prediction per SVM-accepted clip"
# change the value in yhatRBF
yhatRBF[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatRBF)

print("\nRBF - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatRBF, average='macro'))
print("Micro:", f1_score(actualtestY, yhatRBF, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatRBF, average='weighted'))
print("None:", f1_score(actualtestY, yhatRBF, average=None))

print("\nEvaluate\n")
evaluate2(yhatRBF, actualtestY)

length of index_normalList 875
length of normalXfilenames 875
predict using one class svm: [ 1 -1 -1 -1 -1  1  1 -1 -1  1  1 -1  1 -1  1 -1  1 -1  1  1  1 -1 -1 -1
 -1 -1  1  1 -1 -1 -1  1 -1  1 -1 -1  1 -1  1 -1 -1  1  1 -1 -1  1  1  1
  1  1  1  1 -1  1  1 -1 -1  1 -1  1  1  1 -1  1 -1 -1 -1 -1  1  1 -1  1
  1 -1 -1  1 -1  1 -1  1  1  1  1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1  1 -1
  1 -1 -1  1]
prediction of positive classes through base model [ 4 -1 -1 ... -1 -1 -1]

RBF - F1 Scores
--------------------------------
Macro: 0.47725813220152413
Micro: 0.5014310246136233
Weighted: 0.5004403650519041
None: [0.51125216 0.58371041 0.34782609 0.53513514 0.45333333 0.43229167]

Evaluate

Overall accuracy: 0.5014310246136233
Accuracy for class -1: 0.5145180023228804
Accuracy for class 2: 0.4876847290640394
Accuracy for class 4: 0.48255813953488375
Accuracy for class 0: 0.6172248803827751
Accuracy for class 3: 0.4473684210526316
Accuracy for class 1: 0.2702702702702703
macro f1 score: 0.477258132

In [None]:
# Stage 2 for the sigmoid-kernel SVM: every clip the one-class SVM accepted (+1)
# is classified by `model`, and its +1 in yhatSigmoid is replaced by that class
# id. Clips the SVM rejected keep the label -1.

# yhatSigmoid is overwritten in place further down, so this cell is only valid
# directly after the SVM predict cell. On a second run a stored class id of 1
# could not be told apart from the SVM's +1, so stop instead of mis-scoring.
assert all(int(v) in (-1, 1) for v in yhatSigmoid), \
    "yhatSigmoid already holds class ids; re-run the OneClassSVM predict cell first"

# get index of all 1s/normal class
index_normalList = [i for i, svm_label in enumerate(yhatSigmoid) if svm_label == 1]
print("length of index_normalList", len(index_normalList))

# same order as the file list the SVM feature matrix was built from
testXfilenames = Xtestvalunknown + X_test
normalXfilenames = [testXfilenames[i] for i in index_normalList]
print("length of normalXfilenames", len(normalXfilenames))

normalLoader = SpecLoaderWnn(normalXfilenames, batch_size,spectrograms_path)
# Sequential.predict_classes() was removed from Keras in TensorFlow 2.6, so it
# is rebuilt from predict() with the same rule: argmax over a multi-unit output,
# 0.5 threshold for a single-unit output.
normal_scores = model.predict(x=normalLoader, batch_size=None)
if normal_scores.shape[-1] > 1:
  normal_predictions = normal_scores.argmax(axis=-1)
else:
  normal_predictions = (normal_scores > 0.5).astype("int32").ravel()

print("predict using one class svm:",yhatSigmoid[:100])
# a silent length mismatch would leave some +1 labels unreplaced
assert len(normal_predictions) == len(index_normalList), \
    "expected exactly one class prediction per SVM-accepted clip"
# change the value in yhatSigmoid
yhatSigmoid[index_normalList] = normal_predictions

print("prediction of positive classes through base model", yhatSigmoid)

print("\nSigmoid - F1 Scores")
print("--------------------------------")
print("Macro:", f1_score(actualtestY, yhatSigmoid, average='macro'))
print("Micro:", f1_score(actualtestY, yhatSigmoid, average='micro'))
print("Weighted:", f1_score(actualtestY, yhatSigmoid, average='weighted'))
print("None:", f1_score(actualtestY, yhatSigmoid, average=None))

print("\nEvaluate\n")
evaluate2(yhatSigmoid, actualtestY)

length of index_normalList 895
length of normalXfilenames 895
predict using one class svm: [-1 -1  1  1  1  1 -1  1  1 -1  1  1  1  1 -1  1  1  1  1  1 -1 -1 -1 -1
  1 -1 -1  1  1 -1  1  1 -1 -1 -1  1 -1  1 -1 -1 -1 -1 -1  1 -1  1  1 -1
  1  1 -1  1 -1  1  1  1  1  1 -1  1 -1  1 -1 -1  1 -1 -1  1  1 -1 -1  1
 -1  1  1 -1 -1 -1 -1 -1 -1  1  1 -1  1  1 -1 -1  1  1  1 -1 -1  1 -1  1
 -1  1  1  1]
prediction of positive classes through base model [-1 -1  0 ...  3 -1 -1]

Sigmoid - F1 Scores
--------------------------------
Macro: 0.4881275174744557
Micro: 0.48196908986834575
Weighted: 0.4827822349620117
None: [0.48686515 0.45520581 0.55263158 0.53369272 0.44631579 0.45405405]

Evaluate

Overall accuracy: 0.48196908986834575
Accuracy for class -1: 0.4843205574912892
Accuracy for class 2: 0.4876847290640394
Accuracy for class 4: 0.4883720930232558
Accuracy for class 0: 0.44976076555023925
Accuracy for class 3: 0.4649122807017544
Accuracy for class 1: 0.5675675675675675
macro f1 score: 0.4881