In [119]:
import pandas as pd
import numpy as np
import csv
from DeepSvDDClass import *
import torch

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Pick the compute device once for the whole notebook: the GPU when CUDA is
# usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [120]:
# No validation set is split off here -- open question: is that acceptable because SVDD does not overfit?
def train_test(deepsvdd_datapath=None, split_ratio: float = 0.2, random_state=None):
    """Build the SVDD train/test frames from a labelled feature CSV.

    The CSV is expected to contain a ``"normal/abnormal"`` column holding the
    strings ``"normal"`` or ``"abnormal"``; rows carrying any other label are
    dropped. Labels are rewritten to ``1`` (normal) and ``-1`` (abnormal).

    Parameters
    ----------
    deepsvdd_datapath : str or path-like
        Location of the CSV file, handed to ``pandas.read_csv``.
    split_ratio : float, default 0.2
        Forwarded as ``test_size`` to ``train_test_split``: the share of the
        normal rows that is held out for testing.
    random_state : int, RandomState instance or None, default None
        Seed forwarded to both ``shuffle`` calls. ``None`` keeps the original
        unseeded (non-reproducible) row order.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train, test)``. ``train`` holds only normal rows; ``test`` holds the
        held-out normal rows plus every abnormal row. Both are row-shuffled.

    Raises
    ------
    ValueError
        If ``deepsvdd_datapath`` is ``None``.
    """
    if deepsvdd_datapath is None:
        raise ValueError("deepsvdd_datapath must point to the feature CSV, got None")

    label_col = "normal/abnormal"
    deepsvdd_data_csv = pd.read_csv(deepsvdd_datapath)

    # Train data only needs normal rows; test data needs normal and abnormal rows.
    # Explicit copies: writing labels through .loc on a boolean-mask slice of the
    # loaded frame is the chained-assignment pattern that raises
    # SettingWithCopyWarning and may or may not write through.
    data_normal = deepsvdd_data_csv[deepsvdd_data_csv[label_col] == "normal"].copy()
    data_abnormal = deepsvdd_data_csv[deepsvdd_data_csv[label_col] == "abnormal"].copy()

    # Encode the string labels as the +1 / -1 targets used downstream.
    data_normal.loc[data_normal[label_col] == "normal", label_col] = 1
    data_abnormal.loc[data_abnormal[label_col] == "abnormal", label_col] = -1

    # shuffle=False: the split is positional, so the held-out normal rows are
    # the trailing rows of the normal subset in file order.
    train_dataset_normal, test_dataset_normal = train_test_split(
        data_normal, test_size=split_ratio, shuffle=False
    )

    train_dataset = train_dataset_normal
    test_dataset = pd.concat([test_dataset_normal, data_abnormal])

    return (
        shuffle(train_dataset, random_state=random_state),
        shuffle(test_dataset, random_state=random_state),
    )


# Feature table used for this run. NOTE(review): this is a machine-specific
# absolute path; the notebook only runs where this file exists.
datapath = r'C:\Users\brech\THESIS_local\ToyADMOS\DeepSVDD_case1_ch1.csv'

# Hold out 20 % of the normal rows for the test frame; the remaining normal
# rows form the training frame.
X_train, X_test = train_test(
    split_ratio=0.2,
    deepsvdd_datapath=datapath,
)

In [121]:
# Show the shuffled training frame returned by train_test (label column still included at this point).
X_train

Unnamed: 0,normal/abnormal,rms,zcr,kurtosis,skewness,std,CF,SF,IF,CLF,centroid,entropy
277,1,0.148673,0.155245,1.035623,-0.043591,0.148673,6.726193,1.312184,8.826004,10.691986,2046.483190,0.801180
767,1,0.167917,0.126381,0.859702,-0.042620,0.167917,5.955331,1.299818,7.740849,9.314855,1834.407457,0.800402
148,1,0.138712,0.136861,1.494711,-0.073761,0.138712,7.209190,1.343916,9.688548,11.936780,1886.025942,0.799062
301,1,0.150515,0.138059,0.698096,-0.038314,0.150515,6.643838,1.290439,8.573464,10.280748,1956.819881,0.820905
1055,1,0.144952,0.134516,0.722922,-0.018693,0.144952,6.898855,1.290117,8.900330,10.666333,1946.666784,0.804632
...,...,...,...,...,...,...,...,...,...,...,...,...
181,1,0.166713,0.114225,0.703503,-0.042665,0.166713,5.998332,1.289464,7.734633,9.264990,1707.899402,0.808314
257,1,0.156259,0.123044,0.788500,-0.063075,0.156259,6.399629,1.290669,8.259804,9.901114,1778.056707,0.795954
449,1,0.158786,0.093675,0.538637,-0.086377,0.158786,6.297803,1.284346,8.088556,9.684534,1488.870780,0.763011
150,1,0.136370,0.129126,0.869249,-0.026097,0.136370,7.332977,1.297246,9.512675,11.435392,1812.774436,0.736767


In [122]:
# Targets y: +1 marks a positive (normal) sample, -1 a negative (abnormal) one.
# The added trailing axis turns each label vector from shape (n_samples,)
# into a column vector of shape (n_samples, 1).
Y_train_numpy = X_train["normal/abnormal"].to_numpy()[:, np.newaxis]
Y_test_numpy = X_test["normal/abnormal"].to_numpy()[:, np.newaxis]

# Remove the label column so both frames hold feature values only.
# NOTE: X_train / X_test are rebound here, so re-running this cell without
# re-running the split first fails with a KeyError on the label lookup above.
X_train = X_train.drop("normal/abnormal", axis=1)
X_test = X_test.drop("normal/abnormal", axis=1)

X_train_numpy = X_train.to_numpy()
X_test_numpy = X_test.to_numpy()

# Sanity check: print the four array shapes (features then labels, train before test).
print(*(arr.shape for arr in (X_train_numpy, X_test_numpy, Y_train_numpy, Y_test_numpy)))

(1080, 11) (534, 11) (1080, 1) (534, 1)


In [134]:
# Find out which kernel / hyper-parameter combination gives the best
# cross-validated score.
# NOTE(review): the training frame is built from normal rows only, so the
# "accuracy" scored here presumably never sees an abnormal sample -- confirm
# that this ranking is meaningful before trusting best_params_.
from sklearn.model_selection import learning_curve, GridSearchCV

# One sub-grid per kernel, listing only the parameters that kernel is searched over.
param_grid = [
    dict(kernel=["rbf"], gamma=[0.1, 0.2, 0.5], C=[0.1, 0.5, 1]),
    dict(kernel=["linear"], C=[0.1, 0.5, 1]),
    dict(kernel=["poly"], C=[0.1, 0.5, 1], degree=[2, 3, 4, 5]),
]

# 5-fold cross-validation over every combination, scored by accuracy.
svdd = GridSearchCV(
    estimator=BaseSVDD(display='off'),
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
)
svdd.fit(X_train_numpy, Y_train_numpy)
print("best parameters:", svdd.best_params_, sep="\n")
print("\n")

# Keep the refitted winner and the per-candidate score statistics.
best_model = svdd.best_estimator_
means = svdd.cv_results_["mean_test_score"]
stds = svdd.cv_results_["std_test_score"]

# One line per candidate: mean score, +/- two standard deviations, parameters.
for mean, std, params in zip(means, stds, svdd.cv_results_["params"]):
    print(f"{mean:.3f} (+/-{2 * std:.3f}) for {params!r}")
print()
















































best parameters:
{'C': 0.1, 'kernel': 'linear'}


0.491 (+/-0.073) for {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}
0.369 (+/-0.070) for {'C': 0.1, 'gamma': 0.2, 'kernel': 'rbf'}
0.152 (+/-0.060) for {'C': 0.1, 'gamma': 0.5, 'kernel': 'rbf'}
0.515 (+/-0.073) for {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}
0.360 (+/-0.065) for {'C': 0.5, 'gamma': 0.2, 'kernel': 'rbf'}
0.155 (+/-0.058) for {'C': 0.5, 'gamma': 0.5, 'kernel': 'rbf'}
0.498 (+/-0.079) for {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
0.360 (+/-0.065) for {'C': 1, 'gamma': 0.2, 'kernel': 'rbf'}
0.152 (+/-0.056) for {'C': 1, 'gamma': 0.5, 'kernel': 'rbf'}
1.000 (+/-0.000) for {'C': 0.1, 'kernel': 'linear'}
1.000 (+/-0.000) for {'C': 0.5, 'kernel': 'linear'}
1.000 (+/-0.000) for {'C': 1, 'kernel': 'linear'}
1.000 (+/-0.000) for {'C': 0.1, 'degree': 2, 'kernel': 'poly'}
1.000 (+/-0.000) for {'C': 0.1, 'degree': 3, 'kernel': 'poly'}
0.998 (+/-0.005) for {'C': 0.1, 'degree': 4, 'kernel': 'poly'}
0.996 (+/-0.004) for {'C': 0.1, 'degree': 5, '

In [145]:
# Follows example 2 from https://github.com/iqiukp/SVDD-Python
# cvxopt does not support ints, only doubles, so cast every array to double first.
X_train_numpy, X_test_numpy, Y_train_numpy, Y_test_numpy = (
    arr.astype(np.double)
    for arr in (X_train_numpy, X_test_numpy, Y_train_numpy, Y_test_numpy)
)

# Configuration currently under test: polynomial kernel, degree 5, C = 0.5.
svdd = BaseSVDD(kernel='poly', degree=5, C=0.5)
svdd.fit(X_train_numpy, Y_train_numpy)
y_test_predict = svdd.predict(X_test_numpy, Y_test_numpy)

# Accuracies recorded by the author for the configurations tried so far:
#
#   poly    C = 0.1  degree = 2    52.06 %
#   poly    C = 0.1  degree = 4    55.05 %
#   poly    C = 0.5  degree = 5    58.05 %
#
#   linear  C = 0.1  gamma = 0.3   50.56 %
#
#   rbf     C = 0.1  gamma = 0.1   75.84 %   <<======= best so far
#   rbf     C = 0.1  gamma = 0.2   69.10 %
#   rbf     C = 0.1  gamma = 0.5   61.23 %
#
#   rbf     C = 0.5  gamma = 0.1   75.46 %
#   rbf     C = 1    gamma = 0.1   75.28 %





*** Fitting of the SVDD model is completed. ***

running time         = 2.1580 seconds
kernel function      = poly
iterations           = 20
number of samples    = 1080
number of features   = 11
number of SVs        = 2
ratio of SVs         = 0.1852 %
accuracy             = 97.6852 %




*** Prediction of the provided data is completed. ***

running time         = 0.0320 seconds
number of samples    = 534
number of alarm      = 60
accuracy             = 58.0524 %


