In [1]:
import numpy as np
import pandas as pd
from src.PDLSSVM import PDLSSVM
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix, accuracy_score

# One seed shared by the CV splitter and the hold-out split below.
SEED = 1234

# 5-fold shuffled cross-validation splitter.
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

iris = load_iris()
# Restrict the problem to the first 100 samples.
X = iris['data'][:100]
# Copy before relabelling: a plain slice is a view, so assigning into it
# would also overwrite the label array stored inside `iris`.
y = iris['target'][:100].copy()
# The first 50 samples get label -1; the remaining 50 keep their original label.
y[:50] = -1

# Hold out 20% as the final test set; X / y are rebound to the training portion.
X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [3]:
from tqdm.notebook import tqdm

from itertools import product

# Candidate values for each of the four PDLSSVM constructor arguments.
rho_search_space = [1e-6, 1e-5, 1e-4, 1e-3]
# NOTE(review): the grid steps from 1e-1 straight to 1e+1, so 1e0 is never
# tried -- confirm this gap is intended before adding it (it changes the table).
c_search_space   = [1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8]
c1_search_space  = list(c_search_space)
c2_search_space  = list(c_search_space)

# The splitter is seeded with a fixed integer, so every call to kf.split(X)
# yields the same folds. Generate the index pairs once instead of once per
# hyper-parameter combination.
cv_splits = list(kf.split(X))

# Column-wise accumulator: one entry per (rho, c, c1, c2) combination.
results_dict = {"rho": [], "c": [], "c1": [], "c2": [], "Acc": []}

for rho in tqdm(rho_search_space):
    # product() iterates with c outermost and c2 innermost, i.e. the same
    # order as three nested for-loops over c, c1, c2.
    for c, c1, c2 in product(c_search_space, c1_search_space, c2_search_space):
        fold_acc = []
        for train_index, val_index in cv_splits:
            # Slice inside the loop so each fit receives freshly allocated arrays.
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            clf = PDLSSVM(rho, c, c1, c2)
            clf.fit(X_train, y_train)
            # predict returns three values; only the first (predicted labels) is used.
            pred, _, _ = clf.predict(X_val)
            fold_acc.append(accuracy_score(y_val, pred))

        # Record the mean validation accuracy across folds for this combination.
        results_dict['rho'].append(rho)
        results_dict['c'].append(c)
        results_dict['c1'].append(c1)
        results_dict['c2'].append(c2)
        results_dict['Acc'].append(np.mean(fold_acc))

results_df = pd.DataFrame.from_dict(results_dict)
results_df

  0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0,rho,c,c1,c2,Acc
0,0.000001,1.000000e-08,1.000000e-08,1.000000e-08,0.8375
1,0.000001,1.000000e-08,1.000000e-08,1.000000e-07,0.8375
2,0.000001,1.000000e-08,1.000000e-08,1.000000e-06,0.8375
3,0.000001,1.000000e-08,1.000000e-08,1.000000e-05,0.8375
4,0.000001,1.000000e-08,1.000000e-08,1.000000e-04,0.8375
...,...,...,...,...,...
16379,0.001000,1.000000e+08,1.000000e+08,1.000000e+04,0.5250
16380,0.001000,1.000000e+08,1.000000e+08,1.000000e+05,0.5250
16381,0.001000,1.000000e+08,1.000000e+08,1.000000e+06,0.5250
16382,0.001000,1.000000e+08,1.000000e+08,1.000000e+07,0.5250


In [12]:
# Rank every hyper-parameter combination by mean CV accuracy, best first.
results_df.sort_values(by='Acc', ascending=False)

Unnamed: 0,rho,c,c1,c2,Acc
3073,0.000001,100000.0,1.000000e-08,1.000000e-07,1.0000
6928,0.000010,10000.0,1.000000e-07,1.000000e-08,1.0000
6960,0.000010,10000.0,1.000000e-05,1.000000e-08,1.0000
6946,0.000010,10000.0,1.000000e-06,1.000000e-06,1.0000
6945,0.000010,10000.0,1.000000e-06,1.000000e-07,1.0000
...,...,...,...,...,...
2672,0.000001,1000.0,1.000000e-01,1.000000e-08,0.0125
2562,0.000001,1000.0,1.000000e-08,1.000000e-06,0.0125
2561,0.000001,1000.0,1.000000e-08,1.000000e-07,0.0125
2656,0.000001,1000.0,1.000000e-02,1.000000e-08,0.0125


In [14]:
# Hyper-parameters used for the final model.
# NOTE(review): these values are hard-coded rather than read from results_df,
# and this combination is not among the rows displayed in the ranking output
# -- confirm it is one of the top-scoring settings.
rho, c, c1, c2 = 1e-4, 1e+5, 1e-7, 1e-8

# Refit on the full training portion and evaluate once on the held-out test set.
clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# predict returns three values; only the first (predicted labels) is used.
pred, _, _ = clf.predict(X_test, verbose=True)
confusion_matrix(y_test, pred)

Training Done
Total Running Time: 0.0587
Prediction Done
Total Running Time: 0.0002


array([[ 7,  0],
       [ 0, 13]])