In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from src.PDLSSVM import PDLSSVM
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix, accuracy_score

kf = KFold(n_splits=5, shuffle=True, random_state=1234)

# iris dataset

In [18]:
iris = load_iris()
X = iris['data'][:100]
y = iris['target'][:100]
y[:50] = -1

X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

In [3]:
rho_search_space = [1e-5, 1e-4]
c_search_space   = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

results_dict = {"rho":[], "c": [], "c1": [], "c2": [], "Acc": []}

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                cv_acc = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]
                
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X_train, y_train)
                    pred, _, _ = clf.predict(X_val)
                    acc = accuracy_score(y_val, pred)
                    cv_acc.append(acc)
                
                cv_acc = np.mean(cv_acc)
                results_dict['rho'].append(rho)
                results_dict['c'].append(c)
                results_dict['c1'].append(c1)
                results_dict['c2'].append(c2)
                results_dict['Acc'].append(cv_acc)
                
results_df = pd.DataFrame.from_dict(results_dict)
results_df

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))




Unnamed: 0,rho,c,c1,c2,Acc
0,0.00001,1.000000e-08,1.000000e-08,1.000000e-08,0.8375
1,0.00001,1.000000e-08,1.000000e-08,1.000000e-06,0.8375
2,0.00001,1.000000e-08,1.000000e-08,1.000000e-04,0.8375
3,0.00001,1.000000e-08,1.000000e-08,1.000000e-02,0.8375
4,0.00001,1.000000e-08,1.000000e-08,1.000000e+02,0.8375
...,...,...,...,...,...
1019,0.00010,1.000000e+08,1.000000e+08,1.000000e-02,1.0000
1020,0.00010,1.000000e+08,1.000000e+08,1.000000e+02,1.0000
1021,0.00010,1.000000e+08,1.000000e+08,1.000000e+04,1.0000
1022,0.00010,1.000000e+08,1.000000e+08,1.000000e+06,1.0000


In [4]:
results_df.sort_values('Acc', ascending=False)

Unnamed: 0,rho,c,c1,c2,Acc
1023,0.00010,1.000000e+08,1.000000e+08,1.000000e+08,1.0000
388,0.00001,1.000000e+06,1.000000e-08,1.000000e+02,1.0000
382,0.00001,1.000000e+04,1.000000e+08,1.000000e+06,1.0000
383,0.00001,1.000000e+04,1.000000e+08,1.000000e+08,1.0000
384,0.00001,1.000000e+06,1.000000e-08,1.000000e-08,1.0000
...,...,...,...,...,...
187,0.00001,1.000000e-04,1.000000e+08,1.000000e-02,0.4625
186,0.00001,1.000000e-04,1.000000e+08,1.000000e-04,0.4625
185,0.00001,1.000000e-04,1.000000e+08,1.000000e-06,0.4625
184,0.00001,1.000000e-04,1.000000e+08,1.000000e-08,0.4625


In [19]:
#PDLSSVM
rho = 1e-5
c   = 1e+6
c1  = 1e-8
c2  = 1e+2

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.092
Prediction Done
Total Running Time: 0.0


1.0

In [20]:
# LSSVM
rho = 1e-5
c   = 1e+6
c1  = 0
c2  = 0

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.11
Prediction Done
Total Running Time: 0.001


1.0

In [21]:
from sklearn.svm import SVC

clf = SVC()
clf.fit(X, y)
pred = clf.predict(X_test)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

1.0

# sonar dataset

In [22]:
df = pd.read_table("data/sonar.all-data", sep=",", header=None)

X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

y[np.isin(y, "R")] = -1
y[np.isin(y, "M")] = 1
y = np.expand_dims(y, -1).astype('int64')

X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=1234, stratify=y)

In [6]:
rho_search_space = [1e-5, 1e-4]
c_search_space   = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

results_dict = {"rho":[], "c": [], "c1": [], "c2": [], "Acc": []}
kf = KFold(n_splits=5, shuffle=True, random_state=1234)

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                cv_acc = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]
                
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X_train, y_train)
                    pred, _, _ = clf.predict(X_val)
                    pred.astype('int64')
                    
                    acc = accuracy_score(y_val, pred)
                    cv_acc.append(acc)
                
                cv_acc = np.mean(cv_acc)
                results_dict['rho'].append(rho)
                results_dict['c'].append(c)
                results_dict['c1'].append(c1)
                results_dict['c2'].append(c2)
                results_dict['Acc'].append(cv_acc)
                
results_df = pd.DataFrame.from_dict(results_dict)
results_df

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))




Unnamed: 0,rho,c,c1,c2,Acc
0,0.00001,1.000000e-08,1.000000e-08,1.000000e-08,0.541176
1,0.00001,1.000000e-08,1.000000e-08,1.000000e-06,0.541176
2,0.00001,1.000000e-08,1.000000e-08,1.000000e-04,0.541176
3,0.00001,1.000000e-08,1.000000e-08,1.000000e-02,0.541176
4,0.00001,1.000000e-08,1.000000e-08,1.000000e+02,0.541176
...,...,...,...,...,...
1019,0.00010,1.000000e+08,1.000000e+08,1.000000e-02,0.542424
1020,0.00010,1.000000e+08,1.000000e+08,1.000000e+02,0.734403
1021,0.00010,1.000000e+08,1.000000e+08,1.000000e+04,0.734403
1022,0.00010,1.000000e+08,1.000000e+08,1.000000e+06,0.734403


In [7]:
results_df.sort_values('Acc', ascending=False)

Unnamed: 0,rho,c,c1,c2,Acc
355,0.00001,10000.0,1.000000e+02,1.000000e-02,0.758645
420,0.00001,1000000.0,1.000000e+02,1.000000e+02,0.752763
460,0.00001,100000000.0,1.000000e-06,1.000000e+02,0.752763
438,0.00001,1000000.0,1.000000e+06,1.000000e+06,0.752763
439,0.00001,1000000.0,1.000000e+06,1.000000e+08,0.752763
...,...,...,...,...,...
378,0.00001,10000.0,1.000000e+08,1.000000e-04,0.457398
379,0.00001,10000.0,1.000000e+08,1.000000e-02,0.457398
442,0.00001,1000000.0,1.000000e+08,1.000000e-04,0.457398
441,0.00001,1000000.0,1.000000e+08,1.000000e-06,0.457398


In [23]:
#PDLSSVM
rho = 1e-5
c   = 1e+4
c1  = 1e+2
c2  = 1e-2

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.113
Prediction Done
Total Running Time: 0.0


0.7142857142857143

In [24]:
# LSSVM
rho = 1e-5
c   = 1e+4
c1  = 0
c2  = 0

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.11
Prediction Done
Total Running Time: 0.0


0.7619047619047619

In [25]:
clf = SVC()
clf.fit(X, y)
pred = clf.predict(X_test)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

  return f(*args, **kwargs)


0.8333333333333334

# dbworld dataset

In [26]:
data = np.load("data/dbworld.npy")
X = data[:, :-1]
y = data[:, -1]

X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=1234, stratify=y)

In [9]:
rho_search_space = [1e-5, 1e-4]
c_search_space   = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

results_dict = {"rho":[], "c": [], "c1": [], "c2": [], "Acc": []}
kf = KFold(n_splits=5, shuffle=True, random_state=1234)

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                cv_acc = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]
                
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X_train, y_train)
                    pred, _, _ = clf.predict(X_val)
                    pred.astype('int64')
                    
                    acc = accuracy_score(y_val, pred)
                    cv_acc.append(acc)
                
                cv_acc = np.mean(cv_acc)
                results_dict['rho'].append(rho)
                results_dict['c'].append(c)
                results_dict['c1'].append(c1)
                results_dict['c2'].append(c2)
                results_dict['Acc'].append(cv_acc)
                
results_df = pd.DataFrame.from_dict(results_dict)
results_df

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))




Unnamed: 0,rho,c,c1,c2,Acc
0,0.00001,1.000000e-08,1.000000e-08,1.000000e-08,0.843636
1,0.00001,1.000000e-08,1.000000e-08,1.000000e-06,0.863636
2,0.00001,1.000000e-08,1.000000e-08,1.000000e-04,0.863636
3,0.00001,1.000000e-08,1.000000e-08,1.000000e-02,0.863636
4,0.00001,1.000000e-08,1.000000e-08,1.000000e+02,0.863636
...,...,...,...,...,...
1019,0.00010,1.000000e+08,1.000000e+08,1.000000e-02,0.763636
1020,0.00010,1.000000e+08,1.000000e+08,1.000000e+02,0.843636
1021,0.00010,1.000000e+08,1.000000e+08,1.000000e+04,0.843636
1022,0.00010,1.000000e+08,1.000000e+08,1.000000e+06,0.843636


In [10]:
results_df.sort_values('Acc', ascending=False)

Unnamed: 0,rho,c,c1,c2,Acc
129,0.00001,1.000000e-04,1.000000e-08,1.000000e-06,0.863636
193,0.00001,1.000000e-02,1.000000e-08,1.000000e-06,0.863636
639,0.00010,1.000000e-06,1.000000e+08,1.000000e+08,0.863636
640,0.00010,1.000000e-04,1.000000e-08,1.000000e-08,0.863636
165,0.00001,1.000000e-04,1.000000e+02,1.000000e+04,0.863636
...,...,...,...,...,...
556,0.00010,1.000000e-08,1.000000e+04,1.000000e+02,0.449091
555,0.00010,1.000000e-08,1.000000e+04,1.000000e-02,0.449091
554,0.00010,1.000000e-08,1.000000e+04,1.000000e-04,0.449091
553,0.00010,1.000000e-08,1.000000e+04,1.000000e-06,0.449091


In [27]:
#PDLSSVM
rho = 1e-5
c   = 1e-4
c1  = 1e-8
c2  = 1e-6

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, b_flag=False, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.147
Prediction Done
Total Running Time: 0.0


0.9230769230769231

In [28]:
#LSSVM
rho = 1e-5
c   = 1e-4
c1  = 0
c2  = 0

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, b_flag=False, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.14
Prediction Done
Total Running Time: 0.0


0.9230769230769231

In [29]:
from sklearn.svm import SVC

clf = SVC()
clf.fit(X, y)
pred = clf.predict(X_test)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

1.0

# heart attack

In [30]:
data = pd.read_csv("data/heart_attack.csv")
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=1234, shuffle=True, stratify=y)

In [12]:
rho_search_space = [1e-5, 1e-4]
c_search_space   = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

results_dict = {"rho":[], "c": [], "c1": [], "c2": [], "Acc": []}
kf = KFold(n_splits=5, shuffle=True, random_state=1234)

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                cv_acc = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]
                
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X_train, y_train)
                    pred, _, _ = clf.predict(X_val)
                    pred.astype('int64')
                    
                    acc = accuracy_score(y_val, pred)
                    cv_acc.append(acc)
                
                cv_acc = np.mean(cv_acc)
                results_dict['rho'].append(rho)
                results_dict['c'].append(c)
                results_dict['c1'].append(c1)
                results_dict['c2'].append(c2)
                results_dict['Acc'].append(cv_acc)
                
results_df = pd.DataFrame.from_dict(results_dict)
results_df

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))




Unnamed: 0,rho,c,c1,c2,Acc
0,0.00001,1.000000e-08,1.000000e-08,1.000000e-08,0.545068
1,0.00001,1.000000e-08,1.000000e-08,1.000000e-06,0.545068
2,0.00001,1.000000e-08,1.000000e-08,1.000000e-04,0.545068
3,0.00001,1.000000e-08,1.000000e-08,1.000000e-02,0.545068
4,0.00001,1.000000e-08,1.000000e-08,1.000000e+02,0.545068
...,...,...,...,...,...
1019,0.00010,1.000000e+08,1.000000e+08,1.000000e-02,0.805697
1020,0.00010,1.000000e+08,1.000000e+08,1.000000e+02,0.818282
1021,0.00010,1.000000e+08,1.000000e+08,1.000000e+04,0.818282
1022,0.00010,1.000000e+08,1.000000e+08,1.000000e+06,0.818282


In [13]:
results_df.sort_values('Acc', ascending=False)

Unnamed: 0,rho,c,c1,c2,Acc
358,0.00001,10000.00,100.0,1.000000e+06,0.830697
356,0.00001,10000.00,100.0,1.000000e+02,0.830697
382,0.00001,10000.00,100000000.0,1.000000e+06,0.830697
381,0.00001,10000.00,100000000.0,1.000000e+04,0.830697
380,0.00001,10000.00,100000000.0,1.000000e+02,0.830697
...,...,...,...,...,...
752,0.00010,0.01,1000000.0,1.000000e-08,0.367687
761,0.00010,0.01,100000000.0,1.000000e-06,0.367687
754,0.00010,0.01,1000000.0,1.000000e-04,0.367687
755,0.00010,0.01,1000000.0,1.000000e-02,0.367687


In [31]:
#PDLSSVM
rho = 1e-5
c   = 1e+5
c1  = 1e+3
c2  = 1e+6

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.11
Prediction Done
Total Running Time: 0.001


0.8852459016393442

In [32]:
#LSSVM
rho = 1e-5
c   = 1e+5
c1  = 0
c2  = 0

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
pred, _, _ = clf.predict(X_test, verbose=True)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

Training Done
Total Running Time: 0.132
Prediction Done
Total Running Time: 0.0


0.8688524590163934

In [33]:
from sklearn.svm import SVC

clf = SVC()
clf.fit(X, y)
pred = clf.predict(X_test)
# confusion_matrix(y_test, pred)
accuracy_score(y_test, pred)

0.6885245901639344