In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from src.PDLSSVM import PDLSSVM
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix, accuracy_score

# Shared 5-fold splitter; shuffling with a fixed seed keeps the folds
# identical every time the notebook is re-run.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=1234,
)

# iris dataset

In [2]:
# Build a binary problem from the first 100 iris samples (classes 0 and 1)
# and encode the labels as -1 / +1.
iris = load_iris()
X = iris['data'][:100]
# Copy before relabelling: a plain slice is a view, so writing into it would
# also modify the target array stored inside the loaded `iris` Bunch.
y = iris['target'][:100].copy()
# Remap by label value rather than by position, so the encoding does not
# depend on the samples being sorted by class.
y[y == 0] = -1

# Hold out 20% as a test split; from here on X / y refer to the training split.
X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

In [3]:
# Exhaustive grid search over (rho, c, c1, c2) for PDLSSVM on the iris
# training split. Each combination is scored by its mean accuracy over the
# folds produced by the shared `kf` splitter.
# NOTE(review): the c / c1 / c2 grids jump from 1e-2 to 1e+2 (no 1e0) — confirm this is intentional.
rho_search_space = [1e-5, 1e-4]
c_search_space = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

# One list per column; a row is appended for every evaluated combination.
results_dict = {"rho": [], "c": [], "c1": [], "c2": [], "Acc": []}
columns = ("rho", "c", "c1", "c2", "Acc")

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                fold_scores = []
                for train_index, val_index in kf.split(X):
                    # Fresh model per fold so no state carries over between folds.
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X[train_index], y[train_index])
                    pred, _, _ = clf.predict(X[val_index])
                    fold_scores.append(accuracy_score(y[val_index], pred))

                row = (rho, c, c1, c2, np.mean(fold_scores))
                for column, value in zip(columns, row):
                    results_dict[column].append(value)

results_df = pd.DataFrame.from_dict(results_dict)
results_df

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




KeyboardInterrupt: 

In [None]:
# Rank the evaluated hyper-parameter combinations, best mean CV accuracy first.
results_df.sort_values(by='Acc', ascending=False)

In [None]:
# PDLSSVM: fit on the full iris training split with fixed hyper-parameters
# and report accuracy on the held-out test split.
# NOTE(review): rho = 1e-3 is not one of the rho values searched above ([1e-5, 1e-4]) — confirm.
rho, c, c1, c2 = 1e-3, 1e+8, 1e-8, 1e-8

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# predict returns three values; only the first (the predictions) is used here.
pred, _, _ = clf.predict(X_test, verbose=True)
accuracy_score(y_test, pred)

In [None]:
# LSSVM baseline on iris: same PDLSSVM class with c1 = c2 = 0.
# NOTE(review): presumably zeroing c1 and c2 reduces PDLSSVM to a plain
# LSSVM — confirm against src.PDLSSVM.
rho, c, c1, c2 = 1e-4, 1e+5, 0, 0

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# Only the first of the three values returned by predict is needed.
pred, _, _ = clf.predict(X_test, verbose=True)
accuracy_score(y_test, pred)

In [None]:
from sklearn.svm import SVC

# Reference baseline: scikit-learn SVC with default settings, fitted on the
# iris training split and scored on the held-out test split.
clf = SVC().fit(X, y)
pred = clf.predict(X_test)
accuracy_score(y_test, pred)

# sonar dataset

In [None]:
# Load the sonar data: comma-separated, no header row. All columns but the
# last are features; the last column holds the class label ("R" or "M").
df = pd.read_table("data/sonar.all-data", sep=",", header=None)

X = df.iloc[:, :-1].values

# Encode labels as -1 ("R") / +1 ("M") in an int64 column vector of shape (n, 1).
# The original read the labels via `.valuesa` (a typo that raises
# AttributeError) and then overwrote the array in place; an explicit mapping
# avoids writing into the DataFrame's underlying buffer. Any label other than
# "R" / "M" maps to NaN and makes the int64 conversion fail loudly.
y = df.iloc[:, -1].map({"R": -1, "M": 1}).to_numpy(dtype='int64')
y = np.expand_dims(y, -1)

# Stratified 80/20 split; from here on X / y refer to the training split.
X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=1234, stratify=y)

In [None]:
# Exhaustive grid search over (rho, c, c1, c2) for PDLSSVM on the sonar
# training split. Each combination is scored by its mean accuracy over
# 5 shuffled, fixed-seed cross-validation folds.
rho_search_space = [1e-5, 1e-4]
c_search_space   = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

# One list per column; a row is appended for every evaluated combination.
results_dict = {"rho": [], "c": [], "c1": [], "c2": [], "Acc": []}
kf = KFold(n_splits=5, shuffle=True, random_state=1234)

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                cv_acc = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]

                    # Fresh model per fold so no state carries over between folds.
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X_train, y_train)
                    pred, _, _ = clf.predict(X_val)
                    # astype returns a new array, so the result must be kept;
                    # the original discarded it and the cast never took effect.
                    pred = pred.astype('int64')

                    # The original line had a stray trailing `1` (SyntaxError).
                    acc = accuracy_score(y_val, pred)
                    cv_acc.append(acc)

                cv_acc = np.mean(cv_acc)
                results_dict['rho'].append(rho)
                results_dict['c'].append(c)
                results_dict['c1'].append(c1)
                results_dict['c2'].append(c2)
                results_dict['Acc'].append(cv_acc)

results_df = pd.DataFrame.from_dict(results_dict)
results_df

In [None]:
# Rank the evaluated hyper-parameter combinations, best mean CV accuracy first.
results_df.sort_values(by='Acc', ascending=False)

In [None]:
# PDLSSVM: fit on the full sonar training split with fixed hyper-parameters
# and report accuracy on the held-out test split.
# NOTE(review): c1 = 1e+3 is not one of the c1 values searched above — confirm.
rho, c, c1, c2 = 1e-5, 1e+4, 1e+3, 1e-2

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# predict returns three values; only the first (the predictions) is used here.
pred, _, _ = clf.predict(X_test, verbose=True)
accuracy_score(y_test, pred)

In [None]:
# LSSVM baseline on sonar: same PDLSSVM class with c1 = c2 = 0.
# NOTE(review): presumably zeroing c1 and c2 reduces PDLSSVM to a plain
# LSSVM — confirm against src.PDLSSVM.
rho, c, c1, c2 = 1e-5, 1e+4, 0, 0

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# Only the first of the three values returned by predict is needed.
pred, _, _ = clf.predict(X_test, verbose=True)
accuracy_score(y_test, pred)

In [None]:
# Reference baseline: scikit-learn SVC with default settings, fitted on the
# sonar training split and scored on the held-out test split.
# SVC is not imported in this cell; it relies on the import in an earlier cell.
clf = SVC().fit(X, y)
pred = clf.predict(X_test)
accuracy_score(y_test, pred)

# dbworld dataset

In [None]:
# Load the dbworld array: every column but the last is a feature, the last
# column is the label.
data = np.load("data/dbworld.npy")
X, y = data[:, :-1], data[:, -1]

# Stratified 80/20 split; from here on X / y refer to the training split.
X, X_test, y, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
    stratify=y,
)

In [None]:
# Exhaustive grid search over (rho, c, c1, c2) for PDLSSVM on the dbworld
# training split. Each combination is scored by its mean accuracy over
# 5 shuffled, fixed-seed cross-validation folds.
rho_search_space = [1e-5, 1e-4]
c_search_space   = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

# One list per column; a row is appended for every evaluated combination.
results_dict = {"rho": [], "c": [], "c1": [], "c2": [], "Acc": []}
kf = KFold(n_splits=5, shuffle=True, random_state=1234)

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                cv_acc = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]

                    # Fresh model per fold so no state carries over between folds.
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X_train, y_train)
                    pred, _, _ = clf.predict(X_val)
                    # astype returns a new array, so the result must be kept;
                    # the original discarded it and the cast never took effect.
                    pred = pred.astype('int64')

                    acc = accuracy_score(y_val, pred)
                    cv_acc.append(acc)

                cv_acc = np.mean(cv_acc)
                results_dict['rho'].append(rho)
                results_dict['c'].append(c)
                results_dict['c1'].append(c1)
                results_dict['c2'].append(c2)
                results_dict['Acc'].append(cv_acc)

results_df = pd.DataFrame.from_dict(results_dict)
results_df

In [None]:
# Rank the evaluated hyper-parameter combinations, best mean CV accuracy first.
results_df.sort_values(by='Acc', ascending=False)

In [None]:
# PDLSSVM: fit on the full dbworld training split with fixed hyper-parameters
# and report accuracy on the held-out test split.
# NOTE(review): none of these four values appear in the grids searched above — confirm.
rho, c, c1, c2 = 1e-3, 1e-3, 1e-5, 1e-5

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# predict returns three values; only the first (the predictions) is used here.
pred, _, _ = clf.predict(X_test, verbose=True)
accuracy_score(y_test, pred)

In [None]:
# LSSVM baseline on dbworld: same PDLSSVM class with c1 = c2 = 0.
# NOTE(review): presumably zeroing c1 and c2 reduces PDLSSVM to a plain
# LSSVM — confirm against src.PDLSSVM.
rho, c, c1, c2 = 1e-3, 1e-3, 0, 0

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# Only the first of the three values returned by predict is needed.
pred, _, _ = clf.predict(X_test, verbose=True)
accuracy_score(y_test, pred)

In [None]:
from sklearn.svm import SVC

# Reference baseline: scikit-learn SVC with default settings, fitted on the
# dbworld training split and scored on the held-out test split.
clf = SVC().fit(X, y)
pred = clf.predict(X_test)
accuracy_score(y_test, pred)

# heart attack dataset

In [None]:
# Load the heart attack CSV: every column but the last is a feature, the
# last column is the label.
data = pd.read_csv("data/heart_attack.csv")
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Shuffled, stratified 80/20 split; from here on X / y refer to the training split.
X, X_test, y, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
    shuffle=True,
    stratify=y,
)

In [None]:
# Exhaustive grid search over (rho, c, c1, c2) for PDLSSVM on the heart
# attack training split. Each combination is scored by its mean accuracy
# over 5 shuffled, fixed-seed cross-validation folds.
rho_search_space = [1e-5, 1e-4]
c_search_space   = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c1_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]
c2_search_space  = [1e-8, 1e-6, 1e-4, 1e-2, 1e+2, 1e+4, 1e+6, 1e+8]

# One list per column; a row is appended for every evaluated combination.
results_dict = {"rho": [], "c": [], "c1": [], "c2": [], "Acc": []}
kf = KFold(n_splits=5, shuffle=True, random_state=1234)

for rho in tqdm(rho_search_space):
    for c in c_search_space:
        for c1 in c1_search_space:
            for c2 in c2_search_space:
                cv_acc = []
                for train_index, val_index in kf.split(X):
                    X_train, X_val = X[train_index], X[val_index]
                    y_train, y_val = y[train_index], y[val_index]

                    # Fresh model per fold so no state carries over between folds.
                    clf = PDLSSVM(rho, c, c1, c2)
                    clf.fit(X_train, y_train)
                    pred, _, _ = clf.predict(X_val)
                    # astype returns a new array, so the result must be kept;
                    # the original discarded it and the cast never took effect.
                    pred = pred.astype('int64')

                    acc = accuracy_score(y_val, pred)
                    cv_acc.append(acc)

                cv_acc = np.mean(cv_acc)
                results_dict['rho'].append(rho)
                results_dict['c'].append(c)
                results_dict['c1'].append(c1)
                results_dict['c2'].append(c2)
                results_dict['Acc'].append(cv_acc)

results_df = pd.DataFrame.from_dict(results_dict)
results_df

In [None]:
# Rank the evaluated hyper-parameter combinations, best mean CV accuracy first.
results_df.sort_values(by='Acc', ascending=False)

In [None]:
# PDLSSVM: fit on the full heart attack training split with fixed
# hyper-parameters and report accuracy on the held-out test split.
# NOTE(review): these values match the dbworld cell and none appear in the
# grids searched above — confirm they were not simply copied over.
rho, c, c1, c2 = 1e-3, 1e-3, 1e-5, 1e-5

clf = PDLSSVM(rho, c, c1, c2)
clf.fit(X, y, verbose=True)
# predict returns three values; only the first (the predictions) is used here.
pred, _, _ = clf.predict(X_test, verbose=True)
accuracy_score(y_test, pred)