## Libraries used and data declarations

In [3]:
import numpy as np
from numpy.typing import NDArray
from scipy.stats import zscore
from pytictoc import TicToc
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
import pandas as pd
from sklearn.metrics import accuracy_score

# from matilda.data.model import AlgorithmSummary
# from matilda.data.option import Opts

In [4]:
# Experiment settings: SVM kernel name, number of cross-validation folds,
# and number of algorithms (label columns) to train on.
nalgos = 10
opts_csv_fold = 5
kernel_fcn = 'rbf'

# Labels: read the CSV and keep it as a plain list of rows.
y = pd.read_csv('./data/ybin.csv').values.tolist()

# Features: read the CSV as a list of rows, then standardise every column
# (axis=0) using the sample standard deviation (ddof=1).
z = pd.read_csv('./data/z.csv').values.tolist()
z_norm = zscore(z, axis=0, ddof=1).tolist()

### Training

In [7]:
def fit_libsvm(z, y, kkv, kernel_given):
    """Fit one SVC per fold and report its accuracy on the held-out part.

    Args:
        z: Indexable collection of feature rows.
        y: Indexable collection of labels, aligned with ``z`` by position.
        kkv: Mapping from a fold key to a sequence whose item 0 holds the
            training indices and item 1 holds the test indices.
        kernel_given: Value forwarded to ``SVC(kernel=...)``.

    Returns:
        dict mapping each fold key to the ``accuracy_score`` obtained on
        that fold's test indices.
    """
    def pick(rows, indices):
        # Gather the rows found at the given positions, in the given order.
        return [rows[j] for j in indices]

    fold_scores = {}
    for fold_key, split in kkv.items():
        train_idx = split[0]
        test_idx = split[1]

        # Training portion of this fold.
        features_train = pick(z, train_idx)
        labels_train = pick(y, train_idx)
        # Held-out portion of this fold.
        features_test = pick(z, test_idx)
        labels_test = pick(y, test_idx)

        # A fresh classifier per fold, so nothing carries over between folds.
        classifier = SVC(kernel=kernel_given, C=1.0, random_state=0)
        classifier.fit(features_train, labels_train)
        predicted = classifier.predict(features_test)

        fold_scores[fold_key] = accuracy_score(labels_test, predicted)

    return fold_scores

In [11]:
# Overall wall-clock timer for the whole training run.
t = TicToc()
t.tic()

# Fold accuracies for every algorithm, keyed by algorithm (label column) index.
svm_results = {}
# Elapsed seconds per algorithm, in training order.
algo_elapsed = []
# Fold accuracies of the most recently trained algorithm. Initialised so the
# report below still works when nalgos == 0.
svm_res = {}

for algo_idx in range(nalgos):
    t_inner = TicToc()
    t_inner.tic()

    # Save the global NumPy RNG state so that seeding here does not leak
    # into code that runs after this loop; it is restored in `finally`.
    state = np.random.get_state()
    np.random.seed(0)  # equivalent to MATLAB's rng('default') ?
    try:
        # REQUIRE: Test case for validation the result
        # Labels of the current algorithm: column `algo_idx` of every row.
        y_b = [row[algo_idx] for row in y]
        skf = StratifiedKFold(n_splits=opts_csv_fold, shuffle=True, random_state=0)

        # fold number -> [train indices, test indices]. Only the labels
        # drive the stratified split, so a dummy feature array is enough.
        kkv = {
            fold: [train_index.tolist(), test_index.tolist()]
            for fold, (train_index, test_index)
            in enumerate(skf.split(np.zeros(len(y_b)), y_b))
        }

        # start training using svm
        svm_res = fit_libsvm(z_norm, y_b, kkv, kernel_fcn)
        svm_results[algo_idx] = svm_res
    finally:
        np.random.set_state(state)

    # tocvalue() returns the elapsed time without printing anything.
    algo_elapsed.append(t_inner.tocvalue())

total_elapsed = t.tocvalue()

# visualise accuracy score
# Only the last trained algorithm is reported here; the scores of every
# algorithm are available in `svm_results`.
for k, v in svm_res.items():
    print(f'{k} fold: accuracy score = {v}')

    

0 fold: accuracy score = 0.8837209302325582
1 fold: accuracy score = 0.9047619047619048
2 fold: accuracy score = 0.8333333333333334
3 fold: accuracy score = 0.8333333333333334
4 fold: accuracy score = 0.8571428571428571
