In [1]:
import numpy as np
import pandas as pd
from numpy.typing import NDArray
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from skopt import BayesSearchCV
from skopt.space import Real

In [2]:
"""Things that need to prepare for the input.
    z_norm, y_bin, w_aux, cp, kernel_fcn, params

    cp is StratifiedKFold
    StratifiedKFold(n_splits=opts.cv_folds, shuffle=True, random_state = 0)

    cv_folds
"""

'Things that need to prepare for the input.\n    z_norm, y_bin, w_aux, cp, kernel_fcn, params\n\n    cp is StratifiedKFold\n    StratifiedKFold(n_splits=opts.cv_folds, shuffle=True, random_state = 0)\n\n    cv_folds\n'

In [3]:
z_norm = pd.read_csv("../tests/pythia/fitmatsvm/znorm.csv", header=None).values
y_bin = pd.read_csv("../tests/pythia/fitmatsvm/ybin.csv", header=None).values
w_aux = pd.read_csv("../tests/pythia/fitmatsvm/waux.csv", header=None).values
kernel_fcn = "gaussian"
params = pd.read_csv("../tests/pythia/fitmatsvm/params.csv", header=None).values

z_norm = np.array(z_norm)
y_bin = np.array(y_bin)
w_aux = np.array(w_aux)
params = np.array(params)

cv_folds = 5

cp = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=3530723506)

In [4]:
#get first column of y_bin
y_bin = y_bin[:, 0]
print(y_bin)

[0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 1 1 1 0 1 1 0 1 1 1 1
 1 1 0 0 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 0 0 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1
 1 0 0 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 0 0 0 1 1 1 1 0 0 1 1 0 1 0 0 0 0 1 0
 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 0 0 0 0
 0 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 0 0 1]


In [5]:
w_aux = w_aux[:, 0]
print(w_aux)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [6]:
params = params[1]
print(params)

[nan nan]


In [10]:
def fitmatsvm(
        z_norm: NDArray[np.double],
        y_bin: NDArray[np.double],
        w_aux: NDArray[np.double],
        cp: StratifiedKFold,
        kernel_fcn: str,
        params: NDArray[np.double],
    ):

    if kernel_fcn == "gaussian":
        kernel_fcn = "rbf"
    elif kernel_fcn == "polynomial":
        kernel_fcn = "poly"

    elif kernel_fcn == "linear":
        kernel_fcn = "linear"
    else:
        raise ValueError(f"Unsupported kernel function: {kernel_fcn}. \
                         Supported kernels are 'gaussian', 'polynomial', and 'linear'.")


    if np.any(np.isnan(params)):
        param_space = {
            "C": Real(2**-10, 2**4, prior='log-uniform'),
            "gamma": Real(2**-10, 2**4, prior='log-uniform'),
        }
        svm_model = SVC(kernel=kernel_fcn, random_state=3530723506,
                        probability=True)

        bayes_search = BayesSearchCV(
            estimator=svm_model,
            n_iter=30,
            search_spaces=param_space,
            cv = 5,
            verbose=0,
        )


        bayes_search.fit(z_norm, y_bin, sample_weight=w_aux)

        best_svm = bayes_search.best_estimator_
        c = bayes_search.best_params_["C"]
        g = bayes_search.best_params_["gamma"]

        y_sub = best_svm.predict(z_norm)
        p_sub = best_svm.predict_proba(z_norm)[:, 1]

        y_hat = y_sub
        p_hat = p_sub

    else:
        c = params[0]
        g = params[1]

        best_svm = SVC(C=c, gamma=g, kernel=kernel_fcn)
        y_sub = np.zeros_like(y_bin)
        p_sub = np.zeros_like(y_bin, dtype=float)

        for train_index, test_index in cp.split(z_norm, y_bin):
            best_svm.fit(z_norm[train_index], y_bin[train_index], sample_weight = w_aux[train_index])
            y_sub[test_index] = best_svm.predict(z_norm[test_index])
            p_sub[test_index] = best_svm.predict_proba(z_norm[test_index])[:,1]

        best_svm.fit(z_norm, y_bin, sample_weight=w_aux)
        y_hat = best_svm.predict(z_norm)
        p_hat = best_svm.predict_proba(z_norm)[:, 1]

    return best_svm, y_sub, p_sub, y_hat, p_hat, c, g

In [11]:
#get the first column of z_norm

best_svm, y_sub, p_sub, y_hat, p_hat, c, g = fitmatsvm(z_norm, y_bin, w_aux, cp, kernel_fcn, params)

In [12]:
print(c)
print(g)

15.999999999999993
0.06245715268184241
