In [21]:
import numpy as np
import pandas as pd
from numpy.typing import NDArray
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from skopt import BayesSearchCV
from skopt.space import Real

In [22]:
"""Things that need to prepare for the input.
    z_norm, y_bin, w_aux, cp, kernel_fcn, params

    cp is StratifiedKFold
    StratifiedKFold(n_splits=opts.cv_folds, shuffle=True, random_state = 0)

    cv_folds
"""

'Things that need to prepare for the input.\n    z_norm, y_bin, w_aux, cp, kernel_fcn, params\n\n    cp is StratifiedKFold\n    StratifiedKFold(n_splits=opts.cv_folds, shuffle=True, random_state = 0)\n\n    cv_folds\n'

In [23]:
z_norm = pd.read_csv("../tests/pythia/fitmatsvm/znorm.csv", header=None).values
y_bin = pd.read_csv("../tests/pythia/fitmatsvm/ybin.csv", header=None).values
w_aux = pd.read_csv("../tests/pythia/fitmatsvm/waux.csv", header=None).values
kernel_fcn = "gaussian"
params = pd.read_csv("../tests/pythia/fitmatsvm/params.csv", header=None).values

z_norm = np.array(z_norm)
y_bin = np.array(y_bin)
w_aux = np.array(w_aux)
params = np.array(params)

cv_folds = 5

cp = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=3530723506)

In [24]:
#get first column of y_bin
y_bin = y_bin[:, 0]
print(y_bin)

[0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 1 1 1 0 1 1 0 1 1 1 1
 1 1 0 0 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 0 0 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1
 1 0 0 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 0 0 0 1 1 1 1 0 0 1 1 0 1 0 0 0 0 1 0
 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 0 0 0 0
 0 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 0 0 1]


In [25]:
w_aux = w_aux[:, 0]
print(w_aux)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [26]:
params = params[0]
print(params)

[nan nan]


In [30]:
def fitmatsvm(
        z_norm: NDArray[np.double],
        y_bin: NDArray[np.double],
        w_aux: NDArray[np.double],
        cp: StratifiedKFold,
        kernel_fcn: str,
        params: NDArray[np.double],
    ):

    if kernel_fcn == "gaussian":
        kernel_fcn = "rbf"
    elif kernel_fcn == "polynomial":
        kernel_fcn = "poly"
    elif kernel_fcn == "linear":
        kernel_fcn = "linear"
    else:
        raise ValueError(f"Unsupported kernel function: {kernel_fcn}. \
                         Supported kernels are 'gaussian', 'polynomial', and 'linear'.")


    if np.any(np.isnan(params)):
        param_space = {
            "C": Real(2**-10, 2**4, prior='log-uniform'),
            "gamma": Real(2**-10, 2**4, prior='log-uniform'),
        }
        svm_model = SVC(kernel=kernel_fcn, random_state=3530723506,
                        probability=True)

        bayes_search = BayesSearchCV(
            estimator=svm_model,
            n_iter=30,
            search_spaces=param_space,
            cv = 5,
            verbose=0,
        )


        bayes_search.fit(z_norm, y_bin, sample_weight=w_aux)

        best_svm = bayes_search.best_estimator_
        c = bayes_search.best_params_["C"]
        g = bayes_search.best_params_["gamma"]

        y_sub = best_svm.predict(z_norm)
        p_sub = best_svm.predict_proba(z_norm)[:, 1]

        y_hat = y_sub
        p_hat = p_sub

    else:
        c = params[0]
        g = params[1]

        best_svm = SVC(C=c, gamma=g, kernel=kernel_fcn)
        y_sub = np.zeros_like(y_bin)
        p_sub = np.zeros_like(y_bin, dtype=float)

        for train_index, test_index in cp.split(z_norm, y_bin):
            best_svm.fit(z_norm[train_index], y_bin[train_index], sample_weight = w_aux[train_index])
            y_sub[test_index] = best_svm.predict(z_norm[test_index])
            p_sub[test_index] = best_svm.predict_proba(z_norm[test_index])[:,1]

        best_svm.fit(z_norm, y_bin, sample_weight=w_aux)
        y_hat = best_svm.predict(z_norm)
        p_hat = best_svm.predict_proba(z_norm)[:, 1]

    return best_svm, y_sub, p_sub, y_hat, p_hat, c, g

In [31]:
#get the first column of z_norm

best_svm, y_sub, p_sub, y_hat, p_hat, c, g = fitmatsvm(z_norm, y_bin, w_aux, cp, kernel_fcn, params)

first half


In [32]:
print(c) # supposed 6.2279
print(g) # supposed 15.9350

0.5284851789475358
0.35470094682786174


In [33]:
p_hat


array([0.17025463, 0.91298556, 0.62369753, 0.25005805, 0.91171741,
       0.33246258, 0.16916596, 0.09019149, 0.07874822, 0.1910628 ,
       0.19870037, 0.18470569, 0.27078047, 0.07809314, 0.08632867,
       0.07853186, 0.6283173 , 0.59343949, 0.92448617, 0.58661901,
       0.26989118, 0.7062195 , 0.13007707, 0.1575783 , 0.09526836,
       0.09769109, 0.89756971, 0.89591496, 0.90860279, 0.11179468,
       0.85230837, 0.9171669 , 0.0992018 , 0.86128096, 0.91694532,
       0.88876272, 0.6125706 , 0.89638709, 0.89912246, 0.07949753,
       0.06938976, 0.86431071, 0.90492766, 0.91196984, 0.90897675,
       0.89946629, 0.09625262, 0.3492169 , 0.10966744, 0.92434817,
       0.9008076 , 0.92365149, 0.4559187 , 0.90243935, 0.91348241,
       0.86574689, 0.86685004, 0.90540164, 0.28594755, 0.62960018,
       0.89730131, 0.91084962, 0.90382669, 0.80826343, 0.9136459 ,
       0.90228292, 0.88596116, 0.90151948, 0.80324603, 0.89975443,
       0.88791164, 0.72015851, 0.88634579, 0.89584522, 0.91017