In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from sksurv.svm import FastKernelSurvivalSVM
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.model_selection import train_test_split
from joblib import dump, load
import pickle


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
# Registry of experiment artifacts, keyed by feature-extractor name.
# Each entry holds:
#   'dataset' - path to the .npy array read with np.load in the evaluation loop
#   'model'   - kernel name -> path of the saved model (read with joblib's load)
#   'scores'  - kernel name -> path of the saved scores object (read with pickle.load)
# The kernel names ('log', 'power', 'mixture') must match the keys of `kernels`.
# All paths are relative, so they resolve against the current working directory.
experiments = {
    'sift' : {
        'dataset':  'features/dataset_reduced.csv_SIFT_.npy',
        'model': {
            'log': 'models/dataset_reduced.csv_SIFT_.npy_log_01-02-2023 06-29-21_model.sav',
            'power': 'models/dataset_reduced.csv_SIFT_.npy_power_01-02-2023 07-07-27_model.sav',
            'mixture': 'models/dataset_reduced.csv_SIFT_.npy_mixture_31-01-2023 13-20-17_model.sav',
        },
        'scores': {
            'log': 'scores/dataset_reduced.csv_SIFT_.npy_log_26-01-2023 06-55-40_model.sav',
            'power': 'scores/dataset_reduced.csv_SIFT_.npy_power_25-01-2023 16-32-18_model.sav',
            'mixture': 'scores/dataset_reduced.csv_SIFT_.npy_mixture_25-01-2023 22-27-27_model.sav',
        },
    },
    'hog' : {
        'dataset':  'features/dataset_reduced.csv_HOG_.npy',
        'model': {
            'log': 'models/dataset_reduced.csv_HOG_.npy_log_01-02-2023 06-57-32_model.sav',
            'power': 'models/dataset_reduced.csv_HOG_.npy_power_01-02-2023 07-46-17_model.sav',
            'mixture': 'models/dataset_reduced.csv_HOG_.npy_mixture_03-02-2023 13-19-27_model.sav',
        },
        'scores': {
            'log': 'scores/dataset_reduced.csv_HOG_.npy_log_20-01-2023 22-29-15_model.sav',
            # NOTE(review): unlike every other path here this one has 'HOG__power'
            # with no '.npy' segment - confirm it matches the file on disk.
            'power': 'scores/dataset_reduced.csv_HOG__power_19-01-2023 18-42-58_model.sav',
            'mixture': 'scores/dataset_reduced.csv_HOG_.npy_mixture_23-01-2023 14-34-38_model.sav',
        }, 
    },
    'kdesa' : {
        'dataset':  'features/dataset_reduced.csv_KDESA_.npy',
        'model': {
            'log': 'models/dataset_reduced.csv_KDESA_.npy_log_03-02-2023 22-47-11_model.sav',
            'power': 'models/dataset_reduced.csv_KDESA_.npy_power_03-02-2023 22-44-27_model.sav',
            'mixture': 'models/dataset_reduced.csv_KDESA_.npy_mixture_03-02-2023 13-01-11_model.sav',
        }, 
        'scores': {
            'log': 'scores/dataset_reduced.csv_KDESA_.npy_log_21-01-2023 07-35-41_model.sav',
            'power': 'scores/dataset_reduced.csv_KDESA_.npy_power_21-01-2023 06-10-39_model.sav',
            'mixture': 'scores/dataset_reduced.csv_KDESA_.npy_mixture_23-01-2023 14-41-13_model.sav',
        },
    },
}

In [3]:
# Kernel hyper-parameters. The lambdas in `kernels` read these module-level
# names when they are called, not when they are defined.
alpha = 0.05      # weight of the RBF term in the 'mixture' kernel (poly term gets 1 - alpha)
gamma = 0.5       # gamma handed to the RBF kernel in 'mixture'
degree = 3        # degree handed to the polynomial kernel in 'mixture'
beta = 0.8        # exponent applied to the Euclidean distance in 'power' and 'log'
random_state = 0  # seed value; not referenced by the cells visible in this section

def gramMatrix(X1, X2, K_function):
    """Evaluate a kernel on every (row of X1, row of X2) pair.

    Parameters
    ----------
    X1, X2 : objects exposing ``.shape[0]`` and iterable along their first axis
        The two collections of samples.
    K_function : callable
        Called as ``K_function(a, b)`` for each pair; its result is stored in
        a single matrix cell.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(X1.shape[0], X2.shape[0])`` whose ``[i, j]``
        entry is ``K_function(X1[i], X2[j])``.
    """
    out = np.zeros((X1.shape[0], X2.shape[0]))
    for row_idx, left in enumerate(X1):
        # Row view: writes go straight into `out`.
        target_row = out[row_idx]
        for col_idx, right in enumerate(X2):
            target_row[col_idx] = K_function(left, right)
    return out

# Kernel name -> callable that maps a feature matrix `x` to its square Gram
# matrix k(x, x). Keys must match the kernel names used in `experiments`.
#
# 'power' and 'log' are non-positive by construction (a negated distance power
# and a negated log of a value >= 1). Clipping those values directly to the
# positive range [2e-100, 2e100] forced every entry up to the lower bound and
# produced a constant matrix, so the clip is applied to the magnitude and the
# sign is restored afterwards.
# NOTE(review): matrices exported with the previous formulas were constant;
# anything fitted on them should be regenerated.
kernels = {
    # k(a, b) = -||a - b||**beta
    'power': lambda x: gramMatrix(x, x, lambda x1, x2: -np.clip(np.linalg.norm(x1 - x2)**beta, 2e-100, 2e100)),
    # k(a, b) = -log(1 + ||a - b||**beta)
    'log': lambda x:  gramMatrix(x, x, lambda x1, x2: -np.clip(np.log(1 + np.linalg.norm(x1 - x2, axis=0)**beta), 2e-100, 2e100)),
    # k = alpha * rbf + (1 - alpha) * poly, clipped to [2e-100, 2e100].
    # NOTE(review): an odd-degree polynomial kernel can be negative; the lower
    # clip bound then replaces those entries with 2e-100 - confirm that is intended.
    'mixture': lambda x: np.clip(alpha * pairwise_kernels(x, metric='rbf', gamma=gamma) + (1-alpha)*pairwise_kernels(x, metric='poly', degree=degree), 2e-100, 2e100),
}

In [4]:
# Empty frame that collects one row of evaluation metrics per (dataset, kernel).
# 'time boundary' and 'accuracy' are separate columns: without the comma between
# them Python concatenates the adjacent literals into 'time boundaryaccuracy'.
results = pd.DataFrame(columns=['dataset', 'kernel', 'c-index', 'time boundary', 'accuracy', 'precision', 'f1', 'auc'])

In [6]:
# For every feature set: load the feature array, then for each kernel print the
# stored score and the model coefficients, export the Gram matrix together with
# the labels, and print the model's predictions on the features.

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('kernel_matrices', exist_ok=True)

for (feature, data) in experiments.items():
    print(f'Feature: {feature}')
    dataset = np.load(data['dataset'])
    # The last three columns are split off as labels; the rest are features.
    # NOTE(review): float16 tops out at ~65504 - presumably chosen to save
    # memory; confirm the distance computations in the kernels do not overflow.
    x = np.delete(dataset, [-3, -2, -1], axis=1).astype(np.float16)
    # NOTE(review): only the commented-out lines below use this sample, and the
    # draw is unseeded even though `random_state` is defined - confirm whether
    # it can be dropped or should be seeded.
    sample_indices = np.random.choice(x.shape[0], int(0.2 * x.shape[0]), False)
    labels = dataset[:, [-3, -2, -1]].astype(np.int8) # labels: 1 - invalid, 0 - valid
    # X, labels_sample = (x[sample_indices, :], labels[sample_indices, :])
    # y = labels_sample[:, -1].astype(np.int8) # labels: 1 - invalid, 0 - valid

    for (kernel_name, model_path) in data['model'].items():
        print(f'Kernel: {kernel_name}')

        # SECURITY: unpickling runs arbitrary code from the file; only load
        # artifacts produced by this project.
        with open(data['scores'][kernel_name], 'rb') as scores_file:
            scores = pickle.load(scores_file)
        print(f'c-index: {scores.score}')

        with open(model_path, 'rb') as model_file:
            model = load(model_file)
        print(model.coef_)

        kernel = kernels[kernel_name]
        kernel_matrix = kernel(x)
        # save kernel matrix with labels
        np.save(f'kernel_matrices/{feature}_{kernel_name}_kernel_matrix.npy', kernel_matrix)
        np.save(f'kernel_matrices/{feature}_{kernel_name}_labels.npy', labels)

        times = model.predict(x)
        print(times)

Feature: sift
Kernel: log
c-index: 0.5
[0. 0. 0. ... 0. 0. 0.]


  return sqrt(add.reduce(s, axis=axis, keepdims=keepdims))


In [None]:
# Show the kernel's current working directory: the relative paths used in this
# notebook ('features/', 'models/', 'scores/', 'kernel_matrices/') resolve against it.
cwd = os.getcwd()
cwd

'i:\\projets\\school\\Thesis\\experiments'

In [None]:
# NOTE(review): `model` is whatever the model-loading loop left bound on its
# final iteration, so this cell only works after that loop has run. `cv_results`
# is not set anywhere in the visible cells - presumably it was attached to the
# object before it was saved; confirm.
print(model.cv_results)

{'fit_time': array([0.14061165, 0.09375715, 0.07812738, 0.07813907]), 'score_time': array([0.        , 0.01561785, 0.        , 0.01562166]), 'test_score': array([0.5, 0.5, 0.5, 0.5]), 'train_score': array([0.5, 0.5, 0.5, 0.5])}
