In [1]:
import os
# Set before the remaining imports execute — presumably so that a CUDA-backed
# dependency can locate the toolkit at import time; TODO confirm which package reads it.
os.environ['CUDAHOME'] = '/usr/local/cuda'

from tqdm.auto import tqdm
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import KFold, cross_val_score
from soma.generators.age import AgeGenerator
from soma.tests.som import som_test

# Apply the base 'seaborn' style first, then 'seaborn-paper' on top of it.
# NOTE(review): newer Matplotlib releases renamed these styles to 'seaborn-v0_8' /
# 'seaborn-v0_8-paper'; confirm the names against the installed version.
plt.style.use('seaborn')
plt.style.use('seaborn-paper')

  from scipy.sparse.base import spmatrix
  from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1
  from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1


In [2]:
# Consecutive five-year age brackets: (15, 20), (20, 25), ..., (45, 50).
brackets = [(start, start + 5) for start in range(15, 50, 5)]
# Every bracket paired with its immediate successor (zip stops at the shorter input).
compare = list(zip(brackets, brackets[1:]))

In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin


class SOMWrapper(BaseEstimator, ClassifierMixin):
    """Two-class classifier that wraps ``som_test`` behind the scikit-learn estimator API.

    ``fit`` runs ``som_test`` on the samples labelled 0 and 1 and keeps the
    returned map together with a per-cell class label; ``predict`` assigns each
    sample the label of the cell it is mapped to.

    Parameters
    ----------
    size : tuple
        Forwarded unchanged to ``som_test`` as its ``size`` argument.
    """

    def __init__(self, size=(20, 15)):
        super().__init__()
        self.__size = size
        # Populated by ``fit``; ``None`` until then.
        self.__p = None
        self.__som = None
        self.__class = None

    def get_params(self, deep: bool = False):
        """Return the constructor parameters as a dict (``deep`` is accepted but unused)."""
        return {'size': self.__size}

    def set_params(self, size):
        """Replace the ``size`` parameter and return the estimator itself.

        Returning ``self`` follows the scikit-learn estimator contract, so
        chained calls such as ``clone(est).set_params(size=...)`` work.
        """
        self.__size = size
        return self

    def fit(self, X, y):
        """Run ``som_test`` on the two classes and derive a label for every map cell.

        Parameters
        ----------
        X : array-like
            Samples; indexed with boolean masks built from ``y``.
        y : array-like
            Labels; rows with ``y == 0`` and ``y == 1`` form the two groups.

        Returns
        -------
        self
        """
        self.__p, self.__som, (count1, count2) = som_test(X[y == 0], X[y == 1], ret_counts=True, ret_som=True,
                                                          size=self.__size, kerneltype=1)
        # A cell is labelled 1 only where the second group strictly outnumbers the first;
        # ties (including cells where both counts are equal) fall to class 0.
        # NOTE(review): the transpose presumably aligns the count grid with the
        # index order returned by ``get_bmus`` — confirm against ``som_test``.
        self.__class = (count2 > count1).astype(int).T
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label stored for its best-matching unit."""
        bmus = self.__som.get_bmus(self.__som.get_surface_state(X))
        # The two BMU columns index the two axes of the per-cell label grid.
        return self.__class[bmus[:, 0], bmus[:, 1]]

In [4]:
# Shuffled 5-fold split; no fixed seed, so fold assignment differs between runs.
kfold = KFold(n_splits=5, shuffle=True, random_state=None)
sample_size = 625

for ba, bb in tqdm(compare):
    # Draw one sample per bracket, in the same order as the pair (ba first, then bb).
    sample_a, sample_b = (AgeGenerator(*bracket).sample(sample_size) for bracket in (ba, bb))

    # Stack both samples; the first `sample_size` rows get label 0, the rest label 1.
    X = np.concatenate((sample_a, sample_b))
    y = np.repeat([0, 1], sample_size)

    scores = cross_val_score(
        SOMWrapper(size=(20, 15)), X, y,
        scoring='accuracy', cv=kfold, n_jobs=1, error_score='raise',
    )
    # Mean and standard deviation of the per-fold accuracies.
    print(ba, bb, scores.mean(), scores.std())

  0%|          | 0/6 [00:00<?, ?it/s]

(15, 20) (20, 25) 0.6 0.02517141235608366
(20, 25) (25, 30) 0.5688 0.03518181348367362
(25, 30) (30, 35) 0.5512 0.017232527382830397
(30, 35) (35, 40) 0.536 0.024265201420964957
(35, 40) (40, 45) 0.524 0.02478709341572749
(40, 45) (45, 50) 0.556 0.022908513701242136


In [5]:
# Same experiment as the previous cell, with a larger sample per bracket.
# Shuffled 5-fold split; no fixed seed, so fold assignment differs between runs.
kfold = KFold(n_splits=5, shuffle=True, random_state=None)
sample_size = 5000

for ba, bb in tqdm(compare):
    # Draw one sample per bracket, in the same order as the pair (ba first, then bb).
    sample_a, sample_b = (AgeGenerator(*bracket).sample(sample_size) for bracket in (ba, bb))

    # Stack both samples; the first `sample_size` rows get label 0, the rest label 1.
    X = np.concatenate((sample_a, sample_b))
    y = np.repeat([0, 1], sample_size)

    scores = cross_val_score(
        SOMWrapper(size=(20, 15)), X, y,
        scoring='accuracy', cv=kfold, n_jobs=1, error_score='raise',
    )
    # Mean and standard deviation of the per-fold accuracies.
    print(ba, bb, scores.mean(), scores.std())

  0%|          | 0/6 [00:00<?, ?it/s]

(15, 20) (20, 25) 0.6554 0.003799999999999996
(20, 25) (25, 30) 0.6196999999999999 0.0033105890714493586
(25, 30) (30, 35) 0.6008 0.010181355508968372
(30, 35) (35, 40) 0.5912 0.016289874155437815
(35, 40) (40, 45) 0.5835999999999999 0.00939893611000736
(40, 45) (45, 50) 0.5887 0.008213403679352409
