In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project code imported
# from src/ are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

from sklearn.metrics import matthews_corrcoef, make_scorer
from sklearn.preprocessing import StandardScaler

from src.model.instance_hardness import *
from src.utils import *
from src.model.dkdn import *

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [14]:
def _summarise_complexity(scores, labels):
    """Average per-instance complexity scores overall and per class.

    Parameters
    ----------
    scores : array-like
        One complexity value per instance; must support boolean-mask indexing
        (presumably a NumPy array -- confirm against ``kdn_score`` / ``DkDN``).
    labels : numpy.ndarray
        Integer class labels aligned with ``scores``.

    Returns
    -------
    dict
        Mean score under the keys ``'global'`` (all instances),
        ``'class 0'`` (``labels < 1``) and ``'class 1'`` (``labels > 0``).
    """
    return {'global': np.mean(scores),
            'class 0': np.mean(scores[labels < 1]),
            'class 1': np.mean(scores[labels > 0])}


# Results are stored as exp_info[experiment][k][measure][scope].
# The dict is created once, before the loop, so that results of earlier
# experiments are not discarded when more dataset names are added below.
# NOTE(review): `np` is not imported by name in the cells visible here;
# presumably one of the wildcard imports provides it -- confirm.
exp_info = {}

for experiment in ['appendicitis']:

    print(f'Experiment: {experiment}\n')

    data = pd.read_parquet(f'../data/{experiment}.parquet')

    # Preprocessing: standardise every column except the target 'y'.
    scaler = StandardScaler()
    X = scaler.fit_transform(data.drop(columns=['y']))

    # Recode the label -1 as 0 and cast to int. np.where builds a new array,
    # so nothing is written into memory that may be shared with `data`
    # (such an array can be read-only under pandas Copy-on-Write).
    y_raw = data['y'].to_numpy()
    y = np.where(y_raw == -1, 0, y_raw).astype(int)

    # Not read anywhere in this cell; kept in case later cells rely on it.
    rng_seed = 1234

    exp_info[experiment] = {}
    for k in range(1, 12):
        print(k)

        # kDN scores from kdn_score; its second return value is not used here.
        complexity_kdn, _ = kdn_score(X, y, k)
        kdn_summary = _summarise_complexity(complexity_kdn, y)
        print(f'complexity_kdn {kdn_summary["global"]}')

        # DkDN fitted with its default settings.
        dynamic_kdn = DkDN(k=k)
        dynamic_kdn.fit(X, y)
        complexity_dynamic_kdn = dynamic_kdn.complexity
        dynamic_kdn_summary = _summarise_complexity(complexity_dynamic_kdn, y)
        print(f'complexity_dynamic_kdn {dynamic_kdn_summary["global"]}')

        # Same estimator, fitted with exclude_center=False.
        dynamic_kdn_full_zone = DkDN(k=k)
        dynamic_kdn_full_zone.fit(X, y, exclude_center=False)
        complexity_dynamic_kdn_full_zone = dynamic_kdn_full_zone.complexity
        full_zone_summary = _summarise_complexity(complexity_dynamic_kdn_full_zone, y)
        print(f'complexity_dynamic_kdn_full_zone {full_zone_summary["global"]} \n')

        k_info = {'kdn': kdn_summary,
                  'dynamic_kdn': dynamic_kdn_summary,
                  'dynamic_kdn_full_zone': full_zone_summary}

        exp_info[experiment][k] = k_info


Experiment: appendicitis

1
complexity_kdn 0.18867924528301888
complexity_dynamic_kdn 0.18867924528301888
complexity_dynamic_kdn_full_zone 0.05471698113207547 

2
complexity_kdn 0.18396226415094338
complexity_dynamic_kdn 0.18924528301886795
complexity_dynamic_kdn_full_zone 0.10320754716981133 

3
complexity_kdn 0.18867924528301888
complexity_dynamic_kdn 0.20867924528301884
complexity_dynamic_kdn_full_zone 0.1350943396226415 

4
complexity_kdn 0.19339622641509435
complexity_dynamic_kdn 0.20632075471698114
complexity_dynamic_kdn_full_zone 0.1419811320754717 

5
complexity_kdn 0.19811320754716982
complexity_dynamic_kdn 0.2036792452830189
complexity_dynamic_kdn_full_zone 0.14962264150943397 

6
complexity_kdn 0.20754716981132076
complexity_dynamic_kdn 0.20641509433962263
complexity_dynamic_kdn_full_zone 0.15386792452830186 

7
complexity_kdn 0.2088948787061994
complexity_dynamic_kdn 0.2080188679245283
complexity_dynamic_kdn_full_zone 0.16047169811320752 

8
complexity_kdn 0.205188679245283