In [1]:
import numpy as np
import pandas as pd

import fragility_index as fi

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC



In [2]:
# Expected schema of the per-run result rows; only used for the empty fallback below.
RESULT_COLUMNS = ["Data_name","Model_name","Probability","Mean","Std","VaR%95","CVaR%95","VaR%99","CVaR%99"]

data_name_list = ['BreastCancerCoimbra','LiverDisorders', 'LiverPatient']
N_SEEDS = 20        # number of random train/test splits evaluated per data set
TEST_SIZE = 0.2     # fraction of each data set held out for testing


def _pair_rows(pos, neg):
    """Build all (positive, negative) row pairs as one 2-D array.

    Parameters
    ----------
    pos : ndarray of shape (M_p, N)
    neg : ndarray of shape (M_n, N)

    Returns
    -------
    ndarray of shape (M_p * M_n, 2 * N)
        Row ``i * M_n + j`` holds ``pos[i]`` in columns ``[:N]`` and ``neg[j]``
        in columns ``[N:]`` -- the same layout the explicit double loop produced.
    """
    return np.hstack([
        np.repeat(pos, neg.shape[0], axis=0),   # each positive row repeated M_n times
        np.tile(neg, (pos.shape[0], 1)),        # whole negative block repeated M_p times
    ])


# Collect the per-run frames in a list and concatenate once at the end:
# growing a DataFrame with pd.concat inside the loop is quadratic, and starting
# from an empty object-dtyped frame can turn the numeric columns into objects.
result_frames = []

for data_name in data_name_list:
    print(data_name)
    x,y = fi.Data_load(data_name)

    for _seed in range(N_SEEDS):
        print(_seed)
        # train_test_split falls back to NumPy's global RNG when no random_state
        # is given, so seeding it here makes each split reproducible.
        np.random.seed(_seed)
        X_sample, X_test, y_sample, y_test = train_test_split(x, y, test_size=TEST_SIZE)

        # Fit the scaler on the training part only, then apply it to the test part.
        scaler = StandardScaler()
        X_sample = scaler.fit_transform(X_sample)
        X_test = scaler.transform(X_test)

        # Split by class; the labels are assumed to be encoded as +1 / -1
        # (presumably by fi.Data_load -- TODO confirm).
        data_p = X_sample[y_sample==1,:]
        data_n = X_sample[y_sample==-1,:]
        data_test_p = X_test[y_test==1,:]
        data_test_n = X_test[y_test==-1,:]

        N = X_sample.shape[1]   # number of attributes

        # One row per (positive, negative) pair, for training and for testing.
        data_sample = _pair_rows(data_p, data_n)
        data_test = _pair_rows(data_test_p, data_test_n)
        S = data_sample.shape[0]        # number of training pairs (M_p * M_n)
        S_test = data_test.shape[0]     # number of test pairs

        # Per-attribute lower/upper bounds of each class in the training data.
        lb_p = data_p.min(axis=0)
        lb_n = data_n.min(axis=0)
        ub_p = data_p.max(axis=0)
        ub_n = data_n.max(axis=0)

        w_FI = fi.FI_minimization(N,S,data_sample, lb_p,lb_n,ub_p,ub_n,LogToConsole=False)
        w_bAUC = fi.bAUC(N,S,data_sample,LogToConsole=False)

        # Only the FI and bAUC models are evaluated here; the LR / LDA / LSVM
        # baselines are not part of this run.
        result_FI = fi.performance(data_name,"FI",w_FI,N,S_test,data_test)
        result_bAUC = fi.performance(data_name,"bAUC",w_bAUC,N,S_test,data_test)

        result_frames.extend([result_FI, result_bAUC])

# ignore_index gives every run its own row label instead of a column of zeros.
result = (
    pd.concat(result_frames, ignore_index=True)
    if result_frames
    else pd.DataFrame(columns=RESULT_COLUMNS)
)

# Average every metric over the seeds, per data set and model.
result.groupby(['Data_name', 'Model_name']).mean().reset_index()

BreastCancerCoimbra
0
Academic license - for non-commercial use only - expires 2023-10-04
Using license file C:\Users\44395\gurobi.lic
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
LiverDisorders
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
LiverPatient
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


Unnamed: 0,Data_name,Model_name,Probability,Mean,Std,VaR%95,CVaR%95,VaR%99,CVaR%99
0,BreastCancerCoimbra,FI,0.688299,0.244858,0.508061,0.424916,0.563877,0.640105,0.688247
1,BreastCancerCoimbra,bAUC,0.754999,0.364272,0.792314,0.663132,0.876434,0.97619,1.067755
2,LiverDisorders,FI,0.631747,0.302487,0.480907,0.59613,0.788313,0.907462,1.040437
3,LiverDisorders,bAUC,0.626832,0.322905,0.499669,0.629075,0.816166,0.930241,1.058634
4,LiverPatient,FI,0.724419,0.257147,0.626016,0.427263,0.609144,0.723122,0.869408
5,LiverPatient,bAUC,0.698279,0.32242,0.762498,0.539449,0.810485,1.010673,1.169479


In [3]:
# Inspect the un-aggregated `result` frame built by the experiment cell
# (the rows that the groupby/mean summary is computed from).
result

Unnamed: 0,Data_name,Model_name,Probability,Mean,Std,VaR%95,CVaR%95,VaR%99,CVaR%99
0,BreastCancerCoimbra,FI,0.622378,0.290772,0.762955,0.536152,0.670261,0.777338,0.824414
0,BreastCancerCoimbra,bAUC,0.636364,0.374639,1.117409,0.722804,1.032159,1.150915,1.205163
0,BreastCancerCoimbra,FI,0.759259,0.248761,0.674341,0.384623,0.598340,0.771408,0.795729
0,BreastCancerCoimbra,bAUC,0.685185,0.328041,1.063414,0.638104,0.811605,0.842614,0.951310
0,BreastCancerCoimbra,FI,0.517483,0.236420,0.382421,0.570257,0.647041,0.718954,0.771499
...,...,...,...,...,...,...,...,...,...
0,LiverPatient,bAUC,0.638396,0.355975,0.617742,0.636386,0.834958,0.949153,1.170044
0,LiverPatient,FI,0.779797,0.310004,0.580741,0.453735,0.703084,0.840543,1.087947
0,LiverPatient,bAUC,0.774725,0.472610,0.741745,0.585115,1.353067,1.874713,2.131196
0,LiverPatient,FI,0.696935,0.232448,0.485706,0.406671,0.535192,0.602844,0.722866


In [4]:
# Mean of every metric column per (data set, model) pair, kept under its own
# name so the summary table can be reused after this cell.
result_BCC_LD_ILDP = (
    result
    .groupby(['Data_name', 'Model_name'])
    .mean()
    .reset_index()
)
result_BCC_LD_ILDP

Unnamed: 0,Data_name,Model_name,Probability,Mean,Std,VaR%95,CVaR%95,VaR%99,CVaR%99
0,BreastCancerCoimbra,FI,0.688299,0.244858,0.508061,0.424916,0.563877,0.640105,0.688247
1,BreastCancerCoimbra,bAUC,0.754999,0.364272,0.792314,0.663132,0.876434,0.97619,1.067755
2,LiverDisorders,FI,0.631747,0.302487,0.480907,0.59613,0.788313,0.907462,1.040437
3,LiverDisorders,bAUC,0.626832,0.322905,0.499669,0.629075,0.816166,0.930241,1.058634
4,LiverPatient,FI,0.724419,0.257147,0.626016,0.427263,0.609144,0.723122,0.869408
5,LiverPatient,bAUC,0.698279,0.32242,0.762498,0.539449,0.810485,1.010673,1.169479
