In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [11]:
plasma = pd.read_csv('D:\GitHub\\4BIT\metabolites\dataset\plasma_processed.csv')
plasma = plasma.drop(['Unnamed: 0'], axis=1)
plasma

Unnamed: 0,1_5-anhydroglucitol,1-monostearin,2_3-dihydroxybutanoic acid NIST,2_4-diaminobutyric acid,2-deoxyerythritol,2-deoxytetronic acid,2-hydroxybutanoic acid,2-hydroxyglutaric acid,2-hydroxyhippuric acid,2-hydroxyvaleric acid,...,tryptophan,tyrosine,UDP-glucuronic acid,urea,uric acid,uridine,valine,xylitol,xylose,Class
0,9483,103,101,963,389,123,20744,139,36,955,...,27450,52403,73,255067,10057,63,157332,154,1380,disease
1,27468,207,155,335,227,178,8611,78,91,103,...,17898,37179,157,180080,8386,118,103083,324,929,disease
2,13976,86,78,176,152,83,22792,92,97,1010,...,29503,30670,211,138861,15822,119,92045,253,610,disease
3,19449,178,104,467,270,157,29418,123,92,289,...,16501,27573,71,183633,14024,102,97349,223,746,disease
4,17209,258,103,979,314,211,16713,76,42,1126,...,24023,30096,222,255081,23154,58,159671,189,2022,disease
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,25149,184,154,444,142,244,20728,51,128,186,...,30518,36411,114,226793,17880,57,114920,252,856,control
78,24181,142,135,786,226,146,10688,64,59,80,...,23731,26506,110,365044,29018,61,127248,359,1003,control
79,23848,118,65,132,352,294,10077,42,56,867,...,30277,35764,158,327943,19736,76,78152,453,1279,control
80,13848,97,65,881,444,122,14869,75,203,1104,...,14930,18921,2176,323303,17479,108,96830,1450,1045,control


In [12]:
plasma.dtypes

1_5-anhydroglucitol                 int64
1-monostearin                       int64
2_3-dihydroxybutanoic acid NIST     int64
2_4-diaminobutyric acid             int64
2-deoxyerythritol                   int64
                                    ...  
uridine                             int64
valine                              int64
xylitol                             int64
xylose                              int64
Class                              object
Length: 159, dtype: object

# Deal with NaN Values:

In [13]:
plasma.isnull().any()

1_5-anhydroglucitol                False
1-monostearin                      False
2_3-dihydroxybutanoic acid NIST    False
2_4-diaminobutyric acid            False
2-deoxyerythritol                  False
                                   ...  
uridine                            False
valine                             False
xylitol                            False
xylose                             False
Class                              False
Length: 159, dtype: bool

In [14]:
# plasma.corr()

In [15]:
from scipy import stats

pearson_p = []
for i in plasma.columns:
    if plasma[i].dtypes!='O':
        pearson_coef, p_value = stats.pearsonr(plasma[i], plasma['Class'].map({"control": 0, "disease": 1}))
        pearson_p.append([i, pearson_coef, p_value])
pearson_p_df = pd.DataFrame(pearson_p, columns=['Features', 'Pearson Coefficient', 'p-Value'])
pearson_p_df

Unnamed: 0,Features,Pearson Coefficient,p-Value
0,1_5-anhydroglucitol,-0.051611,0.645167
1,1-monostearin,0.176846,0.111975
2,2_3-dihydroxybutanoic acid NIST,0.076688,0.493476
3,2_4-diaminobutyric acid,0.076201,0.496233
4,2-deoxyerythritol,0.060320,0.590351
...,...,...,...
153,uric acid,-0.310864,0.004475
154,uridine,-0.001496,0.989356
155,valine,-0.094143,0.400187
156,xylitol,-0.068039,0.543606


In [16]:
pearson_p_final = []
for i in range(158):
    if pearson_p_df.loc[i, 'p-Value'] < 0.001:
        pearson_p_final.append(pearson_p_df.loc[i, 'Features'])
pearson_p_final

['3-phosphoglycerate',
 '5-methoxytryptamine',
 'adenosine-5-monophosphate',
 'asparagine',
 'aspartic acid',
 'lactic acid',
 'phenol',
 'pyrophosphate',
 'pyruvic acid',
 'taurine']

### So important variables we derived:

'3-phosphoglycerate',
 '5-methoxytryptamine',
 'adenosine-5-monophosphate',
 'asparagine',
 'aspartic acid',
 'lactic acid',
 'phenol',
 'pyrophosphate',
 'pyruvic acid',
 'taurine'

In [17]:
# necessary functions
from sklearn.decomposition import PCA
labels = np.reshape(plasma['Class'].to_numpy(), (82,1))

def pca_analysis(df, n_components):
    pca = PCA(n_components=n_components)
    pca_result = pca.fit_transform(df.iloc[:, :-1].values)

    pca_concatenate = np.concatenate([pca_result, labels], axis=1)
    return pca_concatenate

def make_dataframe(array, n_features):
    col = []
    for i in range(1, n_features+1):
        col.append(f'pc{i}')
    col.append('Class')
    return pd.DataFrame(array, columns = col)

from sklearn.manifold import TSNE
def tsne_analysis(df, n_components):
    tsne = TSNE(n_components=n_components)
    tsne_result = tsne.fit_transform(df.iloc[:, :-1].values)

    tsne_concatenate = np.concatenate([tsne_result, labels], axis=1)
    return tsne_concatenate
def tsne_dataframe(array, n_features):
    col = []
    for i in range(1, n_features+1):
        col.append(f'tsne{i}')
    col.append('Class')
    return pd.DataFrame(array, columns = col)

def get_xy(df):
  X = df.drop('Class', axis=1)
  X = X.apply(pd.to_numeric)
  y = df['Class'].copy()
  y = y.map({"control": 0, "disease": 1})
  return X, y

# Lzypredict

In [18]:
import lazypredict
from lazypredict.Supervised import LazyClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report

from sklearn.model_selection import ShuffleSplit, train_test_split, StratifiedKFold

In [19]:
imp0 = ['3-phosphoglycerate',
 '5-methoxytryptamine',
 'adenosine-5-monophosphate',
 'asparagine',
 'aspartic acid',
 'lactic acid',
 'phenol',
 'pyrophosphate',
 'pyruvic acid',
 'taurine']

In [20]:
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()
accuracy = []
for k in [0, 2, 3, 4, 5, 22, 23]:
    
    if k==0:
        name = ['Plasma(Exploratory)']
        dfs = [plasma[imp0]]
    
    elif k==22 or k==23:
        k -= 20
        name = [f'tsne{k}_plasma(Exploratory)']
        vars()[f'tsne{k}_plasma'] = tsne_analysis(plasma[imp0], k)
        vars()[f'tsne{k}_plasma_df'] = make_dataframe(vars()[f'tsne{k}_plasma'], k)
        dfs = [vars()[f'tsne{k}_plasma_df']]
    
    else:
        name = [f'pca{k}_plasma(Exploratory)']
        vars()[f'pca{k}_plasma'] = pca_analysis(plasma[imp0], k)
        vars()[f'pca{k}_plasma_df'] = make_dataframe(vars()[f'pca{k}_plasma'], k)
        
        dfs = [vars()[f'pca{k}_plasma_df']]
    
    random_state = 42

    for idx, data in enumerate(dfs):
        result = []
        if data.columns[-1]=='Class':
            X = data.drop('Class', axis=1)
        else:
            X = data
        y = plasma['Class'].copy()
        y = y.map({"control": 0, "disease": 1})
        rs = ShuffleSplit(n_splits=10, test_size=.2, random_state=random_state)

        for train_index, test_index in rs.split(X):
            clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=matthews_corrcoef, predictions=True)
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            cols = X_train.columns
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            X_train = pd.DataFrame(X_train, columns=[cols])
            X_test = pd.DataFrame(X_test, columns=[cols])

            models, predictions = clf.fit(X_train, X_test, y_train, y_test)
            result.append(models)
        #     print(result)

        acc = []
        for i in range(10):
            acc.append(result[i].iloc[0]['Balanced Accuracy'])

        print(f'\n{name[idx]}: {sum(acc)/len(acc)}')
        accuracy.append([f'{name[idx]}', f'{sum(acc)/len(acc)}'])

  3%|▎         | 1/29 [00:00<00:03,  7.88it/s]

100%|██████████| 29/29 [00:01<00:00, 17.19it/s]


[LightGBM] [Info] Number of positive: 30, number of negative: 35
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000054 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 226
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.461538 -> initscore=-0.154151
[LightGBM] [Info] Start training from score -0.154151


100%|██████████| 29/29 [00:01<00:00, 26.50it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000041 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 233
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:01<00:00, 19.78it/s]


[LightGBM] [Info] Number of positive: 28, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 230
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.430769 -> initscore=-0.278713
[LightGBM] [Info] Start training from score -0.278713


100%|██████████| 29/29 [00:01<00:00, 28.20it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 234
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:00<00:00, 33.52it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 227
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:01<00:00, 17.99it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 228
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:01<00:00, 28.85it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 228
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 30.26it/s]


[LightGBM] [Info] Number of positive: 36, number of negative: 29
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 230
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.553846 -> initscore=0.216223
[LightGBM] [Info] Start training from score 0.216223


100%|██████████| 29/29 [00:00<00:00, 38.60it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000028 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 228
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:00<00:00, 36.77it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000036 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 230
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772

Plasma(Exploratory): 0.8846105283605284


100%|██████████| 29/29 [00:00<00:00, 32.20it/s]


[LightGBM] [Info] Number of positive: 30, number of negative: 35
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000016 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.461538 -> initscore=-0.154151
[LightGBM] [Info] Start training from score -0.154151


100%|██████████| 29/29 [00:00<00:00, 39.01it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:00<00:00, 33.58it/s]


[LightGBM] [Info] Number of positive: 28, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000016 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.430769 -> initscore=-0.278713
[LightGBM] [Info] Start training from score -0.278713


100%|██████████| 29/29 [00:00<00:00, 36.21it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:01<00:00, 27.73it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:00<00:00, 29.72it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000030 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 39.39it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 32.69it/s]


[LightGBM] [Info] Number of positive: 36, number of negative: 29
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.553846 -> initscore=0.216223
[LightGBM] [Info] Start training from score 0.216223


100%|██████████| 29/29 [00:00<00:00, 34.12it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000023 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:00<00:00, 36.12it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772

pca2_plasma(Exploratory): 0.8141608391608391


100%|██████████| 29/29 [00:00<00:00, 29.16it/s]


[LightGBM] [Info] Number of positive: 30, number of negative: 35
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000024 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.461538 -> initscore=-0.154151
[LightGBM] [Info] Start training from score -0.154151


100%|██████████| 29/29 [00:01<00:00, 24.85it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:00<00:00, 31.42it/s]


[LightGBM] [Info] Number of positive: 28, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.430769 -> initscore=-0.278713
[LightGBM] [Info] Start training from score -0.278713


100%|██████████| 29/29 [00:00<00:00, 31.77it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:00<00:00, 32.30it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:00<00:00, 32.39it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000026 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 33.69it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 33.14it/s]


[LightGBM] [Info] Number of positive: 36, number of negative: 29
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.553846 -> initscore=0.216223
[LightGBM] [Info] Start training from score 0.216223


100%|██████████| 29/29 [00:00<00:00, 38.68it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000026 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:00<00:00, 36.45it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772

pca3_plasma(Exploratory): 0.8393550893550893


100%|██████████| 29/29 [00:01<00:00, 25.28it/s]


[LightGBM] [Info] Number of positive: 30, number of negative: 35
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000027 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.461538 -> initscore=-0.154151
[LightGBM] [Info] Start training from score -0.154151


100%|██████████| 29/29 [00:00<00:00, 31.27it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000020 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:00<00:00, 30.54it/s]


[LightGBM] [Info] Number of positive: 28, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 94
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.430769 -> initscore=-0.278713
[LightGBM] [Info] Start training from score -0.278713


100%|██████████| 29/29 [00:00<00:00, 29.01it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:00<00:00, 30.97it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000023 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:01<00:00, 24.81it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000033 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 29.48it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:01<00:00, 26.37it/s]


[LightGBM] [Info] Number of positive: 36, number of negative: 29
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000020 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 92
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.553846 -> initscore=0.216223
[LightGBM] [Info] Start training from score 0.216223


100%|██████████| 29/29 [00:01<00:00, 22.60it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:01<00:00, 25.42it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000032 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772

pca4_plasma(Exploratory): 0.8237461149961149


100%|██████████| 29/29 [00:01<00:00, 22.29it/s]


[LightGBM] [Info] Number of positive: 30, number of negative: 35
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000099 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 117
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.461538 -> initscore=-0.154151
[LightGBM] [Info] Start training from score -0.154151


100%|██████████| 29/29 [00:01<00:00, 18.54it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000031 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 117
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:01<00:00, 19.93it/s]


[LightGBM] [Info] Number of positive: 28, number of negative: 37
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000020 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 117
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.430769 -> initscore=-0.278713
[LightGBM] [Info] Start training from score -0.278713


100%|██████████| 29/29 [00:01<00:00, 17.82it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000320 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 116
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:02<00:00, 12.28it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 117
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:02<00:00, 12.44it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 115
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:02<00:00, 12.66it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001600 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 116
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:02<00:00, 12.29it/s]


[LightGBM] [Info] Number of positive: 36, number of negative: 29
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000095 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 116
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.553846 -> initscore=0.216223
[LightGBM] [Info] Start training from score 0.216223


100%|██████████| 29/29 [00:02<00:00, 12.77it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 116
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:02<00:00, 12.91it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000080 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 116
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 5
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772

pca5_plasma(Exploratory): 0.8438422688422689


100%|██████████| 29/29 [00:02<00:00, 13.41it/s]


[LightGBM] [Info] Number of positive: 30, number of negative: 35
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.461538 -> initscore=-0.154151
[LightGBM] [Info] Start training from score -0.154151


100%|██████████| 29/29 [00:02<00:00, 12.85it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:02<00:00, 12.18it/s]


[LightGBM] [Info] Number of positive: 28, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000073 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.430769 -> initscore=-0.278713
[LightGBM] [Info] Start training from score -0.278713


100%|██████████| 29/29 [00:02<00:00, 12.63it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:02<00:00, 12.76it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000057 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:02<00:00, 12.98it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000058 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:02<00:00, 13.14it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:02<00:00, 13.27it/s]


[LightGBM] [Info] Number of positive: 36, number of negative: 29
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.553846 -> initscore=0.216223
[LightGBM] [Info] Start training from score 0.216223


100%|██████████| 29/29 [00:02<00:00, 12.27it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000052 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 47
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:02<00:00, 12.97it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000055 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 46
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772

tsne2_plasma(Exploratory): 0.7819638694638694


100%|██████████| 29/29 [00:02<00:00, 13.39it/s]


[LightGBM] [Info] Number of positive: 30, number of negative: 35
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000060 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 71
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.461538 -> initscore=-0.154151
[LightGBM] [Info] Start training from score -0.154151


100%|██████████| 29/29 [00:02<00:00, 12.13it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000061 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:02<00:00, 11.32it/s]


[LightGBM] [Info] Number of positive: 28, number of negative: 37
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.430769 -> initscore=-0.278713
[LightGBM] [Info] Start training from score -0.278713


100%|██████████| 29/29 [00:01<00:00, 19.73it/s]


[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772


100%|██████████| 29/29 [00:01<00:00, 26.86it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000030 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:00<00:00, 29.68it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 36.93it/s]


[LightGBM] [Info] Number of positive: 32, number of negative: 33
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 71
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.492308 -> initscore=-0.030772
[LightGBM] [Info] Start training from score -0.030772


100%|██████████| 29/29 [00:00<00:00, 31.69it/s]


[LightGBM] [Info] Number of positive: 36, number of negative: 29
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 69
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.553846 -> initscore=0.216223
[LightGBM] [Info] Start training from score 0.216223


100%|██████████| 29/29 [00:00<00:00, 34.17it/s]


[LightGBM] [Info] Number of positive: 35, number of negative: 30
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.538462 -> initscore=0.154151
[LightGBM] [Info] Start training from score 0.154151


100%|██████████| 29/29 [00:00<00:00, 29.48it/s]

[LightGBM] [Info] Number of positive: 33, number of negative: 32
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 71
[LightGBM] [Info] Number of data points in the train set: 65, number of used features: 3
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.507692 -> initscore=0.030772
[LightGBM] [Info] Start training from score 0.030772

tsne3_plasma(Exploratory): 0.7571474358974359





In [21]:
models

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,matthews_corrcoef,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LabelPropagation,0.76,0.76,0.76,0.76,0.53,0.01
LabelSpreading,0.76,0.76,0.76,0.76,0.53,0.02
LGBMClassifier,0.71,0.72,0.72,0.7,0.45,0.03
PassiveAggressiveClassifier,0.71,0.71,0.71,0.71,0.42,0.01
KNeighborsClassifier,0.71,0.71,0.71,0.71,0.42,0.03
BernoulliNB,0.71,0.7,0.7,0.7,0.41,0.02
NuSVC,0.71,0.7,0.7,0.7,0.41,0.02
QuadraticDiscriminantAnalysis,0.65,0.65,0.65,0.64,0.31,0.02
SGDClassifier,0.59,0.6,0.6,0.58,0.2,0.02
LogisticRegression,0.59,0.59,0.59,0.59,0.18,0.02


In [22]:
result

[                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
 Model                                                                           
 XGBClassifier                      0.76               0.78     0.78      0.77   
 BernoulliNB                        0.71               0.70     0.70      0.71   
 LGBMClassifier                     0.65               0.65     0.65      0.65   
 RandomForestClassifier             0.65               0.65     0.65      0.65   
 ExtraTreesClassifier               0.65               0.65     0.65      0.65   
 GaussianNB                         0.65               0.65     0.65      0.65   
 ExtraTreeClassifier                0.59               0.64     0.64      0.59   
 NuSVC                              0.65               0.61     0.61      0.65   
 SVC                                0.65               0.61     0.61      0.65   
 RidgeClassifierCV                  0.59               0.61     0.61      0.60   
 KNeighborsClass

In [23]:
predictions

Unnamed: 0,AdaBoostClassifier,BaggingClassifier,BernoulliNB,CalibratedClassifierCV,DecisionTreeClassifier,DummyClassifier,ExtraTreeClassifier,ExtraTreesClassifier,GaussianNB,KNeighborsClassifier,...,PassiveAggressiveClassifier,Perceptron,QuadraticDiscriminantAnalysis,RandomForestClassifier,RidgeClassifier,RidgeClassifierCV,SGDClassifier,SVC,XGBClassifier,LGBMClassifier
0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,1,0,0,0,0,1,1,0,0,0,...,0,1,0,0,0,0,1,0,0,0
2,1,0,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,1,0,1,1
3,0,0,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0
4,0,0,0,0,0,1,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
5,0,1,0,0,1,1,0,0,0,0,...,0,1,0,1,0,0,1,0,1,1
6,1,1,1,1,0,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
7,1,1,0,1,1,1,0,1,1,1,...,1,1,1,1,1,1,1,1,1,0
8,0,0,0,0,0,1,0,0,0,0,...,1,1,1,0,0,0,0,0,0,1
9,1,0,1,1,0,1,0,1,1,0,...,1,1,1,1,1,1,1,1,0,1


In [25]:
accuracy

[['Plasma(Exploratory)', '0.8846105283605284'],
 ['pca2_plasma(Exploratory)', '0.8141608391608391'],
 ['pca3_plasma(Exploratory)', '0.8393550893550893'],
 ['pca4_plasma(Exploratory)', '0.8237461149961149'],
 ['pca5_plasma(Exploratory)', '0.8438422688422689'],
 ['tsne2_plasma(Exploratory)', '0.7819638694638694'],
 ['tsne3_plasma(Exploratory)', '0.7571474358974359']]

In [26]:
name

['tsne3_plasma(Exploratory)']

In [27]:
name[idx]

'tsne3_plasma(Exploratory)'

In [28]:
acc

[0.7803030303030303,
 0.7152777777777778,
 0.6923076923076923,
 0.8819444444444444,
 0.6515151515151515,
 0.7569444444444444,
 0.8194444444444444,
 0.775,
 0.7348484848484849,
 0.7638888888888888]

In [24]:
accuracy_df = pd.DataFrame(accuracy, columns=['Processed Dataset', 'Accuracy'])
accuracy_df.to_csv('D:\GitHub\\4BIT\metabolites\\accuracy\\accuracy_exploratory_plasma2.csv', index=False)
accuracy_df

Unnamed: 0,Processed Dataset,Accuracy
0,Plasma(Exploratory),0.8846105283605284
1,pca2_plasma(Exploratory),0.8141608391608391
2,pca3_plasma(Exploratory),0.8393550893550893
3,pca4_plasma(Exploratory),0.8237461149961149
4,pca5_plasma(Exploratory),0.8438422688422689
5,tsne2_plasma(Exploratory),0.7819638694638694
6,tsne3_plasma(Exploratory),0.7571474358974359
