In [1]:
import math
from sklearn import metrics
from sklearn import preprocessing
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import time
import datetime
import random
random.seed(1234)

from scipy import interp
import warnings
warnings.filterwarnings("ignore")

from collections import Counter
from functools import reduce
from tqdm import tqdm, trange
from copy import deepcopy

from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, auc
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
from sklearn.utils import class_weight

In [2]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import time
import random
from scipy import interp
import warnings
warnings.filterwarnings("ignore")

from collections import Counter
from tqdm import tqdm
import os

In [3]:
import math
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [4]:
seed = 19961231
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [5]:
def read_data(data_type, hla_sequence):
    pos_neg_data_pd = pd.read_csv('/home/chujunyi/5_ZY_MHC/Anthem/Dataset/' + data_type + '_pos_neg_data.csv', index_col = 0).drop_duplicates().reset_index(drop = True)
    print('# Samples = ', len(pos_neg_data_pd))
    pos_neg_data_pd = pd.merge(pos_neg_data_pd, hla_sequence, on = 'HLA')
    print('After delete unknown sequence HLA: # Samples = ', len(pos_neg_data_pd))
    return pos_neg_data_pd

In [7]:
hla_sequence = pd.read_csv('/home/chujunyi/5_ZY_MHC/data/common_hla_sequence.csv')
train_set = read_data('train', hla_sequence)
independent_set = read_data('evaluation', hla_sequence)
external_set = read_data('experimental', hla_sequence)

# Samples =  2178971
After delete unknown sequence HLA: # Samples =  2156796
# Samples =  174437
After delete unknown sequence HLA: # Samples =  172580
# Samples =  104209
After delete unknown sequence HLA: # Samples =  104209


# 去掉多余的负样本

In [8]:
def Remove_excess_negative_samples(data, train_set, type_ = 'train', savepath = True):
    idx = []
    for hla in tqdm(set(data.HLA)):
        for length in range(8,15):
            if type_ == 'train':
                for fold in [0.0, 1.0, 2.0, 3.0, 4.0]:
                    try:
                        idx.append(data[data.HLA == hla][data.length == length][data.neg_fold == fold].index[0])
                        continue
                    except:
                        continue
            else:
                try:
                    idx.append(data[data.HLA == hla][data.length == length][data.label == 0].index[0])
                    continue
                except:
                    continue
                        
    print('Remove num = ', len(idx))
    print('Original: ', Counter(data.label))
    data.drop(index = idx, axis = 0, inplace = True)
    data.reset_index(drop = True, inplace = True)
    print('After remove: ', Counter(data.label))
    
    # 去掉independent和external中在train set中的数据
    if type_ != 'train':
        copy_train_set = deepcopy(train_set)
        del copy_train_set['neg_fold']
        data = pd.concat([data, copy_train_set, copy_train_set]).drop_duplicates(['peptide', 'HLA'], keep = False).reset_index(drop = True)
        print('After remove duplicates with train_set: ', Counter(data.label))
        
    if savepath:
        data.to_csv('/home/chujunyi/5_ZY_MHC/Anthem/Dataset/{}_set.csv'.format(type_))
    return data

In [9]:
train_set = Remove_excess_negative_samples(train_set, train_set, 'train', )
independent_set = Remove_excess_negative_samples(independent_set, train_set, 'independent')
external_set = Remove_excess_negative_samples(external_set, train_set, 'external')

100%|██████████| 112/112 [14:20<00:00,  7.69s/it]


Remove num =  1800
Original:  Counter({0: 1797630, 1: 359166})
After remove:  Counter({0: 1795830, 1: 359166})


100%|██████████| 112/112 [00:15<00:00,  7.23it/s]


Remove num =  360
Original:  Counter({0: 86470, 1: 86110})
After remove:  Counter({1: 86110, 0: 86110})
After remove duplicates with train_set:  Counter({1: 85876, 0: 85562})


100%|██████████| 5/5 [00:00<00:00, 10.40it/s]


Remove num =  35
Original:  Counter({0: 52122, 1: 52087})
After remove:  Counter({1: 52087, 0: 52087})
After remove duplicates with train_set:  Counter({1: 51984, 0: 51881})


# 去掉independent和external在训练集中出现过的样本

In [10]:
def train_set_cv_fold_index(train_set):
    train_set_cv_idx_dict, test_set_cv_idx_dict = dict(), dict()   
    for hla in tqdm(set(train_set.HLA)):
        for length in range(8,15):

            temp_hla_length = train_set[train_set.HLA == hla][train_set.length == length]
            if len(temp_hla_length) != 0:
                temp_pos_hla_length_idx = temp_hla_length[temp_hla_length.label == 1].index

                test_num_hla_length = round(len(temp_pos_hla_length_idx) / 5)
                train_num_hla_length = len(temp_pos_hla_length_idx) - test_num_hla_length
                print('Positive {} - Length{}: # All = {} | # Train = {} | # Test = {}'.format(hla, length, len(temp_pos_hla_length_idx), train_num_hla_length, test_num_hla_length))

                pos_fold = []
                for i in range(5):       
                    if i != 4:
                        pos_fold.append(temp_pos_hla_length_idx[i * test_num_hla_length : (i+1) * test_num_hla_length])
                    else:
                        pos_fold.append(temp_pos_hla_length_idx[i * test_num_hla_length : ])

                train_fold, test_fold = [], []
                for fold in [0.0, 1.0, 2.0, 3.0, 4.0]:
                    i = int(fold)
                    temp_neg_hla_length_idx = temp_hla_length[temp_hla_length.label == 0][temp_hla_length.neg_fold == fold].index

                    cv_train_fold = [0, 1, 2, 3, 4]
                    cv_train_fold.remove(i)

                    pos_test_per_fold = pos_fold[i]
                    neg_test_per_fold = temp_neg_hla_length_idx[:len(pos_test_per_fold)]
                    pos_train_per_fold = []
                    for f in cv_train_fold:
                        pos_train_per_fold.extend(list(pos_fold[f]))
                    neg_train_per_fold = temp_neg_hla_length_idx[len(pos_test_per_fold) : len(pos_test_per_fold)+len(pos_train_per_fold)]

                    train_set_cv_idx_dict.setdefault(i, [])
                    test_set_cv_idx_dict.setdefault(i, [])
                    train_set_cv_idx_dict[i].extend(pos_train_per_fold)
                    train_set_cv_idx_dict[i].extend(neg_train_per_fold)
                    test_set_cv_idx_dict[i].extend(pos_test_per_fold)
                    test_set_cv_idx_dict[i].extend(neg_test_per_fold)
                    
    for fold in range(5):
        print(Counter(train_set.iloc[train_set_cv_idx_dict[fold]].label), Counter(train_set.iloc[test_set_cv_idx_dict[fold]].label))

    return train_set_cv_idx_dict, test_set_cv_idx_dict

In [11]:
def save_trainset_cvdata(train_set, train_data_cv_idx_dict, val_data_cv_idx_dict, fold, savepath = True):
    train_fold = train_set.loc[train_data_cv_idx_dict[fold]].sample(frac = 1.0).reset_index(drop = True)
    train_fold.drop(columns = ['neg_fold'], inplace = True)
    if savepath:
        train_fold.to_csv('/home/chujunyi/5_ZY_MHC/Anthem/Dataset/train_data_fold{}.csv'.format(fold))
        
    val_fold = train_set.loc[val_data_cv_idx_dict[0]].sample(frac = 1.0).reset_index(drop = True)
    val_fold.drop(columns = ['neg_fold'], inplace = True)
    if savepath:
        val_fold.to_csv('/home/chujunyi/5_ZY_MHC/Anthem/Dataset/val_data_fold{}.csv'.format(fold))

In [12]:
train_data_cv_idx_dict, val_data_cv_idx_dict = train_set_cv_fold_index(train_set)

  0%|          | 0/112 [00:00<?, ?it/s]

Positive HLA-B*27:01 - Length9: # All = 1512 | # Train = 1210 | # Test = 302
Positive HLA-B*27:01 - Length10: # All = 751 | # Train = 601 | # Test = 150
Positive HLA-B*27:01 - Length11: # All = 440 | # Train = 352 | # Test = 88
Positive HLA-B*27:01 - Length12: # All = 151 | # Train = 121 | # Test = 30
Positive HLA-B*27:01 - Length13: # All = 71 | # Train = 57 | # Test = 14


  1%|          | 1/112 [00:01<02:52,  1.56s/it]

Positive HLA-A*24:06 - Length9: # All = 92 | # Train = 74 | # Test = 18
Positive HLA-A*24:06 - Length10: # All = 76 | # Train = 61 | # Test = 15
Positive HLA-A*24:06 - Length11: # All = 74 | # Train = 59 | # Test = 15


  2%|▏         | 2/112 [00:03<02:50,  1.55s/it]

Positive HLA-B*58:01 - Length8: # All = 305 | # Train = 244 | # Test = 61
Positive HLA-B*58:01 - Length9: # All = 2707 | # Train = 2166 | # Test = 541
Positive HLA-B*58:01 - Length10: # All = 789 | # Train = 631 | # Test = 158
Positive HLA-B*58:01 - Length11: # All = 412 | # Train = 330 | # Test = 82
Positive HLA-B*58:01 - Length12: # All = 139 | # Train = 111 | # Test = 28
Positive HLA-B*58:01 - Length13: # All = 72 | # Train = 58 | # Test = 14


  3%|▎         | 3/112 [00:04<02:50,  1.56s/it]

Positive HLA-B*51:08 - Length8: # All = 68 | # Train = 54 | # Test = 14
Positive HLA-B*51:08 - Length9: # All = 388 | # Train = 310 | # Test = 78


  4%|▎         | 4/112 [00:06<02:46,  1.54s/it]

Positive HLA-C*06:02 - Length8: # All = 136 | # Train = 109 | # Test = 27
Positive HLA-C*06:02 - Length9: # All = 4421 | # Train = 3537 | # Test = 884
Positive HLA-C*06:02 - Length10: # All = 388 | # Train = 310 | # Test = 78
Positive HLA-C*06:02 - Length11: # All = 139 | # Train = 111 | # Test = 28
Positive HLA-C*06:02 - Length12: # All = 100 | # Train = 80 | # Test = 20
Positive HLA-C*06:02 - Length13: # All = 119 | # Train = 95 | # Test = 24


  4%|▍         | 5/112 [00:07<02:47,  1.56s/it]

Positive HLA-C*06:02 - Length14: # All = 144 | # Train = 115 | # Test = 29
Positive HLA-A*03:01 - Length8: # All = 227 | # Train = 182 | # Test = 45
Positive HLA-A*03:01 - Length9: # All = 7340 | # Train = 5872 | # Test = 1468
Positive HLA-A*03:01 - Length10: # All = 1984 | # Train = 1587 | # Test = 397
Positive HLA-A*03:01 - Length11: # All = 658 | # Train = 526 | # Test = 132
Positive HLA-A*03:01 - Length12: # All = 345 | # Train = 276 | # Test = 69
Positive HLA-A*03:01 - Length13: # All = 69 | # Train = 55 | # Test = 14


  5%|▌         | 6/112 [00:09<02:47,  1.58s/it]

Positive HLA-B*13:02 - Length8: # All = 56 | # Train = 45 | # Test = 11
Positive HLA-B*13:02 - Length9: # All = 2412 | # Train = 1930 | # Test = 482
Positive HLA-B*13:02 - Length10: # All = 82 | # Train = 66 | # Test = 16


  6%|▋         | 7/112 [00:10<02:43,  1.56s/it]

Positive HLA-B*27:03 - Length9: # All = 404 | # Train = 323 | # Test = 81
Positive HLA-B*27:03 - Length10: # All = 184 | # Train = 147 | # Test = 37
Positive HLA-B*27:03 - Length11: # All = 144 | # Train = 115 | # Test = 29
Positive HLA-B*27:03 - Length12: # All = 59 | # Train = 47 | # Test = 12


  7%|▋         | 8/112 [00:12<02:44,  1.58s/it]

Positive HLA-B*15:03 - Length9: # All = 327 | # Train = 262 | # Test = 65


  8%|▊         | 9/112 [00:14<02:39,  1.55s/it]

Positive HLA-B*46:01 - Length8: # All = 78 | # Train = 62 | # Test = 16
Positive HLA-B*46:01 - Length9: # All = 1193 | # Train = 954 | # Test = 239
Positive HLA-B*46:01 - Length10: # All = 302 | # Train = 242 | # Test = 60
Positive HLA-B*46:01 - Length11: # All = 69 | # Train = 55 | # Test = 14


  9%|▉         | 10/112 [00:15<02:37,  1.54s/it]

Positive HLA-B*44:02 - Length8: # All = 116 | # Train = 93 | # Test = 23
Positive HLA-B*44:02 - Length9: # All = 4394 | # Train = 3515 | # Test = 879
Positive HLA-B*44:02 - Length10: # All = 1646 | # Train = 1317 | # Test = 329
Positive HLA-B*44:02 - Length11: # All = 746 | # Train = 597 | # Test = 149
Positive HLA-B*44:02 - Length12: # All = 257 | # Train = 206 | # Test = 51
Positive HLA-B*44:02 - Length13: # All = 82 | # Train = 66 | # Test = 16


 10%|▉         | 11/112 [00:17<02:38,  1.57s/it]

Positive HLA-B*57:03 - Length8: # All = 226 | # Train = 181 | # Test = 45
Positive HLA-B*57:03 - Length9: # All = 2167 | # Train = 1734 | # Test = 433
Positive HLA-B*57:03 - Length10: # All = 868 | # Train = 694 | # Test = 174
Positive HLA-B*57:03 - Length11: # All = 667 | # Train = 534 | # Test = 133
Positive HLA-B*57:03 - Length12: # All = 144 | # Train = 115 | # Test = 29
Positive HLA-B*57:03 - Length13: # All = 76 | # Train = 61 | # Test = 15


 11%|█         | 12/112 [00:18<02:37,  1.57s/it]

Positive HLA-A*68:02 - Length9: # All = 3396 | # Train = 2717 | # Test = 679
Positive HLA-A*68:02 - Length10: # All = 1209 | # Train = 967 | # Test = 242
Positive HLA-A*68:02 - Length11: # All = 277 | # Train = 222 | # Test = 55
Positive HLA-A*68:02 - Length12: # All = 151 | # Train = 121 | # Test = 30
Positive HLA-A*68:02 - Length13: # All = 89 | # Train = 71 | # Test = 18


 12%|█▏        | 13/112 [00:20<02:36,  1.58s/it]

Positive HLA-A*68:02 - Length14: # All = 70 | # Train = 56 | # Test = 14
Positive HLA-A*32:15 - Length9: # All = 58 | # Train = 46 | # Test = 12


 12%|█▎        | 14/112 [00:21<02:32,  1.55s/it]

Positive HLA-A*29:02 - Length8: # All = 159 | # Train = 127 | # Test = 32
Positive HLA-A*29:02 - Length9: # All = 4100 | # Train = 3280 | # Test = 820
Positive HLA-A*29:02 - Length10: # All = 978 | # Train = 782 | # Test = 196
Positive HLA-A*29:02 - Length11: # All = 375 | # Train = 300 | # Test = 75
Positive HLA-A*29:02 - Length12: # All = 128 | # Train = 102 | # Test = 26
Positive HLA-A*29:02 - Length13: # All = 148 | # Train = 118 | # Test = 30


 13%|█▎        | 15/112 [00:23<02:31,  1.56s/it]

Positive HLA-B*08:01 - Length8: # All = 924 | # Train = 739 | # Test = 185
Positive HLA-B*08:01 - Length9: # All = 4221 | # Train = 3377 | # Test = 844
Positive HLA-B*08:01 - Length10: # All = 274 | # Train = 219 | # Test = 55
Positive HLA-B*08:01 - Length11: # All = 131 | # Train = 105 | # Test = 26
Positive HLA-B*08:01 - Length12: # All = 145 | # Train = 116 | # Test = 29


 14%|█▍        | 16/112 [00:24<02:30,  1.56s/it]

Positive HLA-C*15:02 - Length8: # All = 152 | # Train = 122 | # Test = 30
Positive HLA-C*15:02 - Length9: # All = 1441 | # Train = 1153 | # Test = 288


 15%|█▌        | 17/112 [00:26<02:29,  1.58s/it]

Positive HLA-A*02:06 - Length9: # All = 3193 | # Train = 2554 | # Test = 639
Positive HLA-A*02:06 - Length10: # All = 849 | # Train = 679 | # Test = 170


 16%|█▌        | 18/112 [00:28<02:25,  1.55s/it]

Positive HLA-A*25:01 - Length9: # All = 96 | # Train = 77 | # Test = 19


 17%|█▋        | 19/112 [00:29<02:21,  1.52s/it]

Positive HLA-B*15:17 - Length9: # All = 393 | # Train = 314 | # Test = 79


 18%|█▊        | 20/112 [00:31<02:21,  1.54s/it]

Positive HLA-C*12:03 - Length8: # All = 117 | # Train = 94 | # Test = 23
Positive HLA-C*12:03 - Length9: # All = 1500 | # Train = 1200 | # Test = 300


 19%|█▉        | 21/112 [00:32<02:18,  1.52s/it]

Positive HLA-A*68:01 - Length9: # All = 5125 | # Train = 4100 | # Test = 1025
Positive HLA-A*68:01 - Length10: # All = 1437 | # Train = 1150 | # Test = 287
Positive HLA-A*68:01 - Length11: # All = 213 | # Train = 170 | # Test = 43
Positive HLA-A*68:01 - Length12: # All = 62 | # Train = 50 | # Test = 12


 20%|█▉        | 22/112 [00:34<02:17,  1.53s/it]

Positive HLA-B*27:20 - Length9: # All = 73 | # Train = 58 | # Test = 15


 21%|██        | 23/112 [00:35<02:17,  1.55s/it]

Positive HLA-A*02:19 - Length9: # All = 198 | # Train = 158 | # Test = 40


 21%|██▏       | 24/112 [00:37<02:13,  1.52s/it]

Positive HLA-B*15:09 - Length9: # All = 82 | # Train = 66 | # Test = 16


 22%|██▏       | 25/112 [00:38<02:10,  1.50s/it]

Positive HLA-A*24:02 - Length8: # All = 175 | # Train = 140 | # Test = 35
Positive HLA-A*24:02 - Length9: # All = 5379 | # Train = 4303 | # Test = 1076
Positive HLA-A*24:02 - Length10: # All = 1372 | # Train = 1098 | # Test = 274
Positive HLA-A*24:02 - Length11: # All = 740 | # Train = 592 | # Test = 148
Positive HLA-A*24:02 - Length12: # All = 245 | # Train = 196 | # Test = 49


 23%|██▎       | 26/112 [00:40<02:11,  1.52s/it]

Positive HLA-A*24:02 - Length13: # All = 156 | # Train = 125 | # Test = 31
Positive HLA-A*24:02 - Length14: # All = 64 | # Train = 51 | # Test = 13
Positive HLA-C*17:01 - Length8: # All = 96 | # Train = 77 | # Test = 19
Positive HLA-C*17:01 - Length9: # All = 335 | # Train = 268 | # Test = 67


 24%|██▍       | 27/112 [00:41<02:11,  1.54s/it]

Positive HLA-B*49:01 - Length8: # All = 510 | # Train = 408 | # Test = 102
Positive HLA-B*49:01 - Length9: # All = 2944 | # Train = 2355 | # Test = 589
Positive HLA-B*49:01 - Length10: # All = 424 | # Train = 339 | # Test = 85
Positive HLA-B*49:01 - Length11: # All = 89 | # Train = 71 | # Test = 18


 25%|██▌       | 28/112 [00:43<02:08,  1.53s/it]

Positive HLA-B*15:01 - Length8: # All = 445 | # Train = 356 | # Test = 89
Positive HLA-B*15:01 - Length9: # All = 9542 | # Train = 7634 | # Test = 1908
Positive HLA-B*15:01 - Length10: # All = 2297 | # Train = 1838 | # Test = 459
Positive HLA-B*15:01 - Length11: # All = 892 | # Train = 714 | # Test = 178
Positive HLA-B*15:01 - Length12: # All = 306 | # Train = 245 | # Test = 61
Positive HLA-B*15:01 - Length13: # All = 200 | # Train = 160 | # Test = 40


 26%|██▌       | 29/112 [00:44<02:09,  1.56s/it]

Positive HLA-B*15:01 - Length14: # All = 126 | # Train = 101 | # Test = 25
Positive HLA-B*48:01 - Length9: # All = 94 | # Train = 75 | # Test = 19


 27%|██▋       | 30/112 [00:46<02:06,  1.54s/it]

Positive HLA-A*30:01 - Length9: # All = 1230 | # Train = 984 | # Test = 246
Positive HLA-A*30:01 - Length10: # All = 156 | # Train = 125 | # Test = 31


 28%|██▊       | 31/112 [00:48<02:06,  1.56s/it]

Positive HLA-A*02:03 - Length9: # All = 2595 | # Train = 2076 | # Test = 519
Positive HLA-A*02:03 - Length10: # All = 1277 | # Train = 1022 | # Test = 255
Positive HLA-A*02:03 - Length11: # All = 101 | # Train = 81 | # Test = 20


 29%|██▊       | 32/112 [00:49<02:03,  1.55s/it]

Positive HLA-B*27:05 - Length8: # All = 624 | # Train = 499 | # Test = 125
Positive HLA-B*27:05 - Length9: # All = 16084 | # Train = 12867 | # Test = 3217
Positive HLA-B*27:05 - Length10: # All = 8011 | # Train = 6409 | # Test = 1602
Positive HLA-B*27:05 - Length11: # All = 5424 | # Train = 4339 | # Test = 1085
Positive HLA-B*27:05 - Length12: # All = 2990 | # Train = 2392 | # Test = 598
Positive HLA-B*27:05 - Length13: # All = 2020 | # Train = 1616 | # Test = 404


 29%|██▉       | 33/112 [00:51<02:09,  1.64s/it]

Positive HLA-B*27:05 - Length14: # All = 1517 | # Train = 1214 | # Test = 303
Positive HLA-B*35:01 - Length8: # All = 184 | # Train = 147 | # Test = 37
Positive HLA-B*35:01 - Length9: # All = 6524 | # Train = 5219 | # Test = 1305
Positive HLA-B*35:01 - Length10: # All = 1115 | # Train = 892 | # Test = 223
Positive HLA-B*35:01 - Length11: # All = 516 | # Train = 413 | # Test = 103
Positive HLA-B*35:01 - Length12: # All = 144 | # Train = 115 | # Test = 29
Positive HLA-B*35:01 - Length13: # All = 66 | # Train = 53 | # Test = 13


 30%|███       | 34/112 [00:53<02:06,  1.62s/it]

Positive HLA-B*35:01 - Length14: # All = 58 | # Train = 46 | # Test = 12
Positive HLA-A*33:01 - Length9: # All = 558 | # Train = 446 | # Test = 112
Positive HLA-A*33:01 - Length10: # All = 521 | # Train = 417 | # Test = 104


 31%|███▏      | 35/112 [00:54<02:01,  1.58s/it]

Positive HLA-A*02:16 - Length9: # All = 152 | # Train = 122 | # Test = 30


 32%|███▏      | 36/112 [00:56<02:00,  1.58s/it]

Positive HLA-C*04:01 - Length8: # All = 628 | # Train = 502 | # Test = 126
Positive HLA-C*04:01 - Length9: # All = 6481 | # Train = 5185 | # Test = 1296
Positive HLA-C*04:01 - Length10: # All = 923 | # Train = 738 | # Test = 185
Positive HLA-C*04:01 - Length11: # All = 255 | # Train = 204 | # Test = 51
Positive HLA-C*04:01 - Length12: # All = 96 | # Train = 77 | # Test = 19
Positive HLA-C*04:01 - Length13: # All = 87 | # Train = 70 | # Test = 17


 33%|███▎      | 37/112 [00:57<01:58,  1.58s/it]

Positive HLA-C*04:01 - Length14: # All = 72 | # Train = 58 | # Test = 14
Positive HLA-A*32:07 - Length9: # All = 70 | # Train = 56 | # Test = 14


 34%|███▍      | 38/112 [00:59<01:54,  1.55s/it]

Positive HLA-B*40:02 - Length8: # All = 1195 | # Train = 956 | # Test = 239
Positive HLA-B*40:02 - Length9: # All = 4125 | # Train = 3300 | # Test = 825
Positive HLA-B*40:02 - Length10: # All = 1449 | # Train = 1159 | # Test = 290
Positive HLA-B*40:02 - Length11: # All = 578 | # Train = 462 | # Test = 116
Positive HLA-B*40:02 - Length12: # All = 163 | # Train = 130 | # Test = 33


 35%|███▍      | 39/112 [01:00<01:56,  1.59s/it]

Positive HLA-B*39:01 - Length8: # All = 104 | # Train = 83 | # Test = 21
Positive HLA-B*39:01 - Length9: # All = 3532 | # Train = 2826 | # Test = 706
Positive HLA-B*39:01 - Length10: # All = 201 | # Train = 161 | # Test = 40
Positive HLA-B*39:01 - Length11: # All = 136 | # Train = 109 | # Test = 27


 36%|███▌      | 40/112 [01:02<01:52,  1.56s/it]

Positive HLA-B*15:11 - Length9: # All = 738 | # Train = 590 | # Test = 148


 37%|███▋      | 41/112 [01:03<01:48,  1.53s/it]

Positive HLA-B*18:03 - Length8: # All = 84 | # Train = 67 | # Test = 17
Positive HLA-B*18:03 - Length9: # All = 128 | # Train = 102 | # Test = 26


 38%|███▊      | 42/112 [01:05<01:48,  1.55s/it]

Positive HLA-B*37:01 - Length8: # All = 484 | # Train = 387 | # Test = 97
Positive HLA-B*37:01 - Length9: # All = 2751 | # Train = 2201 | # Test = 550
Positive HLA-B*37:01 - Length10: # All = 320 | # Train = 256 | # Test = 64
Positive HLA-B*37:01 - Length11: # All = 55 | # Train = 44 | # Test = 11


 38%|███▊      | 43/112 [01:06<01:46,  1.54s/it]

Positive HLA-B*56:01 - Length9: # All = 368 | # Train = 294 | # Test = 74
Positive HLA-B*56:01 - Length10: # All = 142 | # Train = 114 | # Test = 28
Positive HLA-B*56:01 - Length11: # All = 64 | # Train = 51 | # Test = 13


 39%|███▉      | 44/112 [01:08<01:43,  1.52s/it]

Positive HLA-C*16:01 - Length8: # All = 619 | # Train = 495 | # Test = 124
Positive HLA-C*16:01 - Length9: # All = 2609 | # Train = 2087 | # Test = 522
Positive HLA-C*16:01 - Length10: # All = 183 | # Train = 146 | # Test = 37
Positive HLA-C*16:01 - Length11: # All = 53 | # Train = 42 | # Test = 11


 40%|████      | 45/112 [01:09<01:42,  1.53s/it]

Positive HLA-A*02:11 - Length9: # All = 328 | # Train = 262 | # Test = 66


 41%|████      | 46/112 [01:11<01:41,  1.53s/it]

Positive HLA-B*53:01 - Length9: # All = 420 | # Train = 336 | # Test = 84
Positive HLA-B*53:01 - Length10: # All = 235 | # Train = 188 | # Test = 47


 42%|████▏     | 47/112 [01:12<01:38,  1.51s/it]

Positive HLA-B*27:09 - Length8: # All = 76 | # Train = 61 | # Test = 15
Positive HLA-B*27:09 - Length9: # All = 2437 | # Train = 1950 | # Test = 487
Positive HLA-B*27:09 - Length10: # All = 879 | # Train = 703 | # Test = 176
Positive HLA-B*27:09 - Length11: # All = 563 | # Train = 450 | # Test = 113
Positive HLA-B*27:09 - Length12: # All = 241 | # Train = 193 | # Test = 48


 43%|████▎     | 48/112 [01:14<01:38,  1.54s/it]

Positive HLA-B*27:09 - Length13: # All = 144 | # Train = 115 | # Test = 29
Positive HLA-B*27:09 - Length14: # All = 100 | # Train = 80 | # Test = 20
Positive HLA-B*27:06 - Length9: # All = 646 | # Train = 517 | # Test = 129
Positive HLA-B*27:06 - Length10: # All = 198 | # Train = 158 | # Test = 40
Positive HLA-B*27:06 - Length11: # All = 100 | # Train = 80 | # Test = 20


 44%|████▍     | 49/112 [01:16<01:37,  1.54s/it]

Positive HLA-A*26:01 - Length9: # All = 1554 | # Train = 1243 | # Test = 311
Positive HLA-A*26:01 - Length10: # All = 260 | # Train = 208 | # Test = 52


 45%|████▍     | 50/112 [01:17<01:34,  1.52s/it]

Positive HLA-B*83:01 - Length9: # All = 257 | # Train = 206 | # Test = 51


 46%|████▌     | 51/112 [01:19<01:31,  1.50s/it]

Positive HLA-C*03:03 - Length8: # All = 135 | # Train = 108 | # Test = 27
Positive HLA-C*03:03 - Length9: # All = 3789 | # Train = 3031 | # Test = 758
Positive HLA-C*03:03 - Length10: # All = 282 | # Train = 226 | # Test = 56
Positive HLA-C*03:03 - Length11: # All = 76 | # Train = 61 | # Test = 15


 46%|████▋     | 52/112 [01:20<01:32,  1.55s/it]

Positive HLA-B*35:03 - Length9: # All = 2930 | # Train = 2344 | # Test = 586
Positive HLA-B*35:03 - Length10: # All = 195 | # Train = 156 | # Test = 39
Positive HLA-B*35:03 - Length11: # All = 115 | # Train = 92 | # Test = 23


 47%|████▋     | 53/112 [01:22<01:30,  1.53s/it]

Positive HLA-A*69:01 - Length9: # All = 860 | # Train = 688 | # Test = 172
Positive HLA-A*69:01 - Length10: # All = 97 | # Train = 78 | # Test = 19


 48%|████▊     | 54/112 [01:23<01:28,  1.52s/it]

Positive HLA-B*27:07 - Length9: # All = 880 | # Train = 704 | # Test = 176
Positive HLA-B*27:07 - Length10: # All = 414 | # Train = 331 | # Test = 83
Positive HLA-B*27:07 - Length11: # All = 240 | # Train = 192 | # Test = 48
Positive HLA-B*27:07 - Length12: # All = 93 | # Train = 74 | # Test = 19


 49%|████▉     | 55/112 [01:25<01:27,  1.53s/it]

Positive HLA-B*57:01 - Length8: # All = 321 | # Train = 257 | # Test = 64
Positive HLA-B*57:01 - Length9: # All = 4723 | # Train = 3778 | # Test = 945
Positive HLA-B*57:01 - Length10: # All = 2457 | # Train = 1966 | # Test = 491
Positive HLA-B*57:01 - Length11: # All = 2301 | # Train = 1841 | # Test = 460
Positive HLA-B*57:01 - Length12: # All = 1154 | # Train = 923 | # Test = 231
Positive HLA-B*57:01 - Length13: # All = 647 | # Train = 518 | # Test = 129


 50%|█████     | 56/112 [01:26<01:28,  1.58s/it]

Positive HLA-B*57:01 - Length14: # All = 355 | # Train = 284 | # Test = 71
Positive HLA-A*31:01 - Length9: # All = 3247 | # Train = 2598 | # Test = 649
Positive HLA-A*31:01 - Length10: # All = 936 | # Train = 749 | # Test = 187
Positive HLA-A*31:01 - Length11: # All = 262 | # Train = 210 | # Test = 52
Positive HLA-A*31:01 - Length12: # All = 97 | # Train = 78 | # Test = 19
Positive HLA-A*31:01 - Length13: # All = 56 | # Train = 45 | # Test = 11


 51%|█████     | 57/112 [01:28<01:26,  1.57s/it]

Positive HLA-A*68:23 - Length9: # All = 64 | # Train = 51 | # Test = 13


 52%|█████▏    | 58/112 [01:29<01:22,  1.54s/it]

Positive HLA-A*24:13 - Length9: # All = 118 | # Train = 94 | # Test = 24


 53%|█████▎    | 59/112 [01:31<01:22,  1.55s/it]

Positive HLA-B*52:01 - Length8: # All = 304 | # Train = 243 | # Test = 61
Positive HLA-B*52:01 - Length9: # All = 107 | # Train = 86 | # Test = 21


 54%|█████▎    | 60/112 [01:32<01:19,  1.52s/it]

Positive HLA-A*24:03 - Length9: # All = 308 | # Train = 246 | # Test = 62


 54%|█████▍    | 61/112 [01:34<01:16,  1.51s/it]

Positive HLA-A*02:17 - Length9: # All = 114 | # Train = 91 | # Test = 23
Positive HLA-A*02:17 - Length10: # All = 119 | # Train = 95 | # Test = 24


 55%|█████▌    | 62/112 [01:36<01:19,  1.58s/it]

Positive HLA-B*44:03 - Length8: # All = 111 | # Train = 89 | # Test = 22
Positive HLA-B*44:03 - Length9: # All = 3565 | # Train = 2852 | # Test = 713
Positive HLA-B*44:03 - Length10: # All = 1453 | # Train = 1162 | # Test = 291
Positive HLA-B*44:03 - Length11: # All = 586 | # Train = 469 | # Test = 117
Positive HLA-B*44:03 - Length12: # All = 168 | # Train = 134 | # Test = 34


 56%|█████▋    | 63/112 [01:37<01:16,  1.57s/it]

Positive HLA-A*02:05 - Length9: # All = 1831 | # Train = 1465 | # Test = 366
Positive HLA-A*02:05 - Length10: # All = 299 | # Train = 239 | # Test = 60
Positive HLA-A*02:05 - Length11: # All = 68 | # Train = 54 | # Test = 14


 57%|█████▋    | 64/112 [01:39<01:14,  1.55s/it]

Positive HLA-A*02:01 - Length8: # All = 627 | # Train = 502 | # Test = 125
Positive HLA-A*02:01 - Length9: # All = 15673 | # Train = 12538 | # Test = 3135
Positive HLA-A*02:01 - Length10: # All = 4234 | # Train = 3387 | # Test = 847
Positive HLA-A*02:01 - Length11: # All = 2004 | # Train = 1603 | # Test = 401
Positive HLA-A*02:01 - Length12: # All = 912 | # Train = 730 | # Test = 182
Positive HLA-A*02:01 - Length13: # All = 380 | # Train = 304 | # Test = 76


 58%|█████▊    | 65/112 [01:41<01:16,  1.63s/it]

Positive HLA-A*02:01 - Length14: # All = 203 | # Train = 162 | # Test = 41
Positive HLA-A*02:12 - Length9: # All = 276 | # Train = 221 | # Test = 55


 59%|█████▉    | 66/112 [01:42<01:12,  1.57s/it]

Positive HLA-A*26:03 - Length9: # All = 66 | # Train = 53 | # Test = 13


 60%|█████▉    | 67/112 [01:43<01:09,  1.54s/it]

Positive HLA-B*15:18 - Length9: # All = 861 | # Train = 689 | # Test = 172


 61%|██████    | 68/112 [01:45<01:08,  1.56s/it]

Positive HLA-A*02:04 - Length9: # All = 1102 | # Train = 882 | # Test = 220
Positive HLA-A*02:04 - Length10: # All = 120 | # Train = 96 | # Test = 24
Positive HLA-A*02:04 - Length11: # All = 114 | # Train = 91 | # Test = 23


 62%|██████▏   | 69/112 [01:46<01:05,  1.53s/it]

Positive HLA-B*35:08 - Length9: # All = 775 | # Train = 620 | # Test = 155
Positive HLA-B*35:08 - Length10: # All = 184 | # Train = 147 | # Test = 37
Positive HLA-B*35:08 - Length11: # All = 106 | # Train = 85 | # Test = 21


 62%|██████▎   | 70/112 [01:48<01:05,  1.56s/it]

Positive HLA-A*02:02 - Length9: # All = 1208 | # Train = 966 | # Test = 242
Positive HLA-A*02:02 - Length10: # All = 796 | # Train = 637 | # Test = 159


 63%|██████▎   | 71/112 [01:50<01:02,  1.53s/it]

Positive HLA-B*39:24 - Length8: # All = 99 | # Train = 79 | # Test = 20
Positive HLA-B*39:24 - Length9: # All = 388 | # Train = 310 | # Test = 78


 64%|██████▍   | 72/112 [01:51<01:00,  1.51s/it]

Positive HLA-B*45:06 - Length9: # All = 280 | # Train = 224 | # Test = 56


 65%|██████▌   | 73/112 [01:53<01:00,  1.55s/it]

Positive HLA-B*50:01 - Length9: # All = 1256 | # Train = 1005 | # Test = 251
Positive HLA-B*50:01 - Length10: # All = 171 | # Train = 137 | # Test = 34


 66%|██████▌   | 74/112 [01:54<00:58,  1.54s/it]

Positive HLA-B*41:01 - Length9: # All = 433 | # Train = 346 | # Test = 87
Positive HLA-B*41:01 - Length10: # All = 62 | # Train = 50 | # Test = 12


 67%|██████▋   | 75/112 [01:56<00:56,  1.54s/it]

Positive HLA-A*23:01 - Length9: # All = 2433 | # Train = 1946 | # Test = 487
Positive HLA-A*23:01 - Length10: # All = 472 | # Train = 378 | # Test = 94
Positive HLA-A*23:01 - Length11: # All = 103 | # Train = 82 | # Test = 21


 68%|██████▊   | 76/112 [01:57<00:55,  1.54s/it]

Positive HLA-A*32:01 - Length9: # All = 2741 | # Train = 2193 | # Test = 548
Positive HLA-A*32:01 - Length10: # All = 458 | # Train = 366 | # Test = 92
Positive HLA-A*32:01 - Length11: # All = 197 | # Train = 158 | # Test = 39


 69%|██████▉   | 77/112 [01:59<00:54,  1.56s/it]

Positive HLA-B*27:08 - Length9: # All = 750 | # Train = 600 | # Test = 150
Positive HLA-B*27:08 - Length10: # All = 420 | # Train = 336 | # Test = 84
Positive HLA-B*27:08 - Length11: # All = 294 | # Train = 235 | # Test = 59
Positive HLA-B*27:08 - Length12: # All = 124 | # Train = 99 | # Test = 25
Positive HLA-B*27:08 - Length13: # All = 60 | # Train = 48 | # Test = 12


 70%|██████▉   | 78/112 [02:00<00:52,  1.56s/it]

Positive HLA-B*18:01 - Length8: # All = 416 | # Train = 333 | # Test = 83
Positive HLA-B*18:01 - Length9: # All = 1661 | # Train = 1329 | # Test = 332
Positive HLA-B*18:01 - Length10: # All = 168 | # Train = 134 | # Test = 34


 71%|███████   | 79/112 [02:02<00:51,  1.55s/it]

Positive HLA-B*38:01 - Length9: # All = 3119 | # Train = 2495 | # Test = 624


 71%|███████▏  | 80/112 [02:04<00:49,  1.55s/it]

Positive HLA-B*54:01 - Length8: # All = 99 | # Train = 79 | # Test = 20
Positive HLA-B*54:01 - Length9: # All = 695 | # Train = 556 | # Test = 139
Positive HLA-B*54:01 - Length10: # All = 260 | # Train = 208 | # Test = 52
Positive HLA-B*54:01 - Length11: # All = 117 | # Train = 94 | # Test = 23


 72%|███████▏  | 81/112 [02:05<00:47,  1.55s/it]

Positive HLA-C*07:02 - Length8: # All = 208 | # Train = 166 | # Test = 42
Positive HLA-C*07:02 - Length9: # All = 2485 | # Train = 1988 | # Test = 497
Positive HLA-C*07:02 - Length10: # All = 216 | # Train = 173 | # Test = 43
Positive HLA-C*07:02 - Length11: # All = 79 | # Train = 63 | # Test = 16


 73%|███████▎  | 82/112 [02:07<00:46,  1.55s/it]

Positive HLA-C*01:02 - Length8: # All = 216 | # Train = 173 | # Test = 43
Positive HLA-C*01:02 - Length9: # All = 1046 | # Train = 837 | # Test = 209
Positive HLA-C*01:02 - Length10: # All = 324 | # Train = 259 | # Test = 65
Positive HLA-C*01:02 - Length11: # All = 175 | # Train = 140 | # Test = 35
Positive HLA-C*01:02 - Length12: # All = 161 | # Train = 129 | # Test = 32


 74%|███████▍  | 83/112 [02:08<00:45,  1.55s/it]

Positive HLA-A*66:01 - Length9: # All = 74 | # Train = 59 | # Test = 15


 75%|███████▌  | 84/112 [02:10<00:43,  1.54s/it]

Positive HLA-B*45:01 - Length9: # All = 1713 | # Train = 1370 | # Test = 343
Positive HLA-B*45:01 - Length10: # All = 431 | # Train = 345 | # Test = 86
Positive HLA-B*45:01 - Length11: # All = 124 | # Train = 99 | # Test = 25


 76%|███████▌  | 85/112 [02:11<00:41,  1.54s/it]

Positive HLA-A*02:20 - Length9: # All = 802 | # Train = 642 | # Test = 160


 77%|███████▋  | 86/112 [02:13<00:39,  1.54s/it]

Positive HLA-B*27:02 - Length9: # All = 1025 | # Train = 820 | # Test = 205
Positive HLA-B*27:02 - Length10: # All = 720 | # Train = 576 | # Test = 144
Positive HLA-B*27:02 - Length11: # All = 465 | # Train = 372 | # Test = 93
Positive HLA-B*27:02 - Length12: # All = 233 | # Train = 186 | # Test = 47
Positive HLA-B*27:02 - Length13: # All = 107 | # Train = 86 | # Test = 21


 78%|███████▊  | 87/112 [02:14<00:38,  1.55s/it]

Positive HLA-B*14:01 - Length9: # All = 76 | # Train = 61 | # Test = 15


 79%|███████▊  | 88/112 [02:16<00:36,  1.54s/it]

Positive HLA-C*08:02 - Length8: # All = 648 | # Train = 518 | # Test = 130
Positive HLA-C*08:02 - Length9: # All = 3928 | # Train = 3142 | # Test = 786
Positive HLA-C*08:02 - Length10: # All = 446 | # Train = 357 | # Test = 89
Positive HLA-C*08:02 - Length11: # All = 116 | # Train = 93 | # Test = 23


 79%|███████▉  | 89/112 [02:17<00:35,  1.55s/it]

Positive HLA-C*05:01 - Length8: # All = 574 | # Train = 459 | # Test = 115
Positive HLA-C*05:01 - Length9: # All = 2928 | # Train = 2342 | # Test = 586
Positive HLA-C*05:01 - Length10: # All = 468 | # Train = 374 | # Test = 94
Positive HLA-C*05:01 - Length11: # All = 236 | # Train = 189 | # Test = 47
Positive HLA-C*05:01 - Length12: # All = 76 | # Train = 61 | # Test = 15
Positive HLA-C*05:01 - Length13: # All = 60 | # Train = 48 | # Test = 12


 80%|████████  | 90/112 [02:19<00:34,  1.57s/it]

Positive HLA-C*05:01 - Length14: # All = 63 | # Train = 50 | # Test = 13
Positive HLA-A*02:07 - Length9: # All = 1946 | # Train = 1557 | # Test = 389
Positive HLA-A*02:07 - Length10: # All = 364 | # Train = 291 | # Test = 73
Positive HLA-A*02:07 - Length11: # All = 300 | # Train = 240 | # Test = 60


 81%|████████▏ | 91/112 [02:21<00:32,  1.57s/it]

Positive HLA-B*51:01 - Length8: # All = 884 | # Train = 707 | # Test = 177
Positive HLA-B*51:01 - Length9: # All = 2724 | # Train = 2179 | # Test = 545
Positive HLA-B*51:01 - Length10: # All = 652 | # Train = 522 | # Test = 130
Positive HLA-B*51:01 - Length11: # All = 357 | # Train = 286 | # Test = 71
Positive HLA-B*51:01 - Length12: # All = 281 | # Train = 225 | # Test = 56
Positive HLA-B*51:01 - Length13: # All = 76 | # Train = 61 | # Test = 15


 82%|████████▏ | 92/112 [02:22<00:31,  1.58s/it]

Positive HLA-A*26:02 - Length9: # All = 188 | # Train = 150 | # Test = 38


 83%|████████▎ | 93/112 [02:24<00:29,  1.56s/it]

Positive HLA-B*15:02 - Length9: # All = 211 | # Train = 169 | # Test = 42


 84%|████████▍ | 94/112 [02:25<00:27,  1.54s/it]

Positive HLA-A*30:02 - Length9: # All = 563 | # Train = 450 | # Test = 113
Positive HLA-A*30:02 - Length10: # All = 282 | # Train = 226 | # Test = 56


 85%|████████▍ | 95/112 [02:27<00:26,  1.54s/it]

Positive HLA-B*07:02 - Length8: # All = 723 | # Train = 578 | # Test = 145
Positive HLA-B*07:02 - Length9: # All = 8205 | # Train = 6564 | # Test = 1641
Positive HLA-B*07:02 - Length10: # All = 2141 | # Train = 1713 | # Test = 428
Positive HLA-B*07:02 - Length11: # All = 1084 | # Train = 867 | # Test = 217
Positive HLA-B*07:02 - Length12: # All = 436 | # Train = 349 | # Test = 87
Positive HLA-B*07:02 - Length13: # All = 256 | # Train = 205 | # Test = 51


 86%|████████▌ | 96/112 [02:28<00:25,  1.58s/it]

Positive HLA-B*07:02 - Length14: # All = 138 | # Train = 110 | # Test = 28
Positive HLA-C*07:01 - Length8: # All = 177 | # Train = 142 | # Test = 35
Positive HLA-C*07:01 - Length9: # All = 2843 | # Train = 2274 | # Test = 569
Positive HLA-C*07:01 - Length10: # All = 317 | # Train = 254 | # Test = 63
Positive HLA-C*07:01 - Length11: # All = 161 | # Train = 129 | # Test = 32
Positive HLA-C*07:01 - Length12: # All = 143 | # Train = 114 | # Test = 29


 87%|████████▋ | 97/112 [02:30<00:23,  1.57s/it]

Positive HLA-B*39:06 - Length9: # All = 1263 | # Train = 1010 | # Test = 253


 88%|████████▊ | 98/112 [02:32<00:21,  1.56s/it]

Positive HLA-B*14:02 - Length8: # All = 421 | # Train = 337 | # Test = 84
Positive HLA-B*14:02 - Length9: # All = 2619 | # Train = 2095 | # Test = 524
Positive HLA-B*14:02 - Length10: # All = 154 | # Train = 123 | # Test = 31


 88%|████████▊ | 99/112 [02:33<00:20,  1.55s/it]

Positive HLA-C*02:02 - Length8: # All = 95 | # Train = 76 | # Test = 19
Positive HLA-C*02:02 - Length9: # All = 4128 | # Train = 3302 | # Test = 826
Positive HLA-C*02:02 - Length10: # All = 491 | # Train = 393 | # Test = 98
Positive HLA-C*02:02 - Length11: # All = 101 | # Train = 81 | # Test = 20


 89%|████████▉ | 100/112 [02:35<00:18,  1.56s/it]

Positive HLA-B*15:42 - Length9: # All = 283 | # Train = 226 | # Test = 57


 90%|█████████ | 101/112 [02:36<00:17,  1.57s/it]

Positive HLA-A*11:01 - Length8: # All = 68 | # Train = 54 | # Test = 14
Positive HLA-A*11:01 - Length9: # All = 5409 | # Train = 4327 | # Test = 1082
Positive HLA-A*11:01 - Length10: # All = 2352 | # Train = 1882 | # Test = 470
Positive HLA-A*11:01 - Length11: # All = 930 | # Train = 744 | # Test = 186
Positive HLA-A*11:01 - Length12: # All = 244 | # Train = 195 | # Test = 49
Positive HLA-A*11:01 - Length13: # All = 70 | # Train = 56 | # Test = 14


 91%|█████████ | 102/112 [02:38<00:15,  1.57s/it]

Positive HLA-A*01:01 - Length8: # All = 267 | # Train = 214 | # Test = 53
Positive HLA-A*01:01 - Length9: # All = 4173 | # Train = 3338 | # Test = 835
Positive HLA-A*01:01 - Length10: # All = 1787 | # Train = 1430 | # Test = 357
Positive HLA-A*01:01 - Length11: # All = 922 | # Train = 738 | # Test = 184
Positive HLA-A*01:01 - Length12: # All = 688 | # Train = 550 | # Test = 138
Positive HLA-A*01:01 - Length13: # All = 436 | # Train = 349 | # Test = 87


 92%|█████████▏| 103/112 [02:39<00:14,  1.58s/it]

Positive HLA-A*01:01 - Length14: # All = 204 | # Train = 163 | # Test = 41
Positive HLA-B*27:04 - Length9: # All = 1653 | # Train = 1322 | # Test = 331
Positive HLA-B*27:04 - Length10: # All = 236 | # Train = 189 | # Test = 47
Positive HLA-B*27:04 - Length11: # All = 103 | # Train = 82 | # Test = 21


 93%|█████████▎| 104/112 [02:41<00:12,  1.56s/it]

Positive HLA-C*07:04 - Length8: # All = 76 | # Train = 61 | # Test = 15
Positive HLA-C*07:04 - Length9: # All = 2179 | # Train = 1743 | # Test = 436
Positive HLA-C*07:04 - Length10: # All = 126 | # Train = 101 | # Test = 25


 94%|█████████▍| 105/112 [02:42<00:10,  1.56s/it]

Positive HLA-B*44:27 - Length10: # All = 54 | # Train = 43 | # Test = 11


 95%|█████████▍| 106/112 [02:44<00:09,  1.54s/it]

Positive HLA-A*80:01 - Length9: # All = 142 | # Train = 114 | # Test = 28


 96%|█████████▌| 107/112 [02:45<00:07,  1.53s/it]

Positive HLA-A*02:50 - Length9: # All = 71 | # Train = 57 | # Test = 14


 96%|█████████▋| 108/112 [02:47<00:06,  1.55s/it]

Positive HLA-B*40:01 - Length8: # All = 81 | # Train = 65 | # Test = 16
Positive HLA-B*40:01 - Length9: # All = 2868 | # Train = 2294 | # Test = 574
Positive HLA-B*40:01 - Length10: # All = 900 | # Train = 720 | # Test = 180
Positive HLA-B*40:01 - Length11: # All = 438 | # Train = 350 | # Test = 88
Positive HLA-B*40:01 - Length12: # All = 122 | # Train = 98 | # Test = 24


 97%|█████████▋| 109/112 [02:49<00:04,  1.55s/it]

Positive HLA-B*73:01 - Length9: # All = 408 | # Train = 326 | # Test = 82


 98%|█████████▊| 110/112 [02:50<00:03,  1.53s/it]

Positive HLA-C*03:04 - Length8: # All = 226 | # Train = 181 | # Test = 45
Positive HLA-C*03:04 - Length9: # All = 3374 | # Train = 2699 | # Test = 675
Positive HLA-C*03:04 - Length10: # All = 324 | # Train = 259 | # Test = 65
Positive HLA-C*03:04 - Length11: # All = 99 | # Train = 79 | # Test = 20


 99%|█████████▉| 111/112 [02:52<00:01,  1.54s/it]

Positive HLA-C*14:02 - Length8: # All = 388 | # Train = 310 | # Test = 78
Positive HLA-C*14:02 - Length9: # All = 1462 | # Train = 1170 | # Test = 292
Positive HLA-C*14:02 - Length10: # All = 253 | # Train = 202 | # Test = 51


100%|██████████| 112/112 [02:53<00:00,  1.55s/it]


Counter({1: 287329, 0: 287329}) Counter({1: 71837, 0: 71837})
Counter({1: 287329, 0: 287329}) Counter({1: 71837, 0: 71837})
Counter({1: 287329, 0: 287329}) Counter({1: 71837, 0: 71837})
Counter({1: 287329, 0: 287329}) Counter({1: 71837, 0: 71837})
Counter({1: 287348, 0: 287348}) Counter({1: 71818, 0: 71818})


NameError: name 'fold' is not defined

In [13]:
for fold in range(5):
    save_trainset_cvdata(train_set, train_data_cv_idx_dict, val_data_cv_idx_dict, fold, savepath = True)

In [14]:
Counter(pd.read_csv('/home/chujunyi/5_ZY_MHC/Anthem/Dataset/val_data_fold0.csv', index_col = 0).label)

Counter({0: 71837, 1: 71837})