In [32]:
import tensorflow as tf
import numpy as np

def load_tsv_format_data(filename, skip_head=True):
    sequences = []
    labels = []

    with open(filename, 'r') as file:
        if skip_head:
            next(file)
        for line in file:
            if line[-1] == '\n':
                line = line[:-1]
            items = line.split('\t')
            sequences.append(items[2])
            labels.append(int(items[1]))

    return sequences, labels

def load_dataset(data_path):
    One_hot = {'A': [1, 0, 0, 0],
               'T': [0, 1, 0, 0],
               'G': [0, 0, 1, 0],
               'C': [0, 0, 0, 1]}
    NCP = {'A': [1, 1, 1],
           'T': [0, 1, 0],
           'G': [1, 0, 0],
           'C': [0, 0, 1]}
    DPCP = {'AA': [0.5773884923447732, 0.6531915653378907, 0.6124592000985356, 0.8402684612384332, 0.5856582729115565,
                   0.5476708282666789],
            'AT': [0.7512077598863804, 0.6036675879079278, 0.6737051546096536, 0.39069870063063133, 1.0,
                   0.76847598772376],
            'AG': [0.7015450873735896, 0.6284296628760702, 0.5818362228429766, 0.6836002897416182, 0.5249586459219764,
                   0.45903777008667923],
            'AC': [0.8257018549087278, 0.6531915653378907, 0.7043281318652126, 0.5882368974116978, 0.7888705476333944,
                   0.7467063799220581],
            'TA': [0.3539063797840531, 0.15795248106354978, 0.48996729107629966, 0.1795369895818257, 0.3059118434042811,
                   0.32686549630327577],
            'TT': [0.5773884923447732, 0.6531915653378907, 0.0, 0.8402684612384332, 0.5856582729115565,
                   0.5476708282666789],
            'TG': [0.32907512978081865, 0.3312861433089369, 0.5205902683318586, 0.4179453841534657, 0.45898067049412195,
                   0.3501900760908136],
            'TC': [0.5525570698352168, 0.6531915653378907, 0.6124592000985356, 0.5882368974116978, 0.49856742124957026,
                   0.6891727614587756],
            'GA': [0.5525570698352168, 0.6531915653378907, 0.6124592000985356, 0.5882368974116978, 0.49856742124957026,
                   0.6891727614587756],
            'GT': [0.8257018549087278, 0.6531915653378907, 0.7043281318652126, 0.5882368974116978, 0.7888705476333944,
                   0.7467063799220581],
            'GG': [0.5773884923447732, 0.7522393476914946, 0.5818362228429766, 0.6631651908463315, 0.4246720956706261,
                   0.6083143907016332],
            'GC': [0.5525570698352168, 0.6036675879079278, 0.7961968911255676, 0.5064970193495165, 0.6780274730118172,
                   0.8400043540595654],
            'CA': [0.32907512978081865, 0.3312861433089369, 0.5205902683318586, 0.4179453841534657, 0.45898067049412195,
                   0.3501900760908136],
            'CT': [0.7015450873735896, 0.6284296628760702, 0.5818362228429766, 0.6836002897416182, 0.5249586459219764,
                   0.45903777008667923],
            'CG': [0.2794124572680277, 0.3560480457707574, 0.48996729107629966, 0.4247569687810134, 0.5170412957708868,
                   0.32686549630327577],
            'CC': [0.5773884923447732, 0.7522393476914946, 0.5818362228429766, 0.6631651908463315, 0.4246720956706261,
                   0.6083143907016332]}

    sequences, label_list = load_tsv_format_data(data_path)

    One_hot_matrix_list, NCP_matrix_list, DPCP_matrix_list = [], [], []

    for sequence_cur in sequences:
        if len(sequence_cur) < 41:
            print('The input sequence ''%s'' does not meet the minimum length requirement of 40.' % sequence_cur)
        else:
            One_hot_matrix_middle = np.zeros([4, 38])
            NCP_matrix_middle = np.zeros([3, 38])
            DPCP_matrix_middle = np.zeros([6, 38])
            for pos in range(38):
                One_hot_matrix_middle[0:4, pos] += np.asarray(np.float32(One_hot[sequence_cur[pos + 1]]))
                NCP_matrix_middle[0:3, pos] += np.asarray(np.float32(NCP[sequence_cur[pos + 1]]))
                DPCP_matrix_middle[0:6, pos] += np.asarray(np.float32(DPCP[sequence_cur[pos:pos + 2]]))

            for left in range(len(sequence_cur) - 40):
                right = left + 40

                One_hot_matrix_left = np.asarray(np.float32(One_hot[sequence_cur[left]]))
                One_hot_matrix_right1 = np.asarray(np.float32(One_hot[sequence_cur[right - 1]]))
                One_hot_matrix_right2 = np.asarray(np.float32(One_hot[sequence_cur[right]]))
                One_hot_matrix_right = np.concatenate((One_hot_matrix_right1[:, np.newaxis], One_hot_matrix_right2[:, np.newaxis]), axis=1)
                One_hot_matrix_cur = np.concatenate((One_hot_matrix_left[:, np.newaxis], One_hot_matrix_middle, One_hot_matrix_right), axis=1)

                NCP_matrix_left = np.asarray(np.float32(NCP[sequence_cur[left]]))
                NCP_matrix_right1 = np.asarray(np.float32(NCP[sequence_cur[right - 1]]))
                NCP_matrix_right2 = np.asarray(np.float32(NCP[sequence_cur[right]]))
                NCP_matrix_right = np.concatenate((NCP_matrix_right1[:, np.newaxis], NCP_matrix_right2[:, np.newaxis]), axis=1)
                NCP_matrix_cur = np.concatenate((NCP_matrix_left[:, np.newaxis], NCP_matrix_middle, NCP_matrix_right), axis=1)

                DPCP_matrix_left = np.asarray(np.float32(DPCP[sequence_cur[left:left + 2]]))
                DPCP_matrix_right1 =  np.asarray(np.float32(DPCP[sequence_cur[right - 2:right]]))
                DPCP_matrix_right2 = np.asarray(np.float32(DPCP[sequence_cur[right - 1:right + 1]]))
                DPCP_matrix_right = np.concatenate((DPCP_matrix_right1[:, np.newaxis], DPCP_matrix_right2[:, np.newaxis]), axis=1)
                DPCP_matrix_cur = np.concatenate((DPCP_matrix_left[:, np.newaxis], DPCP_matrix_middle, DPCP_matrix_right), axis=1)

                One_hot_matrix_list.append(One_hot_matrix_cur)
                NCP_matrix_list.append(NCP_matrix_cur)
                DPCP_matrix_list.append(DPCP_matrix_cur)

    One_hot_matrix_input = np.asarray([i for i in One_hot_matrix_list], dtype=np.float32)
    NCP_matrix_input = np.asarray([i for i in NCP_matrix_list], dtype=np.float32)
    DPCP_matrix_input = np.asarray([i for i in DPCP_matrix_list], dtype=np.float32)
    label_list_input = np.asarray([i for i in label_list], dtype=np.int64)
    all_matrix_input = np.concatenate((One_hot_matrix_input, NCP_matrix_input, DPCP_matrix_input), axis=1)
    all_matrix_input= tf.transpose(all_matrix_input,perm=[0, 2, 1])
    return sequences, label_list_input, One_hot_matrix_input, NCP_matrix_input, DPCP_matrix_input, all_matrix_input


In [33]:
path= r"C:\Users\Devatraj\MLproj\ipynb\DNA-Sequence-Cancer-Diagnosis\Dataset-tsv"
sequences, labels = load_tsv_format_data(path)
sequence_list, label_list, One_hot_matrix_input, NCP_matrix_input, DPCP_matrix_input, all_list_input = load_dataset(path)
print(label_list.shape)
print(One_hot_matrix_input.shape)
print(NCP_matrix_input.shape)
print(DPCP_matrix_input.shape)
print(all_list_input.shape)

(3166,)
(3166, 4, 41)
(3166, 3, 41)
(3166, 6, 41)
(3166, 41, 13)


In [34]:
import tensorflow as tf
import keras

from keras import layers

def my_model(input_tensor, input_channels):
    x = layers.Conv1D(filters=64, kernel_size=16, strides=1, padding='same', kernel_initializer = 'glorot_normal', input_shape=(None, input_channels))(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    y = layers.Conv1D(filters=64, kernel_size=16, strides=1, padding='same',kernel_initializer = 'glorot_normal')(x)
    y = layers.BatchNormalization()(y)
    y = layers.LeakyReLU(alpha=0.2)(y)
    int_norm1 = layers.Add()([x,y])
    #     x = layers.MaxPooling1D(2, 2)(tf.nn.relu(x))
    
    #     x = layers.MaxPooling1D(2, 2)(tf.nn.relu(x))
    #     x = layers.Flatten()(x)
    #     x = layers.Dense(units=256, activation='relu')(x)
    #     x = layers.Dense(units=2)(x)
    kernels = range(2, 32, 4)
    nets = []
    for i in range(len(kernels)): 
        conv3 = layers.Conv1D(filters = 64, kernel_size = 8, padding = 'same', 
                              name='conv3_' + str(i), kernel_initializer = 'glorot_normal')(int_norm1)
        norm3 = layers.BatchNormalization(name='norm3_' + str(i))(conv3)
        norm3 = layers.LeakyReLU(alpha=0.2)(norm3)

        int_norm2 = layers.Add()([int_norm1, norm3])
 
        conv4 = layers.Conv1D(filters = 128, kernel_size = kernels[i], padding = 'valid', 
                              name='conv4_' + str(i), kernel_initializer = 'glorot_normal')(int_norm2)
        norm4 = layers.BatchNormalization(name='norm4_' + str(i))(conv4)
        norm4 = layers.LeakyReLU(alpha=0.2)(norm4)
 
        pool = layers.MaxPooling1D(2,2)(norm4)
        flat = layers.Flatten(name='flat_' + str(i))(pool)
        nets.append(flat)
    net = layers.Concatenate(axis=1)(nets)
    for i in range(0):
        net = layers.Dense(2, activation='relu', name='dense_' + str(i))(net)
 
    net = layers.Dropout(0.4)(net)
    net = layers.Dense(2, activation='sigmoid', name='dense_out_6')(net)
 
    return net

input_tensor = tf.keras.Input(shape=(41,13))
output_tensor = my_model(input_tensor,13)
model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)

tmp = tf.random.normal((10, 41, 13))
out = model(tmp)
print('MyModel:', out.shape)

MyModel: (10, 2)


In [35]:
import tensorflow as tf
import random
import numpy as np
import keras
from keras import layers
from keras.initializers import glorot_uniform

def initialize(layer):
    if isinstance(layer, tf.keras.layers.Conv2D) or isinstance(layer, tf.keras.layers.Dense):
        layer.kernel_initializer = glorot_uniform()
        if layer.bias is not None:
            layer.bias_initializer = tf.keras.initializers.Zeros()

def obtain_random(length):
    list_info = []
    while True:
        info = random.randint(0,length-1)
        if info not in list_info:
            list_info.append(info)
        if len(list_info) ==length:
            break

    return list_info

def cat_batch(input_list, info_list, idx, batch_size, batch_num):
    if idx == (batch_num - 1):
        batch = info_list[idx * batch_size:]
    else:
        batch = info_list[idx * batch_size: (idx + 1) * batch_size]

    for idx, x in enumerate(batch):
        if idx==0:
          catbatch = tf.expand_dims(input_list[x], axis=0)
        else:
          x_tensor = tf.expand_dims(input_list[x], axis=0)
          catbatch = tf.concat([catbatch, x_tensor], axis=0)

    return catbatch

train_data_path = r"C:\Users\Devatraj\MLproj\ipynb\DNA-Sequence-Cancer-Diagnosis\Dataset-tsv\train.tsv"
train_sequence_list, train_label_list, train_One_hot_matrix_input, train_NCP_matrix_input, train_DPCP_matrix_input, train_all_matrix_input = load_dataset(train_data_path)
train_list_info = obtain_random(len(train_sequence_list))

val_data_path = r"C:\Users\Devatraj\MLproj\ipynb\DNA-Sequence-Cancer-Diagnosis\Dataset-tsv\test.csv"
val_sequence_list, val_label_list,  val_One_hot_matrix_input, val_NCP_matrix_input, val_DPCP_matrix_input, val_all_matrix_input = load_dataset(val_data_path)
val_list_info = obtain_random(len(val_sequence_list))


input_tensor = tf.keras.Input(shape=(41, 13))
output_tensor = my_model(input_tensor,13)
model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)

initialize(model)

# if len(physical_devices) > 0:
#     model = tf.keras.utils.multi_gpu_model(model, gpus=len(physical_devices))

class CustomLearningRateSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, initial_learning_rate, decay_factor, decay_steps):
        self.initial_learning_rate = initial_learning_rate
        self.decay_factor = decay_factor
        self.decay_steps = decay_steps

    def __call__(self, step):
        val = tf.dtypes.cast(step // self.decay_steps, tf.float32)
        return self.initial_learning_rate * tf.pow(tf.constant(self.decay_factor, dtype=tf.float32), val)

lr1=0.01
lr_schedule = CustomLearningRateSchedule(initial_learning_rate=lr1, decay_factor=0.7, decay_steps=4)

optimizer = tf.keras.optimizers.SGD(
    learning_rate=lr_schedule,
    momentum=0.9,
    nesterov=False,
    weight_decay=0.01,
    name='SGD')
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])



In [36]:
batch_size = 16
train_patience = 10
best_acc, patience = None, 0

for epoch in range(100):
    if patience == train_patience:
        print(f"val_loss did not improve after {train_patience} Epochs, thus Earlystopping is calling")
        break

    cnt, loss_sum = 0, 0
    val = epoch // 4
    lr = 1e-2 * pow(0.7, val)
    if lr < 1e-5:
        lr = 1e-5

    print(lr)

    batch_num = np.ceil(len(train_list_info) / batch_size).astype(int)
    for idx in range(batch_num):
        with tf.GradientTape() as tape:
            x = cat_batch(train_all_matrix_input, train_list_info, idx, batch_size, batch_num)
            label = cat_batch(train_label_list, train_list_info, idx, batch_size, batch_num)
            label = tf.one_hot(label, depth=2)
            logits = model(x, training=True)
            loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(label, logits))

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        loss_sum += loss.numpy()
        cnt += 1

    final_loss = loss_sum / cnt
    print(f"Epoch: {epoch + 1}, Train_Loss: {final_loss}")

    if epoch % 1 == 0:
        cnt, total_correct = 0, 0
        batch_num = np.ceil(len(val_list_info) / batch_size).astype(int)
        for idx in range(batch_num):
            x = cat_batch(val_all_matrix_input, val_list_info, idx, batch_size, batch_num)
            label = cat_batch(val_label_list, val_list_info, idx, batch_size, batch_num)

            logits = model(x)
            pred = tf.argmax(logits, axis=1)
            correct = tf.reduce_sum(tf.cast(tf.equal(pred, label), dtype=tf.int32))
            total_correct += correct.numpy()
            cnt += x.shape[0]

        acc = total_correct / cnt

        if best_acc is None or acc > best_acc:
            best_acc, patience = acc, 0
            model.save_weights("1.tf")
        else:
            patience += 1

        print(f"Epoch: {epoch + 1}, Valid_acc: {acc}")

0.01
Epoch: 1, Train_Loss: 7.426956513915399
Epoch: 1, Valid_acc: 0.6737207833228048
0.01
Epoch: 2, Train_Loss: 5.677877791150652
Epoch: 2, Valid_acc: 0.6740366392924826
0.01
Epoch: 3, Train_Loss: 5.507106178186157
Epoch: 3, Valid_acc: 0.6737207833228048
0.01
Epoch: 4, Train_Loss: 5.791163147775212
Epoch: 4, Valid_acc: 0.6740366392924826
0.006999999999999999
Epoch: 5, Train_Loss: 5.613406137742027
Epoch: 5, Valid_acc: 0.6740366392924826
0.006999999999999999
Epoch: 6, Train_Loss: 5.646482059148827
Epoch: 6, Valid_acc: 0.6740366392924826
0.006999999999999999
Epoch: 7, Train_Loss: 5.453591563127408
Epoch: 7, Valid_acc: 0.6740366392924826
0.006999999999999999
Epoch: 8, Train_Loss: 5.5370392665417505
Epoch: 8, Valid_acc: 0.6740366392924826
0.0049
Epoch: 9, Train_Loss: 5.652509670516457
Epoch: 9, Valid_acc: 0.6740366392924826
0.0049
Epoch: 10, Train_Loss: 5.5882238923299195
Epoch: 10, Valid_acc: 0.6740366392924826
0.0049
Epoch: 11, Train_Loss: 5.52189742833978
Epoch: 11, Valid_acc: 0.6740366

In [37]:
import csv
import numpy
import math
def eff(labels, preds):

    TP, FN, FP, TN = 0, 0, 0, 0

    for idx,label in enumerate(labels):

        if label == 1:
            if label == preds[idx]:
                TP += 1
            else: FN += 1
        elif label == preds[idx]:
            TN += 1
        else: FP += 1

    return TP, FN, FP, TN

def Judeff(TP, FN, FP, TN):

    SN = TP / (TP + FN)
    SP = TN / (TN + FP)
    ACC = (TP + TN) / (TP + FN + FP + TN)
    # MCC = (TP * TN - FP * FN) / (math.sqrt((TP + FN) * (TP + FP) * (TN + FP) * (TN + FN)))

    return SN, SP, ACC

def Calauc(labels, preds):

    labels = tf.convert_to_tensor(labels)
    preds = tf.convert_to_tensor(preds)

    labels_np = labels.numpy()
    preds_np = preds.numpy()

    f = list(zip(preds, labels))
    rank = [values2 for values1, values2 in sorted(f, key=lambda x: x[0])]
    rankList = [i + 1 for i in range(len(rank)) if rank[i] == 1]
    pos_cnt = np.sum(labels == 1)
    neg_cnt = np.sum(labels == 0)
    AUC = (np.sum(rankList) - pos_cnt * (pos_cnt + 1) / 2) / (pos_cnt * neg_cnt)

    return AUC


In [None]:
import os
folder_path = r"C:\Users\Devatraj\MLproj\ipynb\DNA-Sequence-Cancer-Diagnosis\Dataset-tsv"

dataset_files = [f for f in os.listdir(folder_path) if f.endswith(".tsv")]
for dataset_file in dataset_files:
    dataset_path = os.path.join(folder_path, dataset_file)

    test_sequence_list, test_label_list, test_One_hot_matrix_input, test_NCP_matrix_input, test_DPCP_matrix_input, test_all_matrix_input = load_dataset(dataset_path)
    test_list_info = obtain_random(len(test_label_list))
    batch_size = len(test_sequence_list)
    input_tensor = tf.keras.Input(shape=(41, 13))
    output_tensor = my_model(input_tensor,13)
    model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
    model.load_weights("1.tf")
    model.compile()
    TP, FN, FP, TN = 0, 0, 0, 0
    AUC = 0

    batch_num = int(np.ceil(len(test_list_info) / batch_size))

    for idx in range(batch_num):
        x = cat_batch(test_all_matrix_input, test_list_info, idx, batch_size, batch_num)
        label = cat_batch(test_label_list, test_list_info, idx, batch_size, batch_num)

        logits = model(x)
        pred = tf.argmax(logits, axis=1)

        A, B, C, D = eff(label, pred)
        TP += A
        FN += B
        FP += C
        TN += D
        AUC += Calauc(label, pred)

    SN, SP, ACC = Judeff(TP, FN, FP, TN)
    print(f"Results for dataset: {dataset_file}")
    print("TP: {}, FN: {}, FP: {}, TN: {}".format(TP, FN, FP, TN))
    print("SN: {}, SP: {}, ACC: {}, AUC: {}".format(SN, SP, ACC, AUC / batch_num))
    modelname = 'medcnn'
    modelname = f'{dataset_file.split(".")[0]}_{modelname}'
    date = [modelname, TP, FN, FP, TN, SN, SP, ACC, AUC / batch_num]

    with open('rundate.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Model', 'TP', 'FN', 'FP', 'TN', 'SN', 'SP', 'ACC', 'AUC'])
        writer.writerow(date)

Results for dataset: 5c hs test.tsv
TP: 207, FN: 115, FP: 108, TN: 155
SN: 0.6428571428571429, SP: 0.5893536121673004, ACC: 0.6188034188034188, AUC: 0.6264671846586213


Results for dataset: 5c mm test.tsv
TP: 484, FN: 254, FP: 287, TN: 385
SN: 0.6558265582655827, SP: 0.5729166666666666, ACC: 0.6163120567375886, AUC: 0.620392550651697


Results for dataset: 5chs train.tsv
TP: 1259, FN: 647, FP: 804, TN: 868
SN: 0.6605456453305352, SP: 0.5191387559808612, ACC: 0.5944661822247065, AUC: 0.5847995752521626


Results for dataset: 5cmm train.tsv
TP: 1870, FN: 1071, FP: 1369, TN: 1638
SN: 0.6358381502890174, SP: 0.5447289657465912, ACC: 0.589778076664425, AUC: 0.5932215061603396


Results for dataset: 6A ce test.tsv
TP: 1031, FN: 559, FP: 921, TN: 669
SN: 0.6484276729559748, SP: 0.4207547169811321, ACC: 0.5345911949685535, AUC: 0.5421102804477671


Results for dataset: 6A ce train.tsv
TP: 3975, FN: 2396, FP: 3544, TN: 2827
SN: 0.6239208915397897, SP: 0.4437293988384869, ACC: 0.5338251451891383, AUC: 0.5374362636023314


Results for dataset: 6A ceq test.tsv
TP: 602, FN: 415, FP: 836, TN: 786
SN: 0.591937069813176, SP: 0.4845869297163995, ACC: 0.5259568018188708, AUC: 0.5449122015744671


Results for dataset: 6A ceq train.tsv
TP: 3049, FN: 2000, FP: 2360, TN: 2084
SN: 0.6038819568231333, SP: 0.46894689468946893, ACC: 0.5407142104708733, AUC: 0.5370637331112791


Results for dataset: 6A Dme test.tsv
TP: 2505, FN: 851, FP: 1833, TN: 1524
SN: 0.7464243146603099, SP: 0.4539767649687221, ACC: 0.6001787576344406, AUC: 0.598408392191365


Results for dataset: 6A Dme train.tsv
TP: 5929, FN: 1906, FP: 4383, TN: 3451
SN: 0.7567326100829611, SP: 0.440515700791422, ACC: 0.5986342459633671, AUC: 0.6005690183626784


Results for dataset: 6A Fver test.tsv
TP: 461, FN: 178, FP: 321, TN: 315
SN: 0.7214397496087637, SP: 0.49528301886792453, ACC: 0.6086274509803922, AUC: 0.5902796232320549




Results for dataset: 6A Fver train.tsv
TP: 1680, FN: 783, FP: 1331, TN: 1135
SN: 0.682095006090134, SP: 0.4602595296025953, ACC: 0.5711097585717184, AUC: 0.5737034633253416


Results for dataset: 6A Hsa test.tsv
TP: 2591, FN: 1107, FP: 2134, TN: 1525
SN: 0.7006489994591671, SP: 0.4167805411314567, ACC: 0.5594671741198858, AUC: 0.5521257067668851


Results for dataset: 6A Hsa train.tsv
TP: 10138, FN: 4499, FP: 8483, TN: 6193
SN: 0.6926282708205234, SP: 0.4219814663396021, ACC: 0.557124825162897, AUC: 0.5567930294521068


Results for dataset: 6A Rch test.tsv
TP: 114, FN: 34, FP: 89, TN: 65
SN: 0.7702702702702703, SP: 0.42207792207792205, ACC: 0.5927152317880795, AUC: 0.6308353808353808


Results for dataset: 6A Rch train.tsv
TP: 265, FN: 186, FP: 248, TN: 197
SN: 0.5875831485587583, SP: 0.44269662921348313, ACC: 0.515625, AUC: 0.49879169884650837


Results for dataset: 6A Sce test.tsv
TP: 533, FN: 225, FP: 406, TN: 354
SN: 0.7031662269129287, SP: 0.46578947368421053, ACC: 0.5843214756258235, AUC: 0.5659960422163588


Results for dataset: 6A Sce train.tsv
TP: 2107, FN: 921, FP: 1612, TN: 1414
SN: 0.6958388375165125, SP: 0.46728354263053534, ACC: 0.581598942847704, AUC: 0.5800324968721106


Results for dataset: 6A Tol test.tsv
TP: 317, FN: 211, FP: 563, TN: 371
SN: 0.6003787878787878, SP: 0.39721627408993576, ACC: 0.47058823529411764, AUC: 0.49878333657776913


Results for dataset: 6A tol train.tsv
TP: 1836, FN: 1015, FP: 1367, TN: 1078
SN: 0.6439845668186601, SP: 0.4408997955010225, ACC: 0.550226586102719, AUC: 0.5459429798606882


Results for dataset: 6A tth test.tsv
TP: 1031, FN: 559, FP: 921, TN: 669
SN: 0.6484276729559748, SP: 0.4207547169811321, ACC: 0.5345911949685535, AUC: 0.5303939717574463


Results for dataset: 6A tth train.tsv
TP: 3975, FN: 2396, FP: 3544, TN: 2827
SN: 0.6239208915397897, SP: 0.4437293988384869, ACC: 0.5338251451891383, AUC: 0.5373329860197581




Results for dataset: 6A Xoc test.tsv
TP: 2203, FN: 1284, FP: 1981, TN: 1501
SN: 0.6317751648981933, SP: 0.43107409534750146, ACC: 0.5314966279236619, AUC: 0.5330704823544973


Results for dataset: 6A Xoc train.tsv
TP: 8800, FN: 4928, FP: 7811, TN: 5922
SN: 0.6410256410256411, SP: 0.43122405883637954, ACC: 0.5361057499726886, AUC: 0.5370690083539607


Results for dataset: 6aat test.tsv
TP: 6356, FN: 2910, FP: 4801, TN: 4396
SN: 0.6859486293977984, SP: 0.47798195063607696, ACC: 0.5823538969831555, AUC: 0.5826124079115224


