In [1]:
import sys
import os

# Put the parent of the current working directory first on sys.path so the
# `lib.*` imports below can resolve (presumably `lib` lives one level above
# this notebook -- confirm). Note that '..' is resolved against the kernel's
# working directory, not against the notebook file's location.
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
from lib.pipeline import Pipeline
import torch
from torchdrug import utils, data
from lib.lr_scheduler import ExponentialLR

# CUDA device index used by make_pipeline: passed as the pipeline's `gpus`
# entry, as the model's `gpu` kwarg, and as the checkpoint's map_location.
GPU = 0

def make_pipeline(noise_rate, gpu=None,
                  pretrained_path='../ResidueType_lmg_4_512_0.57268.pth'):
    """Build an lm-gearnet pipeline on atpbind3d with pretrained GearNet weights.

    Args:
        noise_rate: Forwarded unchanged as ``rus_noise_rate`` in ``rus_kwargs``.
        gpu: CUDA device index used for the pipeline's ``gpus`` list, the
            model's ``gpu`` kwarg and the checkpoint's ``map_location``.
            ``None`` (the default) falls back to the module-level ``GPU``
            constant, looked up at call time.
        pretrained_path: Path of the state dict loaded into
            ``pipeline.model.gearnet``. Relative paths are resolved against
            the current working directory.

    Returns:
        The configured ``Pipeline``. Its solver's ``scheduler`` attribute is
        set to an ``ExponentialLR`` bound to the solver's optimizer.
    """
    if gpu is None:
        gpu = GPU

    pipeline = Pipeline(
        model='lm-gearnet',
        dataset='atpbind3d',
        gpus=[gpu],
        model_kwargs={
            'gpu': gpu,
            'gearnet_hidden_dim_size': 512,
            'gearnet_hidden_dim_count': 4,
            'bert_freeze': False,
            'bert_freeze_layer_count': 28,
        },
        optimizer_kwargs={
            'lr': 4e-4,
        },
        rus_kwargs={
            'rus_seed': 0,
            'rus_rate': 0.05,
            'rus_by': 'residue',
            'rus_noise_rate': noise_rate,
        },
        batch_size=1,
        optimizer='adam',
    )

    # Overwrite the GearNet sub-module's weights with the pretrained checkpoint.
    # NOTE: torch.load unpickles the file, so only point this at trusted
    # checkpoints.
    state_dict = torch.load(pretrained_path, map_location=f'cuda:{gpu}')
    pipeline.model.gearnet.load_state_dict(state_dict)

    # gamma ** 12 == 0.5, i.e. twelve decay steps multiply by one half --
    # assuming this project-local ExponentialLR applies lr *= gamma per step
    # like torch's scheduler of the same name (TODO confirm).
    scheduler = ExponentialLR(gamma=0.5 ** (1 / 12),
                              optimizer=pipeline.solver.optimizer)
    pipeline.solver.scheduler = scheduler

    return pipeline

In [2]:
# Build one pipeline with rus_noise_rate=0.8 for the manual inspection cells
# that follow (optimizer param groups, parameter snapshots, a one-epoch run).
pipeline = make_pipeline(0.8)

get dataset atpbind3d
Initialize RUS: seed 0, rate 0.05, by residue
train samples: 302, valid samples: 76, test samples: 41
MLP ADDED


In [10]:
# Set the learning rate of the optimizer's second parameter group to zero.
# NOTE(review): the index 1 is unexplained here. The MLP weights printed
# before and after the one-epoch run below are identical in the saved output,
# so presumably group 1 holds the MLP parameters -- confirm against how
# Pipeline builds its param groups.
pipeline.solver.optimizer.param_groups[1]['lr'] = 0

In [17]:
# Show the first parameter yielded by the task's `mlp` module before training,
# as a baseline for the same check repeated after pipeline.train().
next(pipeline.solver.model.mlp.parameters())

Parameter containing:
tensor([[-0.0045,  0.0115, -0.0093,  ...,  0.0057,  0.0161,  0.0097],
        [-0.0197, -0.0107,  0.0113,  ..., -0.0011, -0.0019, -0.0165],
        [ 0.0091, -0.0038, -0.0118,  ..., -0.0009, -0.0121,  0.0060],
        ...,
        [ 0.0037,  0.0100,  0.0092,  ...,  0.0051,  0.0014, -0.0062],
        [-0.0158, -0.0032,  0.0082,  ...,  0.0061, -0.0175,  0.0080],
        [-0.0092, -0.0058, -0.0012,  ...,  0.0001,  0.0049,  0.0208]],
       device='cuda:0', requires_grad=True)

In [19]:
# Show the first parameter yielded by the GearNet sub-module, reached through
# solver.model.model.gearnet (presumably a sanity check on the loaded
# pretrained weights -- confirm).
next(pipeline.solver.model.model.gearnet.parameters())

Parameter containing:
tensor([1.3347, 1.0823, 0.9034, 0.9520, 1.3146, 1.0325, 1.1010, 1.1677, 1.2918,
        1.1377, 1.2278, 1.2927, 0.9906, 1.3678, 1.3902, 1.1946, 1.0668, 0.9827,
        1.3269, 1.2142, 0.9907, 0.8998, 1.2614, 0.7374, 0.9353, 1.1344, 0.8818,
        0.9328, 1.0411, 0.9060, 1.1951, 0.8864, 1.2463, 1.1630, 0.9844, 1.1253,
        1.0519, 0.6956, 1.2921, 0.8724, 1.1386, 1.0150, 1.0132, 1.0027, 1.3058,
        1.0413, 1.0687, 1.0131, 1.2803, 1.3961, 1.2348, 1.2052, 0.9625, 1.5080,
        1.1803, 0.8505, 1.1518, 1.1332, 1.0011, 0.9695, 0.9589, 0.9332, 0.9909,
        0.9214, 1.1829, 1.4087, 1.4551, 1.2632, 1.0494, 1.0086, 0.9102, 0.8306,
        1.3779, 1.3853, 1.1936, 0.9538, 1.2385, 1.2531, 1.3412, 1.4550, 1.1268,
        1.0568, 0.9427, 1.2968, 0.9741, 0.8772, 0.9371, 1.0916, 1.2469, 0.9944,
        1.1706, 1.2905, 1.0399, 0.9310, 0.9328, 1.0516, 0.9753, 0.9942, 0.9800,
        0.9173, 1.0713, 1.0293, 0.9875, 1.1083, 1.0312, 1.4031, 1.0717, 1.0169,
        0.6681, 1.

In [20]:
# Run a single training epoch so the parameter snapshot taken before training
# can be compared with the one taken afterwards.
pipeline.train(num_epoch=1)

14:35:28   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
14:35:28   Epoch 0 begin
14:35:30   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
14:35:30   binary cross entropy: 0.731459
14:36:15   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
14:36:15   Epoch 0 end
14:36:15   duration: 4.81 mins
14:36:15   speed: 1.05 batch / sec
14:36:15   ETA: 0.00 secs
14:36:15   max GPU memory: 3563.7 MiB
14:36:15   ------------------------------
14:36:15   average binary cross entropy: 0.500527


In [21]:
# Re-inspect the same `mlp` parameter after the one-epoch run. In the saved
# output the displayed values match the pre-training snapshot, i.e. these
# weights did not visibly move while param_groups[1]['lr'] was 0.
next(pipeline.solver.model.mlp.parameters())

Parameter containing:
tensor([[-0.0045,  0.0115, -0.0093,  ...,  0.0057,  0.0161,  0.0097],
        [-0.0197, -0.0107,  0.0113,  ..., -0.0011, -0.0019, -0.0165],
        [ 0.0091, -0.0038, -0.0118,  ..., -0.0009, -0.0121,  0.0060],
        ...,
        [ 0.0037,  0.0100,  0.0092,  ...,  0.0051,  0.0014, -0.0062],
        [-0.0158, -0.0032,  0.0082,  ...,  0.0061, -0.0175,  0.0080],
        [-0.0092, -0.0058, -0.0012,  ...,  0.0001,  0.0049,  0.0208]],
       device='cuda:0', requires_grad=True)

In [10]:
# Sweep rus_noise_rate from 0 to 1: train a fresh pipeline per setting and
# record one summary row per noise rate.
patience = 5
records = []
for noise_rate in [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]:
    print('noise_rate:', noise_rate)
    pipeline = make_pipeline(noise_rate)
    train_record = pipeline.train_until_fit(patience=patience)
    # Keep the epoch recorded `patience` entries before the last one --
    # presumably the best epoch, assuming train_until_fit stops after
    # `patience` epochs without improvement (TODO confirm).
    records.append({
        'noise_rate': noise_rate,
        **train_record[-1 - patience],
        'num_epoch': len(train_record),
    })
    # Rebuild the frame from the accumulated records instead of growing it with
    # pd.concat from an empty DataFrame, and rewrite the CSV after every
    # setting so partial results survive an interrupted sweep.
    df = pd.DataFrame(records)
    df.to_csv('rus_noise_rate.csv', index=False)

noise_rate: 0
Initialize RUS: seed 0, rate 0.05, by residue
train samples: 302, valid samples: 76, test samples: 41
{'sensitivity': 0.8549, 'specificity': 0.7894, 'accuracy': 0.7928, 'precision': 0.1815, 'mcc': 0.3325, 'micro_auroc': 0.9121, 'train_bce': 0.4754, 'valid_bce': 0.3986, 'valid_mcc': 0.393}
{'sensitivity': 0.5917, 'specificity': 0.962, 'accuracy': 0.9428, 'precision': 0.4597, 'mcc': 0.492, 'micro_auroc': 0.9126, 'train_bce': 0.2527, 'valid_bce': 0.1548, 'valid_mcc': 0.4392}
{'sensitivity': 0.5167, 'specificity': 0.9572, 'accuracy': 0.9344, 'precision': 0.3975, 'mcc': 0.4192, 'micro_auroc': 0.8885, 'train_bce': 0.1255, 'valid_bce': 0.2087, 'valid_mcc': 0.4017}
{'sensitivity': 0.8166, 'specificity': 0.8385, 'accuracy': 0.8374, 'precision': 0.2164, 'mcc': 0.3661, 'micro_auroc': 0.9113, 'train_bce': 0.0625, 'valid_bce': 0.4753, 'valid_mcc': 0.4044}
{'sensitivity': 0.764, 'specificity': 0.8994, 'accuracy': 0.8924, 'precision': 0.2931, 'mcc': 0.4302, 'micro_auroc': 0.9184, 'train