In [1]:
import sys, torch, os
sys.path.append('..')
from src.networks import BaseNetwork
from src.data import ReactionDataset
from src.feature import PrecursorDataset
from src.trainer import BaseTrainer
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Build the reaction dataset and split it chronologically into train / valid / test loaders.
DS = ReactionDataset(feat_type='composit', shuffle_sequence=False, include_eos=1)

# Set `test` to True to load '../data/surxn.pkl.gz' instead of the default file.
test = False
DS.from_file('../data/surxn.pkl.gz' if test else '../data/screened_unique_reaction.pkl.gz')
# Alternative input kept for reference:
#    DS.from_file('../data/screened_conditional_reaction.pkl.gz',
#                 heat_temp_key=('heat_temp','median'))

# Year-based split: train < 2016, valid in [2016, 2018), test >= 2018.
years = np.array([d.year for d in DS])
train_mask = years < 2016
valid_mask = (years >= 2016) & (years < 2018)
test_mask = years >= 2018

# Only the training loader draws its indices in random order; the evaluation
# loaders iterate over their index arrays as given.
train_dl = DataLoader(
    DS,
    batch_size=256,
    sampler=SubsetRandomSampler(np.where(train_mask)[0]),
    collate_fn=DS.cfn,
)
valid_dl = DataLoader(
    DS,
    batch_size=2048,
    sampler=np.where(valid_mask)[0],
    collate_fn=DS.cfn,
)
test_dl = DataLoader(
    DS,
    batch_size=2048,
    sampler=np.where(test_mask)[0],
    collate_fn=DS.cfn,
)

In [3]:
# Fetch one validation batch and list every feature's key, shape and Python type.
feat, info = next(iter(valid_dl))
for name in feat:
    print(name, feat[name].shape, type(feat[name]))

x torch.Size([2048, 7, 88]) <class 'torch.Tensor'>
label torch.Size([14336]) <class 'torch.Tensor'>
neg_label torch.Size([14336]) <class 'torch.Tensor'>
context torch.Size([2048, 88]) <class 'torch.Tensor'>
weight torch.Size([14336]) <class 'torch.Tensor'>
sequence_mask torch.Size([14336]) <class 'torch.Tensor'>
precursor_mask torch.Size([2048, 1, 414]) <class 'torch.Tensor'>


# Transformer

#### tested classes

In [9]:
# TODO: delete this cell once the implementation is complete

from src.trainer import BaseTrainer
from src.networks import BaseNetwork, PositionalEncoding, TransformerDecoderBlock
from src.trainer import SequenceTrainer

## Test

In [14]:
import time
# Short 5-epoch smoke run of the transformer decoder with per-epoch wall-clock timing.
model = TransformerDecoderBlock(
    feature_dim=DS.num_precursor_feat,
    context_dim=DS.num_condition_feat,
    vocab_dim=DS.NUM_LABEL,
    num_heads=4,
    hidden_dim=64,
    hidden_layers=4,
)
tr = SequenceTrainer(model, lr=1e-4)

for i in range(5):
    t1 = time.time()
    train_loss = tr.train(train_dl)
    valid_loss, out = tr.test(valid_dl)
    # `out` is rebound here, so after the loop it holds the test-split output.
    test_loss, out = tr.test(test_dl)
    elapsed = time.time() - t1
    print('{:4d} {:9.5f} {:9.5f} {:9.5f} / {:.2f}'.format(i, train_loss, valid_loss, test_loss, elapsed))

   0   3.67506   3.23439   3.23429 / 3.91
   1   3.23277   3.00086   3.00070 / 3.93
   2   3.04168   2.84369   2.84370 / 3.84
   3   2.88790   2.71018   2.71016 / 3.46
   4   2.77160   2.60471   2.60460 / 3.83


In [17]:
# Same 5-epoch smoke run, with the model moved to CUDA and the float32 matmul
# precision setting switched to 'high'.
torch.set_float32_matmul_precision('high')

decoder_kwargs = dict(
    feature_dim=DS.num_precursor_feat,
    context_dim=DS.num_condition_feat,
    vocab_dim=DS.NUM_LABEL,
    num_heads=4,
    hidden_dim=64,
    hidden_layers=4,
)
model = TransformerDecoderBlock(**decoder_kwargs)
model.to('cuda')
#compiled_model = torch.compile(model)
tr = SequenceTrainer(model, lr=1e-4)

for i in range(5):
    t1 = time.time()
    train_loss = tr.train(train_dl)
    valid_loss, out = tr.test(valid_dl)
    # Overwrites the validation output; `out` ends up holding the test-split output.
    test_loss, out = tr.test(test_dl)
    row = (i, train_loss, valid_loss, test_loss, time.time() - t1)
    print('{:4d} {:9.5f} {:9.5f} {:9.5f} / {:.2f}'.format(*row))

   0   3.64891   3.24302   3.24302 / 3.75
   1   3.23010   2.99318   2.99315 / 3.87
   2   3.03491   2.84142   2.84157 / 3.43
   3   2.88542   2.71026   2.71019 / 3.84
   4   2.76733   2.59806   2.59782 / 3.95


In [39]:
# Consistency check: the model output for the batch's precomputed precursor
# features should match the output for features rebuilt from the target labels
# via DS.get_embedding. The displayed value is the summed absolute difference.
model.eval()
z1 = model(target=feat['precursor_feat'], context=feat['context'])
z2 = model(target=DS.get_embedding(feat['target']), context=feat['context'])
torch.abs(z1 - z2).sum()

tensor(0., grad_fn=<SumBackward0>)

In [18]:
# Per-reaction exact-match accuracy over the most recent `out`.
pred = out['pred'].argmax(-1)
label = out['label']
weight = out['weight']
n_data, l_seq = pred.shape

# Position 0 is always scored; position t is scored only when the label at
# t-1 is not EOS, i.e. up to and including the first EOS of each sequence.
always_first = np.ones((n_data, 1), dtype=bool)
prev_not_eos = (label != DS.EOS_LABEL)[..., :-1]
mask = np.hstack([always_first, prev_not_eos])

# A reaction is a hit when no scored position disagrees with its label.
acc_rxn = [
    (p[m] != l[m]).sum() == 0
    for p, l, m, _ in zip(pred, label, mask, weight)
]
len(acc_rxn), weight.shape

(27967, (27967,))

In [20]:
# Display the first reaction's scoring mask next to its label sequence.
mask[0], label[0]

(array([ True,  True,  True, False, False, False, False]),
 array([  2,   5, 378, 378, 378, 378, 378]))

In [39]:
# 100-epoch run; prints train / valid / test loss after every epoch.
model = TransformerDecoderBlock(
    DS.num_condition_feat,
    num_heads=4,
    hidden_dim=64,
    hidden_layers=4,
)
tr = SequenceTrainer(model, lr=1e-4)

for i in range(100):
    train_loss = tr.train(train_dl)
    valid_loss, out = tr.test(valid_dl)
    # `out` is rebound, so it keeps the test-split output of the latest epoch.
    test_loss, out = tr.test(test_dl)
    losses = (train_loss, valid_loss, test_loss)
    print('{:4d} {:9.5f} {:9.5f} {:9.5f}'.format(i, *losses))

   0   1.99951   1.44498   1.44493
   1   1.32844   1.18909   1.18908
   2   1.15493   1.06664   1.06657
   3   1.05857   0.98836   0.98805
   4   0.99526   0.93284   0.93303
   5   0.94666   0.88761   0.88701
   6   0.90799   0.85305   0.85327
   7   0.87553   0.82254   0.82096
   8   0.85084   0.79534   0.79469
   9   0.82855   0.77896   0.77786
  10   0.80872   0.75424   0.75411
  11   0.79169   0.73576   0.73554
  12   0.77698   0.72410   0.72531
  13   0.76392   0.70815   0.70855
  14   0.75060   0.69569   0.69644
  15   0.73886   0.68417   0.68380
  16   0.73034   0.67255   0.67398
  17   0.72001   0.66320   0.66334
  18   0.71236   0.65547   0.65561
  19   0.70242   0.64979   0.64865
  20   0.69550   0.64075   0.64030
  21   0.69033   0.63113   0.63170
  22   0.68170   0.62679   0.62562
  23   0.67514   0.61784   0.61814
  24   0.66914   0.61233   0.61180
  25   0.66434   0.60926   0.60969
  26   0.65751   0.60311   0.60178
  27   0.65465   0.59412   0.59472
  28   0.64847   0.5

In [65]:
def generate(self, context, max_len=20, sos_label=443):
    """Greedily decode label sequences for a batch of contexts.

    Starting from a column of start tokens, the model is called repeatedly on
    the sequence generated so far and the arg-max of the last position is
    appended, until ``max_len - 1`` tokens have been produced.

    Args:
        self: model object exposing ``device`` and ``forward(sequence, context)``;
            ``forward`` must return scores whose last axis indexes labels.
        context: batch of context rows; only ``context.shape[0]`` is read here,
            the tensor itself is passed through to ``forward``.
        max_len: total sequence length including the start token.
        sos_label: label id used as the start token (defaults to the value
            that was previously hard-coded).

    Returns:
        numpy integer array with one row per context and the start token
        removed. Everything after a row's first ``EOS_LABEL`` is overwritten
        with ``EOS_LABEL``, and the array is truncated to the longest run of
        tokens that precedes an EOS.

    Note:
        ``EOS_LABEL`` is read from the enclosing (notebook) namespace.
    """
    output_seq = torch.full(
        (context.shape[0], 1), sos_label, dtype=torch.long, device=self.device
    )
    # Decoding only needs arg-max indices, so skip building the autograd graph.
    with torch.no_grad():
        for _ in range(max_len - 1):
            output = self.forward(output_seq, context)
            output_seq = torch.hstack([output_seq, output.argmax(-1)[:, -1:]])
    seq = output_seq.cpu().numpy()[:, 1:]
    if seq.size == 0:
        # Nothing was generated (max_len <= 1 or empty batch).
        return seq
    # Tokens emitted after the first EOS are meaningless: the decoder keeps
    # producing output. Blank them out so they neither leak into the result
    # nor inflate the truncation length.
    finished = np.cumsum(seq == EOS_LABEL, axis=1) > 0
    seq = np.where(finished, EOS_LABEL, seq)
    j = (~finished).sum(1).max()
    return seq[:, :j]

# Greedy-decode the first three contexts of the current batch; `model` is passed
# as the function's `self` argument and the contexts are moved to CUDA first.
generate(model, feat['context'][:3].to('cuda'))

array([[5, 5, 5, 5, 8, 5, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5],
       [5, 5, 5, 5, 8, 5, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5],
       [5, 5, 5, 8, 5, 5, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5]])

In [64]:
# Print every reaction whose generated labels differ from the reference labels
# when both are compared as sorted lists with EOS entries removed.
for gen_row, ref_row in zip(out_gen, feat['label'].cpu().numpy()):
    gen_sorted = np.array(sorted(gen_row[gen_row != EOS_LABEL]))
    ref_sorted = np.array(sorted(ref_row[ref_row != EOS_LABEL]))
    # The length test comes first so the element-wise comparison only runs on
    # arrays of equal size.
    if len(gen_sorted) != len(ref_sorted) or np.sum(gen_sorted != ref_sorted) != 0:
        print(gen_sorted, ref_sorted)

[2 5] [  5 148]
[2 5] [  5 148]
[2 5] [  2 107]
[2 5] [  5 108]
[5 8] [  8 124]
[  5 106] [  5 189]
[ 0 16 29] [  0  16 113]
[ 0 16 29] [  0  16 113]
[ 0 16 29] [  0  16 113]
[ 0 16 29] [  0  16 182]
[ 0 16 29] [  0  16 182]
[  8  14 126 161] [ 14  66 108 250]
[  8  14 126 161] [ 14  66 108 250]
[  8  14 126 161] [ 14  66 108 250]
[ 8 51] [ 8 24 51]
[ 8 51] [ 8 24 51]
[ 8 51] [ 8 24 51]
[ 7 18 29] [  7  29 177]
[ 7 18 29] [  7  29 177]
[ 7 18 29] [  7  29 177]
[ 7 18 29] [  7  29 177]
[ 7 18 29] [  7  29 177]
[ 7 18 29] [  7  88 303]
[ 7 18 29] [  7  88 113]
[ 7 18 29] [  7  88 113]
[ 7 18 29] [  7  88 113]


In [70]:
# Display the generated sequences.
# NOTE(review): `out_gen` is not assigned in any cell visible here — confirm where it comes from.
out_gen

array([[  5,   2, 444, 444],
       [  5,   2, 444, 444],
       [  5,   2, 444, 444],
       ...,
       [ 12,   9, 444, 444],
       [ 12,   9, 444, 444],
       [ 12,   9, 444, 444]])

In [48]:
# For each sequence, print the labels at positions where the target differs
# from 444, followed by the full label row.
# NOTE(review): 444 looks like the padding/EOS id in the printed output — confirm.
keep = feat['target'] != 444
for label_row, keep_row in zip(feat['label'], keep):
    print(label_row[keep_row].cpu(), label_row)

tensor([  0,  13,  14,   4, 444]) tensor([  0,  13,  14,   4, 444, 444, 444], device='cuda:0')
tensor([  5,  65, 444]) tensor([  5,  65, 444, 444, 444, 444, 444], device='cuda:0')
tensor([  4,  76, 444]) tensor([  4,  76, 444, 444, 444, 444, 444], device='cuda:0')
tensor([ 27,  49, 444]) tensor([ 27,  49, 444, 444, 444, 444, 444], device='cuda:0')
tensor([  6, 197,  18, 444]) tensor([  6, 197,  18, 444, 444, 444, 444], device='cuda:0')
tensor([ 50,   2,   5,   0, 444]) tensor([ 50,   2,   5,   0, 444, 444, 444], device='cuda:0')
tensor([  1,  32,  25, 444]) tensor([  1,  32,  25, 444, 444, 444, 444], device='cuda:0')
tensor([  3,   0,   1, 444]) tensor([  3,   0,   1, 444, 444, 444, 444], device='cuda:0')
tensor([  2,   7,  13, 444]) tensor([  2,   7,  13, 444, 444, 444, 444], device='cuda:0')
tensor([ 79,  27,  49, 444]) tensor([ 79,  27,  49, 444, 444, 444, 444], device='cuda:0')
tensor([  6,   1,  30, 444]) tensor([  6,   1,  30, 444, 444, 444, 444], device='cuda:0')
tensor([ 10,   

# LSTM

In [3]:
# TODO: delete this cell once the implementation is complete
from src.networks import LSTMDecoderBlock, TransformerDecoderBlock
from src.trainer import BaseTrainer

class SequenceTrainer(BaseTrainer):
    """Trainer for sequence decoders that score a label at every sequence position.

    Besides the training loss, each batch yields two monitoring values computed
    on predictions multiplied by ``precursor_mask``: ``pos`` (weighted
    criterion against ``label``) and ``neg`` (weighted ``exp(-criterion)``
    against ``neg_label``), both averaged over ``sequence_mask``.
    """

    def __init__(self, model, lr, device='cuda', crit=torch.nn.CrossEntropyLoss(reduction='none')):
        # NOTE: the default criterion instance is created once, when the class
        # is defined, and is shared by every trainer that relies on the default.
        super().__init__(model, lr, device, crit)

    def _eval_batch(self, batch, compute_loss=True):
        """Run the model on one batch.

        Args:
            batch: ``(feat, info)`` pair; ``feat`` is a dict of tensors that is
                moved to ``self.device`` and passed to the model as keyword
                arguments. It must contain ``label``, ``neg_label``, ``weight``,
                ``sequence_mask`` and ``precursor_mask``.
            compute_loss: when True also return the training loss.

        Returns:
            ``[pos_mean, neg_mean, masked_pred]`` when ``compute_loss`` is
            False, otherwise ``(loss, [pos_mean, neg_mean, masked_pred])``.
            ``masked_pred`` is a numpy array.
        """
        _feat, _ = batch
        feat = {k: v.to(self.device) for k, v in _feat.items()}
        pred = self.model(**feat)
        B, S, _ = pred.shape
        seq_mask = feat['sequence_mask']

        # Monitoring values are only consumed as Python floats / numpy arrays,
        # so they are computed once and without tracking gradients. They are
        # built before the branch so that compute_loss=False works as well
        # (it previously referenced variables that were never assigned).
        with torch.no_grad():
            masked_pred = pred * feat['precursor_mask']
            flat_masked = masked_pred.view(B * S, -1)
            pos = self.crit(flat_masked, feat['label']) * feat['weight']
            neg = torch.exp(-self.crit(flat_masked, feat['neg_label'])) * feat['weight']
            metrics = [
                pos[seq_mask].mean().item(),
                neg[seq_mask].mean().item(),
                masked_pred.detach().cpu().numpy(),
            ]

        if not compute_loss:
            return metrics

        # The optimised loss uses the unmasked predictions.
        loss = (self.crit(pred.view(B * S, -1), feat['label']) * feat['weight'])[seq_mask].mean()
        return loss, metrics

    def _parse_output(self, batch, output):
        """Accumulate one batch's outputs into ``self._output``.

        ``info`` entries are collected in a list, ``pos`` / ``neg`` are
        appended per batch, ``pred`` is stacked row-wise, and, when a weight
        tensor is present, ``label`` is stored as one row per sample and
        ``weight`` as the first weight of each sample.
        """
        feat, info = batch
        pos, neg, pred = output

        has_weight = feat['weight'] is not None
        if has_weight:
            n = feat['context'].shape[0]
            label = feat['label'].cpu().numpy().reshape(n, -1)
            weight = feat['weight'].cpu().numpy().reshape(n, -1)[:, 0]

        if self._output is None:
            self._output = {
                # Copy so that later extend() calls do not mutate the first
                # batch's own info container.
                'info': list(info),
                'pos': [pos],
                'neg': [neg],
                'pred': pred,
            }
            if has_weight:
                self._output.update({'label': label, 'weight': weight})
        else:
            self._output['info'].extend(info)
            self._output['pred'] = np.vstack([self._output['pred'], pred])
            self._output['pos'].append(pos)
            self._output['neg'].append(neg)
            if has_weight:
                self._output['label'] = np.vstack([self._output['label'], label])
                self._output['weight'] = np.hstack([self._output['weight'], weight])

In [4]:
# 500-epoch run; every 10th epoch prints the loss together with the mean
# pos / neg monitoring values for the validation and test splits.
batch = next(iter(valid_dl))
#model = LSTMDecoderBlock(
model = TransformerDecoderBlock(
    feature_dim=DS.num_precursor_feat,
    context_dim=DS.num_condition_feat,
    output_dim=DS.NUM_LABEL,
)
tr = SequenceTrainer(model, 1e-3, device='cuda')

for i in range(500):
    train_loss = tr.train(train_dl)
    valid_loss, valid_out = tr.test(valid_dl)
    test_loss, test_out = tr.test(test_dl)
    vp, vn = valid_out['pos'], valid_out['neg']
    tp, tn = test_out['pos'], test_out['neg']
    if i % 10 != 0:
        continue
    summary = (
        i, train_loss,
        valid_loss, np.mean(vp), np.mean(vn),
        test_loss, np.mean(tp), np.mean(tn),
    )
    print('{:4d} {:8.3f} | {:8.3f} {:8.3f} {:8.3f} | {:8.3f} {:8.3f} {:8.3f}'.format(*summary))

   0    4.095 |    3.813    3.957    0.011 |    3.839    3.990    0.011
  10    2.070 |    2.250    1.706    0.079 |    2.264    1.744    0.080
  20    1.739 |    1.912    1.436    0.058 |    1.968    1.518    0.055
  30    1.571 |    1.726    1.304    0.051 |    1.796    1.398    0.048
  40    1.475 |    1.632    1.242    0.047 |    1.705    1.333    0.044
  50    1.409 |    1.560    1.203    0.040 |    1.637    1.299    0.036
  60    1.364 |    1.510    1.155    0.041 |    1.582    1.245    0.038
  70    1.332 |    1.487    1.140    0.042 |    1.566    1.235    0.039
  80    1.301 |    1.449    1.127    0.035 |    1.530    1.228    0.031
  90    1.280 |    1.434    1.108    0.034 |    1.512    1.201    0.030
 100    1.267 |    1.418    1.125    0.031 |    1.506    1.230    0.027
 110    1.255 |    1.409    1.107    0.033 |    1.485    1.198    0.030
 120    1.241 |    1.398    1.113    0.027 |    1.488    1.218    0.025
 130    1.234 |    1.385    1.116    0.026 |    1.480    1.230  

In [111]:
# Take one batch from the test loader to experiment with beam search.
feat, info = next(iter(test_dl))
#feat['x']
# NOTE(review): `context` is assigned here, but the call below passes feat['context'] directly.
context = feat['context']
# Case 1: uncertainty-based beam generation
def beam_search(model, contexts, embed_fn, beam_width=50, max_seq_len=8):
    """Work-in-progress beam search; currently performs a single expansion step.

    Args:
        model: callable as ``model(tokens, contexts)``; must expose ``train()``
            and ``output_dim``.
        contexts: 2-D tensor, one row per sample.
        embed_fn: maps a CPU tensor of label ids to the model's token input.
        beam_width: number of beams kept per sample.
        max_seq_len: intended number of decoding steps. NOTE(review): the loop
            returns during its first iteration, so this currently has no effect.

    Returns:
        ``(sequences, beam_idx, token_idx)`` from the first step: the initial
        sequences, and for each of the top ``beam_width`` candidates the beam
        it came from and the token id it selects.
    """
    n, c = contexts.shape
    # NOTE(review): training mode is enabled here; presumably deliberate for the
    # "uncertainty based" generation mentioned above the function — confirm.
    model.train()
    # Every beam starts from the single token id `model.output_dim - 1`.
    sequences = - torch.ones(contexts.shape[0], beam_width, 1).long().cpu() + model.output_dim
    scores = torch.zeros_like(sequences).float().to('cuda')

    # Repeat each context row `beam_width` times so the rows line up with the flattened beams.
    extended_contexts = contexts.unsqueeze(1).repeat(1, beam_width, 1).reshape(n * beam_width, c).to('cuda')
    for _ in range(max_seq_len):
        # Only the most recent token of every beam is fed to the model.
        last_tokens = embed_fn(sequences[:, :, -1].reshape(-1, 1).cpu()).to('cuda')
        with torch.no_grad():
            logits = model(last_tokens, extended_contexts)
        log_probs = torch.nn.functional.log_softmax(logits, dim=-1).reshape(n, beam_width, -1)
        total_scores = scores + log_probs
        # Top-k over the flattened (beam, token) scores of each sample.
        topk_scores, topk_indices = total_scores.reshape(n, -1).topk(beam_width, dim=-1)
        # Split each flat index into (beam, token); assumes the score axis has
        # `model.output_dim` entries per beam — TODO confirm.
        beam_idx = torch.div(topk_indices, model.output_dim, rounding_mode='trunc')
        token_idx = topk_indices % model.output_dim
        # Early return: the statements below are unreachable scaffolding for the
        # unfinished multi-step search.
        return sequences, beam_idx, token_idx
#        torch.cat([sequences], dim=-1)
        scores = topk_scores.unsqueeze(-1)
        return total_scores

# Run the single beam-search step on the batch contexts, using the dataset's
# label-embedding lookup; unpacks (sequences, beam_idx, token_idx).
s, i, j = beam_search(model, feat['context'], DS.get_embedding)

# profiling

In [2]:
import sys
sys.path.append('..')
from src.data import ReactionDataset
from src.networks import TransformerDecoderBlock
import torch, gc
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.profiler import profile, record_function, ProfilerActivity
import numpy as np

In [3]:
# Dataset used for profiling, split chronologically by publication year.
DS = ReactionDataset(feat_type='cgcnn')
DS.from_file('../data/screened_conditional_reaction.pkl.gz', 
             heat_temp_key=('heat_temp','median'))
#DS.to('cuda')

years = np.array([d.year for d in DS])
train_mask = years < 2016
# Both comparisons need their own parentheses: `&` binds tighter than `<`, so
# `(years >= 2016) & years < 2018` is evaluated as
# `((years >= 2016) & years) < 2018`, which is True for every sample.
valid_mask = (years >= 2016) & (years < 2018)
test_mask = years >= 2018

In [40]:
import os
# Profile the training loop with torch.profiler and print the aggregated table.
# NOTE(review): CUDA_LAUNCH_BLOCKING is presumably only honoured if it is set
# before CUDA is first initialised in this kernel — confirm on a fresh kernel.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
torch.cuda.init()

train_dl = DataLoader(DS, batch_size=64, sampler=SubsetRandomSampler(np.where(train_mask)[0]), 
                      collate_fn=DS.cfn)#num_workers=1, prefetch_factor=4, collate_fn=DS.cfn)

# Each evaluation loader draws from its own split (both previously reused the
# training indices).
valid_dl = DataLoader(DS, batch_size=2048, sampler=np.where(valid_mask)[0], collate_fn=DS.cfn)
test_dl = DataLoader(DS, batch_size=2048, sampler=np.where(test_mask)[0], collate_fn=DS.cfn)

model = TransformerDecoderBlock(DS.num_condition_feat)
model.to('cuda')
opt = torch.optim.AdamW(model.parameters(), lr=1e-4)
crit = torch.nn.CrossEntropyLoss(reduction='none')

# Schedule: per cycle skip 1 step, warm up for 1 step, record 3 steps; 2 cycles in total.
with profile(
    activities=[
        ProfilerActivity.CPU,
        ProfilerActivity.CUDA,
    ],
    schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
    on_trace_ready=torch.profiler.tensorboard_trace_handler('../dump/log_dir'),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
    ) as prof:


    for _feat, _ in train_dl:
        feat = {k:v.to('cuda') for k,v in _feat.items()}
        n_batch, l_seq = feat['label'].shape
        # The loss computation is recorded under 'model_inference' as well,
        # because its dedicated record_function block is commented out.
        with record_function('model_inference'):
            pred = model(**feat)
#        with record_function('0_compute_loss'):
            _loss = crit(pred.reshape(n_batch * l_seq, -1), feat['label'].view(-1))
#            loss = (_loss * weight)[mask].mean()
            loss = (_loss * feat['weight']).mean()
        with record_function('backward_pass'):
            opt.zero_grad()
            loss.backward()
            opt.step()
        # Advance the profiler schedule once per training step.
        prof.step()
gc.collect()
torch.cuda.empty_cache()
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=100))
#print(prof.key_averages().table(row_limit=100))

STAGE:2024-06-14 14:23:23 787949:787949 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-14 14:23:23 787949:787949 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-14 14:23:23 787949:787949 ActivityProfilerController.cpp:324] Completed Stage: Post Processing
STAGE:2024-06-14 14:23:24 787949:787949 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-06-14 14:23:24 787949:787949 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-06-14 14:23:24 787949:787949 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         0.90%     322.000us        79.97%      28.589ms       9.530ms       0.000us         0.00%       1.259ms     419.667us      29.19 Kb     -61.88 Kb           0 b     -97.50 K

In [11]:
# Print the aggregated profiler table again, this time sorted by the "cpu_time" key.
print(prof.key_averages().table(sort_by="cpu_time", row_limit=100))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         0.58%     434.000us        90.70%      68.182ms      22.727ms       0.000us         0.00%       3.002ms       1.001ms     116.75 Kb    -233.50 Kb    -725.00 Kb           0 

In [13]:
# Save the aggregated profiler table (sorted by total CPU time) as a text report.
with open('../dump/profiler_B64_NW1_PF0_indexed.txt', 'w') as report_file:
    report_text = prof.key_averages().table(sort_by="cpu_time_total", row_limit=100)
    report_file.write(report_text)