In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [6]:
train_sps = torch.load('data/train_sps.ids76.pt')
train_smile = torch.load('data/train_smile.ids68.pt')
train_ic50 = torch.load('data/train_ic50_log.pt')

test_sps = torch.load('data/test_sps.ids76.pt')
test_smile = torch.load('data/test_smile.ids68.pt')
test_ic50 = torch.load('data/test_ic50_log.pt')

In [7]:
train_sps.shape

torch.Size([263583, 152])

In [29]:
class RnnCnnModel(nn.Module):
    def __init__(self, vocab_size_protein, vocab_size_compound, GRU_size_prot, GRU_size_drug, U_size, batch_size):
        super().__init__()
        self.GRU_size_prot = GRU_size_prot
        self.GRU_size_drug = GRU_size_drug
        self.batch_size = batch_size
        
        # Protein path
        self.prot_embedding = nn.Embedding(vocab_size_protein, GRU_size_prot)
        self.prot_gru_1 = nn.GRU(GRU_size_prot, GRU_size_prot, batch_first=True)
        self.prot_gru_2 = nn.GRU(GRU_size_prot, GRU_size_prot, batch_first=True)
        
        # Drug path
        self.drug_embedding = nn.Embedding(vocab_size_compound, GRU_size_drug)
        self.drug_gru_1 = nn.GRU(GRU_size_drug, GRU_size_drug, batch_first=True)
        self.drug_gru_2 = nn.GRU(GRU_size_drug, GRU_size_drug, batch_first=True)
        
        # Attention mechanism components
        self.W_attn = nn.Parameter(torch.Tensor(GRU_size_prot, GRU_size_drug))
        self.b_attn = nn.Parameter(torch.Tensor(1))
        self.U_prot = nn.Parameter(torch.Tensor(U_size, GRU_size_prot))
        self.U_drug = nn.Parameter(torch.Tensor(U_size, GRU_size_drug))
        self.B = nn.Parameter(torch.Tensor(U_size))
        
        # Convolution and fully connected layers
        self.conv1 = nn.Conv1d(U_size, 64, kernel_size=4, stride=2)
        self.fc1 = nn.Linear(64*32, 600)  # Adjust the input features according to the output of conv layers
        self.fc2 = nn.Linear(600, 300)
        self.fc3 = nn.Linear(300, 1)

    def attention_mechanism(self, prot_out_2, drug_out_2):
        # Assuming prot_out_2 and drug_out_2 are the outputs from the last GRU layer for protein and drug respectively
        
        # Compute VU term (dot product + addition of bias, with non-linearity)
        # V is prot_out_2 transformed by W_attn, and U is drug_out_2 directly used
        V = torch.matmul(prot_out_2, self.W_attn)  # [batch_size, seq_len_prot, GRU_size_drug]
        VU = torch.tanh(torch.matmul(V, drug_out_2.transpose(1, 2)) + self.b_attn)  # [batch_size, seq_len_prot, seq_len_drug]
        
        # Flatten VU to compute softmax
        VU_flat = VU.view(-1, VU.size(-1))  # Flatten to [batch_size * seq_len_prot, seq_len_drug]
        alphas_flat = F.softmax(VU_flat, dim=1)
        alphas = alphas_flat.view(VU.size())  # Reshape back to [batch_size, seq_len_prot, seq_len_drug]
        
        # Compute context vector as weighted sum
        context_vector = torch.bmm(alphas, drug_out_2)  # [batch_size, seq_len_prot, GRU_size_drug]
        
        # Compute final attention vector (Attn) by combining U_prot, U_drug, and B terms
        Attn = torch.zeros(self.batch_size, self.U_prot.size(0), device=prot_out_2.device)  # Initialize Attn
        for i in range(context_vector.size(1)):  # Iterate over seq_len_prot
            temp = torch.matmul(context_vector[:, i, :], self.U_drug) + torch.matmul(prot_out_2[:, i, :], self.U_prot) + self.B
            Attn += temp * alphas[:, i, :].unsqueeze(2)  # Weight by alphas
        
        return Attn

    def forward(self, prot_data, drug_data):
        # Embedding and GRU for protein
        prot_emb = self.prot_embedding(prot_data)
        prot_out_1, _ = self.prot_gru_1(prot_emb)
        prot_out_2, _ = self.prot_gru_2(prot_out_1)
        
        # Embedding and GRU for drug
        drug_emb = self.drug_embedding(drug_data)
        drug_out_1, _ = self.drug_gru_1(drug_emb)
        drug_out_2, _ = self.drug_gru_2(drug_out_1)
        
        # Attention mechanism
        Attn = self.attention_mechanism(prot_out_2, drug_out_2)
        
        # Assuming Attn is appropriately resized or pooled for the convolutional layers
        Attn = Attn.unsqueeze(1)  # Add channel dimension for conv1d
        conv_out = F.relu(self.conv1(Attn))
        conv_out = conv_out.view(conv_out.size(0), -1)  # Flatten for FC layers
        
        # Fully connected layers
        fc_out = F.relu(self.fc1(conv_out))
        fc_out = F.dropout(fc_out, 0.8, training=self.training)
        fc_out = F.relu(self.fc2(fc_out))
        fc_out = F.dropout(fc_out, 0.8, training=self.training)
        output = self.fc3(fc_out)
        
        return output

In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

protein_vocab_size = 76 # 토큰 어휘집의 어휘 개수
compound_vocab_size = 68 # 토큰 어휘집의 어휘 개수
protein_seq_length = 152 # 한 문장의 토큰 수
compound_seq_length = 100 # 한 문장의 토큰 수
embedding_dim = 256
hidden_dim = 128
batch_size = 64
U_size = 256

In [36]:
model = RnnCnnModel(protein_vocab_size, compound_vocab_size, embedding_dim, embedding_dim, U_size, batch_size)
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.001)

model.train()

RnnCnnModel(
  (prot_embedding): Embedding(76, 256)
  (prot_gru_1): GRU(256, 256, batch_first=True)
  (prot_gru_2): GRU(256, 256, batch_first=True)
  (drug_embedding): Embedding(68, 256)
  (drug_gru_1): GRU(256, 256, batch_first=True)
  (drug_gru_2): GRU(256, 256, batch_first=True)
  (conv1): Conv1d(256, 64, kernel_size=(4,), stride=(2,))
  (fc1): Linear(in_features=2048, out_features=600, bias=True)
  (fc2): Linear(in_features=600, out_features=300, bias=True)
  (fc3): Linear(in_features=300, out_features=1, bias=True)
)

In [37]:
batch_size = 64
shuffle = True

dataset = TensorDataset(train_sps, train_smile, train_ic50)
data_loader = DataLoader(dataset, batch_size= batch_size, shuffle=shuffle, pin_memory=True)

num_epochs = 10

# 훈련 시작
for epoch in range(num_epochs):
    model.to(device)
    model.train()  # 모델을 훈련 모드로 설정
    total_loss = 0.0  # 에폭별 총 손실을 추적
    
    # tqdm을 사용하여 진행 상황 막대 표시
    progress_bar = tqdm(enumerate(data_loader), total=len(data_loader), desc=f"Epoch {epoch+1}/{num_epochs}")
    
    for batch_idx, (sps, smile, log_ic50) in progress_bar:
        sps = sps.to(device)
        smile = smile.to(device)
        log_ic50 = log_ic50.to(device)

        optimizer.zero_grad()  # 그라디언트 초기화
        
        output = model(sps, smile) # drug sequence에서 look ahead mask는 필요가 없음
        
        loss = criterion(output.squeeze(), log_ic50.float())
        
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # 진행률 막대에 현재 평균 손실 표시
        progress_bar.set_postfix({'avg_loss': total_loss / (batch_idx + 1)})
    
    avg_loss = total_loss / len(data_loader)
    print(f"\nEpoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss}")

Epoch 1/10:   0%|          | 0/4119 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (64) must match the size of tensor b (100) at non-singleton dimension 1