In this notebook we will train a deep learning model using all the data available !
* preprocessing : I encoded the smiles of all the train & test set and saved it [here](https://www.kaggle.com/datasets/ahmedelfazouan/belka-enc-dataset) , this may take up to 1 hour on TPU.
* Training & Inference : I used a simple 1dcnn model trained on 20 epochs.

How to improve :
* Try a different architecture : I'm able to get an LB score of 0.604 with minor changes on this architecture.
* Try another model like Transformer, or LSTM.
* Train for more epochs.
* Add more features like a one hot encoding of bb2 or bb3.
* And of course ensembling with GBDT models.

In [50]:
import gc
import os
import pickle
import random
import joblib
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score as APS
import polars as pl


In [51]:
import gc
import os
import pickle
import random
import joblib
import pandas as pd
# import polars as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import StratifiedKFold
import numpy as np
import torch.nn.functional as F


In [52]:
class Config:
    PREPROCESS = False
    KAGGLE_NOTEBOOK = False
    DEBUG = True
    
    SEED = 42
    EPOCHS = 10
    BATCH_SIZE = 4096
    LR = 1e-3
    WD = 0.05
    PATIENCE = 100
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    NBR_FOLDS = 15
    SELECTED_FOLDS = [0]
    
    
if Config.DEBUG:
    n_rows = 10**6
else:
    n_rows = None
    


In [53]:
if Config.KAGGLE_NOTEBOOK:
    RAW_DIR = "/kaggle/input/leash-BELKA/"
    PROCESSED_DIR = "/kaggle/input/belka-enc-dataset"
    OUTPUT_DIR = ""
    MODEL_DIR = ""
else:
    RAW_DIR = "../data/raw/"
    PROCESSED_DIR = "../data/processed/"
    OUTPUT_DIR = "../data/result/"
    MODEL_DIR = "../models/"

TRAIN_DATA_NAME = "train_enc.parquet"

In [54]:
def set_seeds(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)

set_seeds(seed=Config.SEED)

train_file_list = [f"../data/chuncked-dataset/local_train_enc_{i}.parquet" for i in range(10)]
train_file_list

['../data/chuncked-dataset/local_train_enc_0.parquet',
 '../data/chuncked-dataset/local_train_enc_1.parquet',
 '../data/chuncked-dataset/local_train_enc_2.parquet',
 '../data/chuncked-dataset/local_train_enc_3.parquet',
 '../data/chuncked-dataset/local_train_enc_4.parquet',
 '../data/chuncked-dataset/local_train_enc_5.parquet',
 '../data/chuncked-dataset/local_train_enc_6.parquet',
 '../data/chuncked-dataset/local_train_enc_7.parquet',
 '../data/chuncked-dataset/local_train_enc_8.parquet',
 '../data/chuncked-dataset/local_train_enc_9.parquet']

# Preprocessing

In [55]:
if Config.PREPROCESS:
    enc = {'l': 1, 'y': 2, '@': 3, '3': 4, 'H': 5, 'S': 6, 'F': 7, 'C': 8, 'r': 9, 's': 10, '/': 11, 'c': 12, 'o': 13,
           '+': 14, 'I': 15, '5': 16, '(': 17, '2': 18, ')': 19, '9': 20, 'i': 21, '#': 22, '6': 23, '8': 24, '4': 25, '=': 26,
           '1': 27, 'O': 28, '[': 29, 'D': 30, 'B': 31, ']': 32, 'N': 33, '7': 34, 'n': 35, '-': 36}
    train_raw = pd.read_parquet('/kaggle/input/leash-BELKA/train.parquet')
    smiles = train_raw[train_raw['protein_name']=='BRD4']['molecule_smiles'].values
    assert (smiles!=train_raw[train_raw['protein_name']=='HSA']['molecule_smiles'].values).sum() == 0
    assert (smiles!=train_raw[train_raw['protein_name']=='sEH']['molecule_smiles'].values).sum() == 0
    def encode_smile(smile):
        tmp = [enc[i] for i in smile]
        tmp = tmp + [0]*(142-len(tmp))
        return np.array(tmp).astype(np.uint8)

    smiles_enc = joblib.Parallel(n_jobs=96)(joblib.delayed(encode_smile)(smile) for smile in tqdm(smiles))
    smiles_enc = np.stack(smiles_enc)
    train = pd.DataFrame(smiles_enc, columns = [f'enc{i}' for i in range(142)])
    train['bind1'] = train_raw[train_raw['protein_name']=='BRD4']['binds'].values
    train['bind2'] = train_raw[train_raw['protein_name']=='HSA']['binds'].values
    train['bind3'] = train_raw[train_raw['protein_name']=='sEH']['binds'].values
    train.to_parquet('train_enc.parquet')

    test_raw = pd.read_parquet('/kaggle/input/leash-BELKA/test.parquet')
    smiles = test_raw['molecule_smiles'].values

    smiles_enc = joblib.Parallel(n_jobs=96)(joblib.delayed(encode_smile)(smile) for smile in tqdm(smiles))
    smiles_enc = np.stack(smiles_enc)
    test = pd.DataFrame(smiles_enc, columns = [f'enc{i}' for i in range(142)])
    test.to_parquet('test_enc.parquet')

else:
    # train = pl.read_parquet(os.path.join(PROCESSED_DIR, TRAIN_DATA_NAME), n_rows=n_rows)
    test = pl.read_parquet(os.path.join(PROCESSED_DIR, 'test_enc.parquet'), n_rows=n_rows)
    # train = train.to_pandas()
    test = test.to_pandas()

In [56]:
def prepare_data(train, train_idx, valid_idx, features, targets, device):
    """
    データの準備を行う関数
    """
    X_train = torch.tensor(train.loc[train_idx, features].values, dtype=torch.float32).to(device)
    y_train = torch.tensor(train.loc[train_idx, targets].values, dtype=torch.float32).to(device)
    X_val = torch.tensor(train.loc[valid_idx, features].values, dtype=torch.float32).to(device)
    y_val = torch.tensor(train.loc[valid_idx, targets].values, dtype=torch.float32).to(device)
    
    train_dataset = TensorDataset(X_train, y_train)
    valid_dataset = TensorDataset(X_val, y_val)
    
    train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
    
    return train_loader, valid_loader, X_val, y_val


def prepare_dataloader(train, val, features, targets, device):
    X_train = torch.tensor(train.loc[:, features].values, dtype=torch.float32).to(device)
    y_train = torch.tensor(train.loc[:, targets].values, dtype=torch.float32).to(device)
    X_val = torch.tensor(val.loc[:, features].values, dtype=torch.float32).to(device)
    y_val = torch.tensor(val.loc[:, targets].values, dtype=torch.float32).to(device)
    
    train_dataset = TensorDataset(X_train, y_train)
    valid_dataset = TensorDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
    
    return train_loader, valid_loader, X_val, y_val
    

In [57]:
import torch
import torch.nn as nn
import torch.optim as optim

class MyModel(nn.Module):
    def __init__(self, input_len=142, num_embeddings=36, embedding_dim=128, num_filters=32):
        super(MyModel, self).__init__()
        
        # Convolution layers
        self.conv1 = nn.Conv1d(in_channels=4096, out_channels=num_filters, kernel_size=3, padding=0)
        self.conv2 = nn.Conv1d(in_channels=num_filters, out_channels=num_filters*2, kernel_size=3, padding=0)
        self.conv3 = nn.Conv1d(in_channels=num_filters*2, out_channels=num_filters*3, kernel_size=3, padding=0)
        
        # Global Max Pooling
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)
        
        # Fully connected layers
        self.fc1 = nn.Linear(num_filters*3, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 3)
        
        # Dropout
        self.dropout = nn.Dropout(0.1)
        
        # Activation
        self.relu = nn.ReLU()

    def forward(self, x):
        # x = self.embedding(x.long()).permute(0, 2, 1)  # Change the shape to (batch_size, channels, sequence_length)
        print(x.shape)
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        
        x = self.global_max_pool(x).squeeze()  # Remove the last dimension after pooling
        
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.dropout(self.relu(self.fc3(x)))
        x = self.fc4(x)  # Applying sigmoid on the output layer for binary classification
        return x
    

class ImprovedCNNModel(nn.Module):
    def __init__(self, input_dim=142, input_dim_embedding=37, hidden_dim=128, num_filters=32, output_dim=3, dropout_prob=0.1):
        super(ImprovedCNNModel, self).__init__()
        
        # Embedding layer
        self.embedding = nn.Embedding(num_embeddings=input_dim_embedding, embedding_dim=hidden_dim, padding_idx=0)
        
        # Convolutional layers
        self.conv1 = nn.Conv1d(in_channels=hidden_dim, out_channels=num_filters, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=num_filters, out_channels=num_filters*2, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=num_filters*2, out_channels=num_filters*3, kernel_size=3, padding=1)
        
        # Pooling layer
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
        
        # Fully connected layers
        self.fc1 = nn.Linear(num_filters*3 * (input_dim // 8), 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, output_dim)
        
        # Dropout layer
        self.dropout = nn.Dropout(dropout_prob)
        
        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.embedding(x.long()).permute(0, 2, 1)  # Ensure input is LongTensor
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

# Initialize the model
input_len = 142
num_filters = 32
embedding_dim = 128
num_embeddings = 36

model = MyModel(input_len=input_len, num_embeddings=num_embeddings, embedding_dim=embedding_dim, num_filters=num_filters)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()  # Binary cross-entropy loss for binary classification


In [58]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score

# Dataset preparation (assuming data is already loaded into `train_data` and `train_labels`)
def create_dataloader(X, y, batch_size, shuffle=True):
    tensor_x = torch.tensor(X, dtype=torch.float32)  # Convert features to tensor
    tensor_y = torch.tensor(y, dtype=torch.float32)  # Convert labels to tensor
    dataset = TensorDataset(tensor_x, tensor_y)  # Create dataset
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Trainer class
class Trainer:
    def __init__(self, model, optimizer, criterion, device):
        self.model = model.to(device)
        self.optimizer = optimizer
        self.criterion = criterion
        self.device = device
        self.best_val_loss = float('inf')
        self.patience_counter = 0
        self.scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

    def train_epoch(self, train_loader):
        self.model.train()
        running_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            # print(outputs.shape, targets.shape)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        return epoch_loss

    def validate(self, valid_loader):
        self.model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in valid_loader:
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                outputs = self.model(inputs)
                loss = self.criterion(outputs, targets)
                val_loss += loss.item() * inputs.size(0)
        val_loss /= len(valid_loader.dataset)
        return val_loss

    def train(self, train_loader, valid_loader, epochs, patience):
        for epoch in range(epochs):
            train_loss = self.train_epoch(train_loader)
            val_loss = self.validate(valid_loader)
            self.scheduler.step(val_loss)

            print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                torch.save(self.model.state_dict(), 'best_model.pth')
                self.patience_counter = 0  # reset patience counter
            else:
                self.patience_counter += 1
                if self.patience_counter >= patience:
                    print('Early stopping')
                    break

def predict_in_batches(model, data, batch_size):
    model.eval()  # Set model to evaluation mode
    preds = []
    for i in range(0, data.size(0), batch_size):
        batch = data[i:i+batch_size].to(Config.DEVICE)
        with torch.no_grad():
            batch_preds = torch.sigmoid(model(batch))  # apply sigmoid
        preds.append(batch_preds.detach().cpu())
    return torch.cat(preds, dim=0)

In [59]:
def prepare_dataloader(train, val, features, targets, device):
    X_train = torch.tensor(train.loc[:, features].values, dtype=torch.float32).to(device)
    y_train = torch.tensor(train.loc[:, targets].values, dtype=torch.float32).to(device)
    X_val = torch.tensor(val.loc[:, features].values, dtype=torch.float32).to(device)
    y_val = torch.tensor(val.loc[:, targets].values, dtype=torch.float32).to(device)
    
    train_dataset = TensorDataset(X_train, y_train)
    valid_dataset = TensorDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
    
    return train_loader, valid_loader, X_val, y_val

In [60]:
val = pl.read_parquet(train_file_list[9], n_rows=n_rows).to_pandas()
train = pl.read_parquet(train_file_list[0], n_rows=n_rows).to_pandas()

# Main training process
model = ImprovedCNNModel().to(Config.DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=Config.LR, weight_decay=Config.WD)
criterion = torch.nn.BCEWithLogitsLoss()# Binary cross-entropy loss for binary classification

# 定数やモデルの定義は適宜修正してください
FEATURES = [f'enc{i}' for i in range(142)]
TARGETS = ['bind1', 'bind2', 'bind3']

In [61]:



all_preds = []


# データの準備

train_loader, valid_loader, X_val, y_val = prepare_dataloader(train, val, FEATURES, TARGETS, Config.DEVICE)
trainer = Trainer(model, optimizer, criterion,Config.DEVICE)
trainer.train(train_loader, valid_loader, Config.EPOCHS, Config.PATIENCE)


oof = predict_in_batches(model, X_val, Config.BATCH_SIZE)
print('Val score =', APS(y_val.cpu().numpy(), oof.detach().cpu().numpy(), average='micro'))

test_tensor = torch.tensor(test.values, dtype=torch.float32).to(Config.DEVICE)
"TODO: バッチ処理するべき？ 1にしなくていいの？"
preds = predict_in_batches(model, test_tensor, Config.BATCH_SIZE)
all_preds.append(preds)


# CVのアンサンブル
preds = np.mean(all_preds, axis=0)

Epoch 1: Train Loss: 0.0535, Val Loss: 0.0366
Epoch 2: Train Loss: 0.0358, Val Loss: 0.0367
Epoch 3: Train Loss: 0.0360, Val Loss: 0.0358
Epoch 4: Train Loss: 0.0362, Val Loss: 0.0346
Epoch 5: Train Loss: 0.0364, Val Loss: 0.0355
Epoch 6: Train Loss: 0.0366, Val Loss: 0.0384
Epoch 7: Train Loss: 0.0368, Val Loss: 0.0403
Epoch 00008: reducing learning rate of group 0 to 1.0000e-04.
Epoch 8: Train Loss: 0.0370, Val Loss: 0.0390
Epoch 9: Train Loss: 0.0370, Val Loss: 0.0367
Epoch 10: Train Loss: 0.0371, Val Loss: 0.0368
Val score = 0.0072215929429453


In [62]:
# local testの予測と結果
local_test = pl.read_parquet(os.path.join(PROCESSED_DIR, 'local_test_enc.parquet'))
local_test = local_test.to_pandas()

target = local_test[TARGETS].values
local_test_tensor = torch.tensor(local_test[FEATURES].values, dtype=torch.float32).to(Config.DEVICE)
local_preds = predict_in_batches(model, local_test_tensor, Config.BATCH_SIZE)

# calculate score
score = APS(target, local_preds.detach().cpu().numpy(), average='micro')
print('local test score =', score)



local test score = 0.0072320951470594644


# Submission

In [63]:

# テストデータの読み込み
tst = pl.read_parquet(os.path.join(RAW_DIR, "test.parquet"), n_rows=n_rows).to_pandas()

# 'binds'列を追加して初期化
tst['binds'] = 0

# ブールマスクの作成
mask_BRD4 = (tst['protein_name'] == 'BRD4').values
mask_HSA = (tst['protein_name'] == 'HSA').values
mask_sEH = (tst['protein_name'] == 'sEH').values

# 各マスクに対応する予測値を代入
tst.loc[mask_BRD4, 'binds'] = preds[mask_BRD4][:, 0]
tst.loc[mask_HSA, 'binds'] = preds[mask_HSA][:, 1]
tst.loc[mask_sEH, 'binds'] = preds[mask_sEH][:, 2]



submission = tst[['id', 'binds']].copy()
# 'id'と'binds'列をCSVに出力
submission.to_csv(os.path.join(OUTPUT_DIR,'submission.csv'), index=False)


  tst.loc[mask_BRD4, 'binds'] = preds[mask_BRD4][:, 0]


In [64]:
submission

Unnamed: 0,id,binds
0,295246830,0.014513
1,295246831,0.014568
2,295246832,0.017553
3,295246833,0.013846
4,295246834,0.013899
...,...,...
999995,296246825,0.013007
999996,296246826,0.013058
999997,296246827,0.015815
999998,296246828,0.010312


In [65]:
submission[submission['binds'] > 0.3]

Unnamed: 0,id,binds


In [66]:
# 最良のモデルをロードして予測を行う
all_preds = []
model = ImprovedCNNModel().to(Config.DEVICE)
model.load_state_dict(torch.load(os.path.join(MODEL_DIR, 'best_model_65.pt')))
test_tensor = torch.tensor(test.values, dtype=torch.float32).to(Config.DEVICE)
"TODO: バッチ処理するべき？ 1にしなくていいの？"
preds = predict_in_batches(model, test_tensor, Config.BATCH_SIZE)
all_preds.append(preds)
# CVのアンサンブル
preds = np.mean(all_preds, axis=0)

In [67]:
# 定数やモデルの定義は適宜修正してください
FEATURES = [f'enc{i}' for i in range(142)]
TARGETS = ['bind1', 'bind2', 'bind3']


# local testの予測と結果
local_test = pl.read_parquet(os.path.join(PROCESSED_DIR, 'local_test_enc.parquet'))
local_test = local_test.to_pandas()

target = local_test[TARGETS].values
local_test_tensor = torch.tensor(local_test[FEATURES].values, dtype=torch.float32).to(Config.DEVICE)
local_preds = predict_in_batches(model, local_test_tensor, Config.BATCH_SIZE)

# calculate score
score = APS(target, local_preds.detach().cpu().numpy(), average='micro')
print('local test score =', score)



local test score = 0.007358354820648812


In [68]:
# テストデータの読み込み
tst = pl.read_parquet(os.path.join(RAW_DIR, "test.parquet"), n_rows=n_rows).to_pandas()

# 'binds'列を追加して初期化
tst['binds'] = 0

# ブールマスクの作成
mask_BRD4 = (tst['protein_name'] == 'BRD4').values
mask_HSA = (tst['protein_name'] == 'HSA').values
mask_sEH = (tst['protein_name'] == 'sEH').values

# 各マスクに対応する予測値を代入
tst.loc[mask_BRD4, 'binds'] = preds[mask_BRD4][:, 0]
tst.loc[mask_HSA, 'binds'] = preds[mask_HSA][:, 1]
tst.loc[mask_sEH, 'binds'] = preds[mask_sEH][:, 2]



submission = tst[['id', 'binds']].copy()
# 'id'と'binds'列をCSVに出力
submission.to_csv(os.path.join(OUTPUT_DIR,'submission.csv'), index=False)


  tst.loc[mask_BRD4, 'binds'] = preds[mask_BRD4][:, 0]


In [69]:
# check model output
train = pl.read_parquet(os.path.join(PROCESSED_DIR, TRAIN_DATA_NAME), n_rows=1).to_pandas()
target = train[TARGETS].values
tensor = torch.tensor(train[FEATURES].values, dtype=torch.float32).to(Config.DEVICE)
target = torch.tensor(target, dtype=torch.float32).to(Config.DEVICE)
output = model(tensor)

tensor, target, output

(tensor([[ 8., 22.,  8.,  8., 28., 12., 27., 12., 12., 12., 17.,  8., 33., 12.,
          18., 35., 12., 17., 33.,  8.,  8.,  4.,  8.,  8.,  8., 33.,  4., 12.,
           4., 12., 12., 12., 35., 35.,  4., 19., 35., 12., 17., 33., 29.,  8.,
           3.,  3.,  5., 32., 17.,  8.,  8., 22.,  8., 19.,  8.,  8., 17., 26.,
          28., 19., 33., 29., 30.,  2., 32., 19., 35., 18., 19., 12., 12., 27.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.]], device='cuda:0'),
 tensor([[0., 0., 0.]], device='cuda:0'),
 tensor([[-0.0167, -0.0024,  0.0393]], device='cuda:0',
        grad_fn=<AddmmBackward0>))

In [70]:
import torch.nn as nn



criterion = nn.CrossEntropyLoss()
loss = criterion(output, target)
loss

tensor(-0., device='cuda:0', grad_fn=<DivBackward1>)

In [71]:
criterion = nn.BCEWithLogitsLoss()
loss = criterion(output, target)
loss

tensor(0.6966, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [72]:
#モデルのoutputがほぼ0ということ
torch.sigmoid(output)

tensor([[0.4958, 0.4994, 0.5098]], device='cuda:0', grad_fn=<SigmoidBackward0>)