In [1]:
from DeepPurpose import utils, dataset
from sklearn.model_selection import train_test_split
from DeepPurpose.utils import *

import torch
import numpy as np
from AutoEncoder import AutoEncoder
from torch.utils.data import DataLoader, Dataset
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Branch the autoencoder is built for. The data-preparation cell compares this
# against 'drug'; any other value is handled as the protein branch.
branch_model_to_use = 'protein'

# Encoding names handed to the DeepPurpose data-processing step.
drug_encoding, target_encoding = 'MPNN', 'CNN'

# Weights & Biases project / entity identifiers.
wandb_project_name, wandb_project_entity = 'DeepPurpose', 'diliadis'

In [3]:
# Load the DAVIS drug-target affinity data (labels converted to log space).
X_drugs, X_targets, y = dataset.load_process_DAVIS(
    path='./data', binary=False, convert_to_log=True, threshold=30
)  # http://staff.cs.utu.fi/~aatapa/data/DrugTarget/

print('Processing the dataset...')
# Encode drugs and targets. Only the first (training) partition returned by
# data_process is kept; it gets its own name so it is not confused with the
# autoencoder's train/val/test split created further down.
pairs_train, _, _ = utils.data_process(X_drugs, X_targets, y,
                                       drug_encoding, target_encoding,
                                       split_method='random', frac=[0.7, 0.1, 0.2],
                                       random_seed=1)
print('Done! ')

# Keep one row per unique drug or protein and select the column that is fed to
# the autoencoder.
# NOTE(review): the drug branch selects the raw 'SMILES' strings while the
# protein branch selects the 'target_encoding' column -- confirm whether
# 'drug_encoding' was intended for the drug branch.
if branch_model_to_use == 'drug':
    id_column, feature_column = 'SMILES', 'SMILES'
else:
    id_column, feature_column = 'Target Sequence', 'target_encoding'
data = pairs_train.drop_duplicates(id_column, ignore_index=True)[feature_column]

# Fractions of the unique entities assigned to each split. 'train' is the
# remainder after 'test' and 'val' are taken, i.e. 0.7 (the previous value of
# 0.8 made the fractions add up to 1.1).
frac = {'train': 0.7, 'val': 0.1, 'test': 0.2}
assert abs(sum(frac.values()) - 1.0) < 1e-9, 'split fractions must sum to 1'

print('Splitting the dataset...')
# First carve out the test set, then take the validation set from what is left.
# frac['val'] is expressed relative to the full data, so it is rescaled to the
# size of the remaining (train + val) portion.
train_and_val, test = train_test_split(data, test_size=frac['test'], random_state=42)
train, val = train_test_split(train_and_val,
                              test_size=frac['val'] / (1 - frac['test']),
                              random_state=42)
print('Done! ')


Beginning Processing...
Beginning to extract zip file...
Default set to logspace (nM -> p) for easier regression
Done!
Processing the dataset...
Drug Target Interaction Prediction Mode...
in total: 30056 drug-target pairs
encoding drug...
unique drugs: 68
encoding protein...
unique target sequence: 379
splitting dataset...
Done.
Done! 
Splitting the dataset...
Done! 


In [4]:
# Hyper-parameters and bookkeeping options passed to AutoEncoder.
# NOTE(review): the encoding names below repeat the values assigned to
# drug_encoding / target_encoding earlier in the notebook -- keep them in sync.
config = dict(
    # --- experiment tracking (Weights & Biases) ---
    wandb_project_name='Protein_autoencoder',
    wandb_project_entity='diliadis',

    # --- input encodings ---
    drug_encoding='MPNN',
    target_encoding='CNN',

    # --- hardware / data loading ---
    cuda_id='7',
    num_workers=2,

    # --- output ---
    # alternative naming scheme: 'autoencoder_' + branch_model_to_use + '_'
    experiment_name=None,
    result_folder='./results/',

    # --- optimisation ---
    decay=0,
    LR=0.001,

    batch_size=32,
    train_epoch=100,
    test_every_X_epoch=5,

    # --- convolutional architecture ---
    cnn_filters=[32],
    cnn_kernels=[3],

    # --- early stopping / best-epoch selection ---
    use_early_stopping=True,
    patience=5,
    delta=0.0005,
    metric_to_optimize_early_stopping='loss',
    metric_to_optimize_best_epoch_selection='loss',

    save_model=False,
)

In [5]:
# Initialize the autoencoder wrapper from the configuration dictionary
model = AutoEncoder(config)

Using the following device: cuda:7
Early stopping detected metric: loss


In [6]:
# Show the wrapped network; as the cell's last expression its repr is rendered
model.model

AutoEncoder_model(
  (encoder): Sequential(
    (0): Conv1d(26, 32, kernel_size=(3,), stride=(1,))
    (1): ReLU(inplace=True)
  )
  (decoder): Sequential(
    (0): ConvTranspose1d(32, 26, kernel_size=(3,), stride=(1,))
    (1): ReLU(inplace=True)
    (2): Tanh()
  )
)

In [7]:
# Train the autoencoder, passing the train / validation / test splits
model.train(train, val, test)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdiliadis[0m. Use [1m`wandb login --relogin`[0m to force relogin


Num train batches: 9
Num validation batches: 2
Num test batches: 3
Epoch 0 train_loss: 0.04039204388869935
      0 val_loss: 0.038373717370378355
Epoch 1 train_loss: 0.03741528709588252
      1 val_loss: 0.036288342483711974
Epoch 2 train_loss: 0.03578236058605298
      2 val_loss: 0.03446376088263946
Epoch 3 train_loss: 0.03416070617394238
      3 val_loss: 0.033065809674102976
Epoch 4 train_loss: 0.03258077238052728
      4 val_loss: 0.031266409486905324
Epoch 5 train_loss: 0.03102293299526201
      5 val_loss: 0.030617624996152233
Epoch 6 train_loss: 0.029635949803504844
      6 val_loss: 0.027766013696475488
Epoch 7 train_loss: 0.027712003904561706
      7 val_loss: 0.02480164865093865
Epoch 8 train_loss: 0.026404912173950114
      8 val_loss: 0.02619596024168356
-----------------------------EarlyStopping counter: 1 out of 5---------------------- best epoch currently 7
Epoch 9 train_loss: 0.025091222366615033
      9 val_loss: 0.02394995368610684
Epoch 10 train_loss: 0.024262655268

0,1
best_loss,▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
test_loss,▁
train_loss,█▇▆▆▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁
val_loss,█▇▇▆▅▅▄▃▃▃▂▂▂▁▂▂▂▂▂

0,1
best_loss,0.01996
epoch,18.0
test_loss,0.02169
train_loss,0.02166
val_loss,0.02143


In [None]:
# `pickle` is not imported explicitly in the notebook's import cell (it could
# only be in scope through the wildcard import), so import it here to make this
# cell work on a fresh kernel.
import pickle

# Folder of the earlier run whose configuration and weights are reloaded.
run_dir = "results/15_11_2022__09_56_19"

# WARNING: pickle.load can execute arbitrary code -- only load files you trust.
with open(run_dir + "/config.pkl", "rb") as file_to_read:
    config = pickle.load(file_to_read)

# Initialize the model from the stored configuration
model = AutoEncoder(config)

# Restore the saved weights onto the device selected by the model
model.load_pretrained(run_dir + "/model.pt", model.device)

# Run training again, starting from the restored weights
model.train(train, val, test)