# Parameters

In [1]:
# --- Experiment-wide settings -------------------------------------------------
# These three names are the single source of truth: they feed `training_params`
# below and are also read directly by the dataset-building cell.
data_size = 2000
dataset_views_to_consider = 'all'
seed = 42

# NOTE(review): the standalone values below are not read when `model_params` or
# `training_params` are built in this cell, and several disagree with the values
# actually used there (d_model_enc_dec 1024 vs 512, n_layers_enc 2 vs 10,
# n_layers_dec 2 vs 1, dropout 0.1 vs ~0.162, empty class_weights).
# They are kept so that any other cell still referencing them keeps working —
# TODO confirm they are unused and delete them.
d_input_enc = 2000
nb_classes_dec = 33
class_weights = []
d_model_enc_dec = 1024
d_ff_enc_dec = 1024
n_heads_enc_dec = 16
n_layers_enc = 2
n_layers_dec = 2
activation = "relu"
dropout = 0.1

# --- Model hyper-parameters passed to the trainer ------------------------------
model_params = {
    "d_input_enc": 2000,
    "lr": 6.033193735866575e-05,
    "nb_classes_dec": 33,
    "early_stopping": True,
    "dropout": 0.16171970479206027,
    "weight_decay": 5.4598394312421854e-05,
    "activation": "relu",
    "optimizer": "Adam",
    "lr_scheduler": "cosine_with_restarts",
    "loss": "ce",
    "n_epochs": 1,
    "batch_size": 256,
    # One weight per output class (33 entries, matching "nb_classes_dec").
    "class_weights": [4.03557312, 0.85154295, 0.30184775, 1.18997669, 8.25050505,
                      0.72372851, 7.73484848, 1.81996435, 0.62294082, 0.61468995,
                      4.07992008, 0.49969411, 1.07615283, 1.85636364, 0.7018388,
                      0.84765463, 0.60271547, 0.62398778, 4.26750261, 0.61878788,
                      1.89424861, 1.98541565, 0.65595888, 2.05123054, 1.37001006,
                      0.77509964, 0.76393565, 2.67102681, 0.64012539, 2.94660895,
                      0.64012539, 6.51355662, 4.64090909],
    "d_model_enc_dec": 512,
    "n_heads_enc_dec": 16,
    "n_layers_enc": 10,
    "n_layers_dec": 1
}
# The feed-forward width is derived from the model width (4x), so it cannot drift
# out of sync when "d_model_enc_dec" is edited.
d_ff_enc_dec_value = model_params["d_model_enc_dec"] * 4
model_params["d_ff_enc_dec"] = d_ff_enc_dec_value

# Catch a class-weight list that no longer matches the number of classes before
# any training starts.
assert len(model_params["class_weights"]) == model_params["nb_classes_dec"], (
    "class_weights must contain exactly one weight per class"
)

# --- Options for the fit and predict phases ------------------------------------
fit_params = {
    "nb_ckpts": 1,
    "verbose": 1
}

predict_params = {
    "nb_ckpts": 1,
    "scores_fname": "transformer_scores.json"
}

# --- Full experiment configuration ---------------------------------------------
# Reuses data_size / dataset_views_to_consider / seed instead of repeating their
# literal values, so the trainer and the dataset cell always agree.
training_params = {
    "model_params": model_params,
    "fit_params": fit_params,
    "predict_params": predict_params,
    "data_size": data_size,
    "dataset_views_to_consider": dataset_views_to_consider,
    "exp_type": "data_aug",
    "seed": seed
}
output_path = './'

# Import section

In [2]:
import argparse
import os
import json
import torch
import random
import natsort
import numpy as np
import pandas as pd
from tqdm import tqdm
from argparse import Namespace


import optuna
from optuna.study import StudyDirection
from packaging import version
from multiomic_modeling.models.trainer_multimodal import MultiomicTrainerMultiModal
from multiomic_modeling.models.base_multimodal import BaseMultiModalTrainer
from multiomic_modeling.data.data_loader import MultiomicDatasetDataAug, MultiomicDatasetNormal, MultiomicDatasetBuilder, SubsetRandomSampler
from multiomic_modeling.models.models import MultiomicPredictionModel, MultiomicPredictionModelMultiModal
from multiomic_modeling.models.utils import expt_params_formatter, c_collate
from multiomic_modeling.loss_and_metrics import ClfMetrics, NumpyEncoder, RegMetrics
from multiomic_modeling.utilities import params_to_hash
from multiomic_modeling.torch_utils import to_numpy, totensor, get_optimizer
from multiomic_modeling import logging
from torch.utils.data import DataLoader
from transformers.optimization import Adafactor, AdamW, \
    get_cosine_schedule_with_warmup, get_cosine_with_hard_restarts_schedule_with_warmup

import pytorch_lightning as pl
import torch

# Abort early when the installed PyTorch Lightning is older than the minimum
# release this notebook was written against.
_installed_pl, _required_pl = version.parse(pl.__version__), version.parse("1.0.2")
if _installed_pl < _required_pl:
    raise RuntimeError("PyTorch Lightning>=1.0.2 is required for this example.")



In [None]:
# Launch the experiment with the configuration assembled in the parameters cell;
# the value returned by run_experiment is kept in `model`.
model = MultiomicTrainerMultiModal.run_experiment(
    **training_params,
    output_path=output_path,
)

>>> Training configuration : 
{
  "data_size": 2000,
  "dataset_views_to_consider": "all",
  "exp_type": "data_aug",
  "fit_params": {
    "nb_ckpts": 1,
    "verbose": 1
  },
  "kwargs": {},
  "model_params": {
    "activation": "relu",
    "batch_size": 256,
    "class_weights": [
      4.03557312,
      0.85154295,
      0.30184775,
      1.18997669,
      8.25050505,
      0.72372851,
      7.73484848,
      1.81996435,
      0.62294082,
      0.61468995,
      4.07992008,
      0.49969411,
      1.07615283,
      1.85636364,
      0.7018388,
      0.84765463,
      0.60271547,
      0.62398778,
      4.26750261,
      0.61878788,
      1.89424861,
      1.98541565,
      0.65595888,
      2.05123054,
      1.37001006,
      0.77509964,
      0.76393565,
      2.67102681,
      0.64012539,
      2.94660895,
      0.64012539,
      6.51355662,
      4.64090909
    ],
    "d_ff_enc_dec": 2048,
    "d_input_enc": 2000,
    "d_model_enc_dec": 512,
    "dropout": 0.16171970479206027,
  

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.

  | Name    | Type                               | Params
---------------------------------------------------------------
0 | network | MultiomicPredictionModelMultiModal | 42.0 M
---------------------------------------------------------------
42.0 M    Trainable params
0         Non-trainable params
42.0 M    Total params
168.010   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

In [None]:
# Build the base dataset from the experiment-wide size / view settings.
dataset = MultiomicDatasetNormal(
    data_size=data_size,
    views_to_consider=dataset_views_to_consider,
)

# Split it; 20% is requested for testing and 10% for validation, with the split
# driven by the experiment seed.
train, test, valid = MultiomicDatasetBuilder().multiomic_data_normal_builder(
    dataset=dataset,
    test_size=0.2,
    valid_size=0.1,
    random_state=seed,
)

# The augmented dataset is derived from the training split only.
dataset_augmented = MultiomicDatasetDataAug(
    train_dataset=train,
    data_size=data_size,
    views_to_consider=dataset_views_to_consider,
)

train_augmented = MultiomicDatasetBuilder.multiomic_data_aug_builder(
    augmented_dataset=dataset_augmented,
)

In [None]:
# Exploratory only: directory listing of the current working directory
# (presumably IPython's `ll` shell alias, invoked without `%` via automagic).
ll

In [None]:
# Moves the kernel's working directory up one level (IPython automagic form of
# `%cd ..`). Relative paths used in later cells, such as `config_file`, are
# resolved from the new directory, so this cell must run exactly once.
cd ..

In [None]:
# Exploratory only: list the directory again after the `cd ..` above it
# (presumably IPython's `ll` shell alias, invoked without `%` via automagic).
ll

In [None]:
# Relative path to the saved configuration of one experiment run.
# NOTE(review): the directory name looks like a hash of the run parameters —
# confirm; the path only resolves from the directory reached by the `cd ..` cell.
config_file = '649c350f57b9ffb4277a24511a90696abae3269a/config.json'

In [None]:
# Read the saved experiment configuration back into a plain dictionary.
with open(config_file, mode='r') as config_handle:
    all_params = json.load(config_handle)

In [None]:
# Seed the stdlib, NumPy and PyTorch generators (in that order) with the seed
# stored in the loaded configuration.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(all_params['seed'])

# Re-create the trainer from the saved model hyper-parameters, exposed to it as
# attributes of a Namespace.
trainer_model = MultiomicTrainerMultiModal(
    Namespace(**all_params['model_params'])
)

In [None]:
# Pieces of the score-file name assembled in the next cell as
# "<save_file_name>_<views_to_consider>.txt".
# NOTE(review): 'all' repeats the value of `dataset_views_to_consider` from the
# parameters cell — confirm the two are meant to stay equal.
save_file_name = 'naive_scores'
views_to_consider = 'all'

In [None]:
# Score file location: <output_path from the loaded config>/<name>_<views>.txt
scores_fname = os.path.join(
    all_params['fit_params']['output_path'],
    '{}_{}.txt'.format(save_file_name, views_to_consider),
)

In [None]:
# Directory holding the run's artifacts, taken from the loaded configuration.
artifact_dir=all_params['fit_params']['output_path'] 
# Number of checkpoints to load for prediction; falls back to 1 when the key is absent.
nb_ckpts=all_params['predict_params'].get('nb_ckpts', 1)

In [None]:
# Collect every *.ckpt file under <artifact_dir>/checkpoints in natural sort order.
ckpt_path = os.path.join(artifact_dir, 'checkpoints')
ckpt_fnames = natsort.natsorted([
    os.path.join(ckpt_path, entry)
    for entry in os.listdir(ckpt_path)
    if entry.endswith('.ckpt')
])
# Show everything that was found before narrowing the selection.
print(*ckpt_fnames)

# Keep only the first `nb_ckpts` files and hand them to the trainer.
# NOTE(review): judging by its name, load_average_weights averages the weights of
# the given checkpoints — confirm against its implementation.
ckpt_fnames = ckpt_fnames[:nb_ckpts]
trainer_model.load_average_weights(ckpt_fnames)

# Deterministic (unshuffled) loader over the test split, batched with the size
# stored in the trainer's hyper-parameters.
batch_size = trainer_model.hparams.batch_size
ploader = DataLoader(
    test,
    collate_fn=c_collate,
    batch_size=batch_size,
    shuffle=False,
)


In [None]:
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, confusion_matrix, classification_report, matthews_corrcoef, mean_absolute_error, mean_squared_error, r2_score
# NOTE(review): this definition shadows the `RegMetrics` imported from
# multiomic_modeling.loss_and_metrics in the import cell — confirm that is intended.
class RegMetrics:
    """Regression metrics, each multiplied by 100 and rounded to 3 decimals."""

    @staticmethod
    def score(y_test, y_pred):
        """Score predictions against targets.

        Args:
            y_test: ground-truth target values.
            y_pred: predicted values, aligned with ``y_test``.

        Returns:
            dict with keys ``'r2'``, ``'mse'`` and ``'mae'`` holding the
            corresponding scikit-learn metric times 100, rounded to 3 decimals.
        """
        metric_fns = {
            'r2': r2_score,
            'mse': mean_squared_error,
            'mae': mean_absolute_error,
        }
        return {
            metric_name: np.round(metric_fn(y_test, y_pred) * 100, 3)
            for metric_name, metric_fn in metric_fns.items()
        }
    

In [None]:
# Smoke test: run the restored network over the test loader and print only the
# shapes of its two outputs for every batch.
# tqdm wraps the loader itself rather than the enumerate() generator, so the
# progress bar can read the loader's length and display a total / ETA.
for i, (x, patient_label, patient_name) in enumerate(tqdm(ploader)):
    pred_1, pred_2 = trainer_model.network.predict(inputs=x)
    print('prediction')
    print(f'pred 1: {pred_1.shape}')
    print(f'pred 2: {pred_2.shape}')
    preds_views_shape = pred_2.shape
    # NOTE(review): reshape() only reinterprets the same element order with the
    # first two sizes exchanged; it does not move data between axes. If the
    # intent is to swap axes 0 and 1, a transpose/permute is required instead —
    # TODO confirm before using the reshaped values (only the shape is used here).
    pred_2 = pred_2.reshape(preds_views_shape[1], preds_views_shape[0], -1)
    print(f'pred 2: {pred_2.shape}')
    print('----------------------------------------------------')

In [None]:
# Take the first item produced by iterating the `test` split directly
# (no DataLoader, so no batching or collation is applied).
example_1 = next(iter(test))

In [None]:
# Display the first element of that item.
# NOTE(review): presumably the model input, mirroring `x` in the loader loop — confirm.
example_1[0]

In [None]:
# Run the network's predict on that single element and display the result.
# NOTE(review): this input did not go through the DataLoader / c_collate path
# used in the loop above — confirm predict accepts an uncollated sample.
trainer_model.network.predict(inputs=example_1[0])