# UnlockDNA model run example

In [None]:
# Run-configuration / hyperparameter dictionary.
# NOTE(review): none of the cells visible in this notebook read ARGS -- confirm
# whether it is still needed for this example.
ARGS = dict(
    # run identity and seeding
    model_id='m20220727e',
    global_seed=123,
    # data handling
    shuffle_size=1000,
    max_width=100,
    head_len=17,
    tail_len=13,
    pct_ds=1,  # % of total data for training/testing
    train_split=0.95,
    alphabets=dict(A=0, C=1, G=2, T=3, N=4, M=5),
    # learning-rate range and epoch/batch settings
    initial_lr=1e-15,
    max_lr=3e-4,
    initial_epoch=0,
    epochs=20,
    batch_size=2,
    # model-size settings
    dropout_rate=0.1,
    kmer=10,
    strides=1,
    embedding_dim=512,
    num_heads=8,
    ff_mult=4,
    num_projectors=32,
    n_blocks_regressor=4,
    warmup_steps=12500,  # ~ 1 epoch
    mask_ratio=0.05,
    # submission template, evaluation flag and device
    remote_sample_submission_file='https://raw.githubusercontent.com/de-Boer-Lab/DREAM-2022/main/sample_submission.json',
    eval=False,
    device='cuda:1',
)

In [None]:
# Dataset locations (train, validation).
# NOTE(review): these are absolute, user-specific paths -- adjust for your machine.
TRAIN_DATA_PATH, VALID_DATA_PATH = (
    "/home/dnogina/dreamdata/contest_data/train.txt",
    "/home/dnogina/dreamdata/contest_data/val.txt",
)
PLASMID_PATH = "plasmid.json"  # relative path

# Batching, given as (train, validation) pairs.
TRAIN_BATCH_SIZE, VALID_BATCH_SIZE = 1024, 4096
BATCH_PER_EPOCH, BATCH_PER_VALIDATION = 1000, 125

N_PROCS = 8  # used for both train_workers and valid_workers of the data processor
SEQ_SIZE = 150  # passed to the data processor as seqsize

In [None]:
import torch
from prixfixe.autosome import AutosomeDataProcessor

# Dedicated torch RNG with a fixed seed; the same object is handed to the data
# processor and to the model so their randomness comes from one seeded source.
# `manual_seed` returns the generator itself, so creation and seeding are chained.
generator = torch.Generator().manual_seed(2147483647)

# Build the data processor from the notebook-level settings defined above.
dataprocessor = AutosomeDataProcessor(
    # training side
    path_to_training_data=TRAIN_DATA_PATH,
    train_batch_size=TRAIN_BATCH_SIZE,
    batch_per_epoch=BATCH_PER_EPOCH,
    train_workers=N_PROCS,
    shuffle_train=True,
    # validation side
    path_to_validation_data=VALID_DATA_PATH,
    valid_batch_size=VALID_BATCH_SIZE,
    batch_per_valid=BATCH_PER_VALIDATION,
    valid_workers=N_PROCS,
    shuffle_val=False,
    # shared settings
    plasmid_path=PLASMID_PATH,
    seqsize=SEQ_SIZE,
    generator=generator,
)

The creation of the model is very simple. 

The only thing to note is that the `DataProcessor` must provide `data_channels` and `data_seqsize` methods. Without them, the first-layers block (here `AutosomeFirstLayersBlock`) cannot be configured correctly.


In [None]:
from prixfixe.autosome import (AutosomeCoreBlock,
                      AutosomeFirstLayersBlock,
                      AutosomeFinalLayersBlock)

from prixfixe.prixfixe import PrixFixeNet


# Assemble the network from three stages. Each stage takes its input channel
# count and sequence size from the stage (or data processor) that precedes it.
first = AutosomeFirstLayersBlock(
    in_channels=dataprocessor.data_channels(),
    out_channels=256,
    seqsize=dataprocessor.data_seqsize(),
)
core = AutosomeCoreBlock(
    in_channels=first.out_channels,
    out_channels=64,
    seqsize=first.infer_outseqsize(),
)
final = AutosomeFinalLayersBlock(
    in_channels=core.out_channels,
    seqsize=core.infer_outseqsize(),
)

# Combine the stages; the seeded generator from the data-loading cell is reused.
model = PrixFixeNet(first=first, core=core, final=final, generator=generator)


Check that the model works correctly.

In [None]:
# Run the model's own check before training.
# NOTE(review): what exactly `check` verifies is defined in PrixFixeNet, not in this notebook.
model.check()

Training the model is also simple:

In [None]:
# Settings for the training run; all three are read by the AutosomeTrainer cell.
CUDA_DEVICE_ID = 1  # index interpolated into the "cuda:<id>" device string
# NOTE(review): absolute, user-specific path -- adjust for your machine.
MODEL_LOG_DIR = "/home/penzard/autosome_model4"  # passed to the trainer as model_dir
NUM_EPOCHS = 1

In [None]:
import torch 
from prixfixe.autosome import AutosomeTrainer
# Wire the model and data processor into a trainer that targets the GPU
# selected by CUDA_DEVICE_ID and uses MODEL_LOG_DIR as its model directory.
trainer = AutosomeTrainer(
    model,
    dataprocessor=dataprocessor,
    num_epochs=NUM_EPOCHS,
    model_dir=MODEL_LOG_DIR,
    device=torch.device(f"cuda:{CUDA_DEVICE_ID}"),
)

In [None]:
# Start training with the configuration given to AutosomeTrainer (NUM_EPOCHS epochs).
# NOTE(review): presumably writes its outputs under MODEL_LOG_DIR -- confirm in AutosomeTrainer.
trainer.fit()