# Import libraries and functions

In [None]:
import pandas as pd
import torch
from prixfixe.autosome import AutosomeFirstLayersBlock, AutosomeCoreBlock, AutosomeFinalLayersBlock, AutosomeDataProcessor, AutosomeTrainer
from prixfixe.bhi import BHIFirstLayersBlock, BHICoreBlock
from prixfixe.unlockdna import UnlockDNACoreBlock
from prixfixe.prixfixe import PrixFixeNet
import sys
import os
import shutil

# Initialize paths and variables

In [None]:
# Notebook-wide configuration: data locations, batch sizes, learning rate,
# sequence length, epoch count, the seeded RNG and the target CUDA device.
CUDA_DEVICE_ID = 0
TRAIN_DATA_PATH = "data/train.txt"
VALID_DATA_PATH = "data/val.txt"
TEST_DATA_PATH = "data/test.txt"
MODEL_LOG_DIR = "model_weights"
TRAIN_BATCH_SIZE = 32
N_PROCS = 4
VALID_BATCH_SIZE = 32
lr = 0.005 # 0.001 for DREAM-Attn, 0.005 for DREAM-CNN and DREAM-RNN

# Number of whole batches in each split (a trailing partial batch is dropped
# by the floor division). Each split is divided by its own batch size.
# NOTE(review): pd.read_csv with default arguments consumes the first line as
# a header, so that line is not counted -- confirm the data files really have
# a header row, otherwise pass header=None here.
BATCH_PER_EPOCH = len(pd.read_csv(TRAIN_DATA_PATH)) // TRAIN_BATCH_SIZE
BATCH_PER_VALIDATION = len(pd.read_csv(VALID_DATA_PATH)) // VALID_BATCH_SIZE

SEQ_SIZE = 230
NUM_EPOCHS = 80

# One seeded generator is shared by the model and the data processor below.
generator = torch.Generator()
generator.manual_seed(42)

device = torch.device(f"cuda:{CUDA_DEVICE_ID}")

# DREAM-CNN

In [None]:
# DREAM-CNN: BHIFirstLayersBlock -> AutosomeCoreBlock -> AutosomeFinalLayersBlock.
# Running this cell (re)binds `first`, `core`, `final` and `model`; the Train
# cell uses whichever `model` was built last.
first = BHIFirstLayersBlock(
    in_channels=5,
    out_channels=320,
    seqsize=SEQ_SIZE,  # same length the data processor is configured with
    kernel_sizes=[9, 15],
    pool_size=1,
    dropout=0.2,
)

# The core block is sized from the first block's reported output.
core = AutosomeCoreBlock(
    in_channels=first.out_channels,
    out_channels=64,
    seqsize=first.infer_outseqsize(),
)

final = AutosomeFinalLayersBlock(in_channels=core.out_channels)

model = PrixFixeNet(
    first=first,
    core=core,
    final=final,
    generator=generator,
)

# Print a layer summary for one dummy input of shape (1, 5, SEQ_SIZE); the
# channel count matches `in_channels` above.
from torchinfo import summary
print(summary(model, (1, 5, SEQ_SIZE)))

# DREAM-RNN

In [None]:
# DREAM-RNN: BHIFirstLayersBlock -> BHICoreBlock -> AutosomeFinalLayersBlock.
# Running this cell (re)binds `first`, `core`, `final` and `model`; the Train
# cell uses whichever `model` was built last.
first = BHIFirstLayersBlock(
    in_channels=5,
    out_channels=320,
    seqsize=SEQ_SIZE,  # same length the data processor is configured with
    kernel_sizes=[9, 15],
    pool_size=1,
    dropout=0.2,
)

# The core block is sized from the first block's reported output.
core = BHICoreBlock(
    in_channels=first.out_channels,
    out_channels=320,
    seqsize=first.infer_outseqsize(),
    lstm_hidden_channels=320,
    kernel_sizes=[9, 15],
    pool_size=1,
    dropout1=0.2,
    dropout2=0.5,
)

final = AutosomeFinalLayersBlock(in_channels=core.out_channels)

model = PrixFixeNet(
    first=first,
    core=core,
    final=final,
    generator=generator,
)

# Print a layer summary for one dummy input of shape (1, 5, SEQ_SIZE); the
# channel count matches `in_channels` above.
from torchinfo import summary
print(summary(model, (1, 5, SEQ_SIZE)))

# DREAM-Attn

In [None]:
# DREAM-Attn: AutosomeFirstLayersBlock -> UnlockDNACoreBlock -> AutosomeFinalLayersBlock.
# Running this cell (re)binds `first`, `core`, `final` and `model`; the Train
# cell uses whichever `model` was built last.
# NOTE: the comment next to `lr` in the configuration cell recommends 0.001
# for this model, so adjust `lr` before training it.
first = AutosomeFirstLayersBlock(
    in_channels=5,
    out_channels=256,
    seqsize=SEQ_SIZE,  # same length the data processor is configured with
)

# The core block keeps the channel width of the first block and is given the
# full input sequence length.
core = UnlockDNACoreBlock(
    in_channels=first.out_channels,
    out_channels=first.out_channels,
    seqsize=SEQ_SIZE,
    n_blocks=4,
    kernel_size=15,
    rate=0.1,
    num_heads=8,
)

final = AutosomeFinalLayersBlock(in_channels=core.out_channels)

model = PrixFixeNet(
    first=first,
    core=core,
    final=final,
    generator=generator,
)

# Print a layer summary for one dummy input of shape (1, 5, SEQ_SIZE); the
# channel count matches `in_channels` above.
from torchinfo import summary
print(summary(model, (1, 5, SEQ_SIZE)))

# DataProcessor

In [None]:
# Build the data processor from the notebook configuration: the three data
# files, per-split loader settings, and the shared sequence length and RNG.
dataprocessor = AutosomeDataProcessor(
    # Input files.
    path_to_training_data=TRAIN_DATA_PATH,
    path_to_validation_data=VALID_DATA_PATH,
    path_to_test_data=TEST_DATA_PATH,
    # Training loader settings.
    train_batch_size=TRAIN_BATCH_SIZE,
    train_workers=N_PROCS,
    batch_per_epoch=BATCH_PER_EPOCH,
    shuffle_train=True,
    # Validation loader settings.
    valid_batch_size=VALID_BATCH_SIZE,
    valid_workers=N_PROCS,
    shuffle_val=False,
    # Shared settings.
    seqsize=SEQ_SIZE,
    generator=generator,
)

In [None]:
# Sanity check: build the training dataloader and fetch its first item so the
# notebook shows it as this cell's output.
# NOTE(review): presumably the first item is one training batch, and this
# relies on the object returned by prepare_train_dataloader() supporting
# next() directly -- confirm against AutosomeDataProcessor.
next(dataprocessor.prepare_train_dataloader())

# Train

In [None]:
# Train the most recently built `model` (the last model cell that was run)
# using the configured data processor, epoch count and learning rate.
# `device` comes from the configuration cell, so the target GPU is defined
# in exactly one place.
trainer = AutosomeTrainer(
    model,
    device=device,
    model_dir=MODEL_LOG_DIR,
    dataprocessor=dataprocessor,
    num_epochs=NUM_EPOCHS,
    lr=lr,
)

trainer.fit()