In [None]:
#
# TO-DO: (2/24/21)
#
# * NARROW GENES IN EXPERIMENTAL DATASETS
#
# * remove duplicate genes from ExpressionData files
# * make sure refNetwork columns are in correct order
#

import torch
import numpy             as np
import pandas            as pd
import pytorch_lightning as pl

from torch.nn    import Conv2d
from torch.nn    import Linear
from torch.nn    import Sigmoid
from torch.nn    import functional as F
from torch.optim import Adam

from pytorch_lightning.metrics.functional.classification import precision_recall_curve, auc

from torch.utils.data import DataLoader
from pytorch_dataset  import Dataset    # custom dataset class

In [None]:
# Training set built from the experimental data directory.
# NOTE(review): rel_path looks like a glob pattern resolved relative to
# root_dir, and batchSize presumably makes the dataset yield ready-made
# batches (the DataLoader further down uses batch_size=None) -- confirm
# against pytorch_dataset.Dataset.
train = Dataset(
    root_dir='cnn_datasets/experimental/',
    rel_path='*/*/ExpressionData.csv',
    overwrite=True,
    batchSize=512,
)

In [None]:
# Validation set built from the first synthetic data directory.
# NOTE(review): rel_path looks like a glob pattern resolved relative to
# root_dir; the meaning of overwrite/batchSize is defined in
# pytorch_dataset.Dataset -- confirm there.
val = Dataset(
    root_dir='cnn_datasets/synthetic_1/',
    rel_path='*/*/ExpressionData.csv',
    overwrite=True,
    batchSize=32,
)

In [None]:
class ClassifyGRN(pl.LightningModule):
    """Small CNN binary classifier.

    Architecture: Conv2d(1 -> 32, kernel 2x5) -> ReLU -> global average
    pooling -> Linear(32 -> 32) -> ReLU -> Linear(32 -> 1) -> Sigmoid.
    Trained with binary cross-entropy and the Adam optimizer (lr=1e-2).
    """

    def __init__(self):
        super().__init__()

        self.convolutional_layer = Conv2d(1, 32, kernel_size=(2,5))
        self.dense_linear_layer  = Linear(32, 32)
        self.linear_output_layer = Linear(32, 1)
        self.output_activation   = Sigmoid()


    def forward(self, x):
        """Return sigmoid scores for a batch of single-channel 2-D inputs.

        Args:
            x: tensor whose trailing dimensions are (1, H, W) with H >= 2 and
               W >= 5, as required by the 1-channel, 2x5 convolution.
               NOTE(review): presumably batched as (N, 1, 2, W) -- confirm
               against the dataset class.

        Returns:
            Tensor of values in (0, 1) with a trailing dimension of size 1,
            i.e. (N, 1) for a batched (N, 1, H, W) input.
        """
        # 2d convolutional layer
        x = self.convolutional_layer(x)
        x = torch.relu(x)

        # global average pooling layer
        # Average over the two spatial axes explicitly. The previous
        # squeeze() / avg_pool1d / squeeze() sequence used dimension-less
        # squeezes, which also removed the batch axis when N == 1 and the
        # width axis when the convolution output had width 1, corrupting the
        # shapes fed to the dense layers.
        x = x.mean(dim=(-2, -1))

        # dense linear layer
        x = self.dense_linear_layer(x)
        x = torch.relu(x)

        # linear output layer
        x = self.linear_output_layer(x)
        x = self.output_activation(x)
        return x


    def configure_optimizers(self):
        """Return the Adam optimizer over all model parameters (lr=1e-2)."""
        optimizer = Adam(self.parameters(), lr=1e-2)
        return optimizer


    def binary_cross_entropy_loss(self, pred, labels):
        """Binary cross-entropy between probabilities `pred` and `labels`."""
        return F.binary_cross_entropy(pred, labels)


    def training_step(self, train_batch, batch_idx):
        """Run one training step: forward pass, loss, and metric logging.

        Args:
            train_batch: an (X, y) pair of inputs and labels.
            batch_idx: index of the batch (unused).

        Returns:
            The binary cross-entropy loss for this batch.
        """
        X, y = train_batch
        pred = self.forward(X)
        loss = self.binary_cross_entropy_loss(pred, y)
        # The PR curve is only logged, never back-propagated, so compute it
        # on a detached copy to avoid building an autograd graph for it.
        precision, recall, _ = precision_recall_curve(pred.detach(), y)
        self.log('train_auprc', auc(recall, precision))
        self.log('train_loss', loss)
        return loss

    # Validation is currently disabled; the step is kept below for reference.
#     def validation_step(self, val_batch, batch_idx):
#         X, y = val_batch
#         pred = self.forward(X)
#         loss = self.binary_cross_entropy_loss(pred, y)
#         precision, recall, _ = precision_recall_curve(pred, y) 
#         self.log('val_auprc', auc(recall, precision))
#         self.log('val_loss', loss)     

In [None]:
# Load the TensorBoard notebook extension, then open a dashboard on port 6008
# that reads event files from lightning_logs/.
# NOTE(review): lightning_logs/ is presumably where pl.Trainer() writes its
# logs with default settings -- confirm if a custom logger/root dir is added.
%load_ext tensorboard
%tensorboard --logdir lightning_logs/ --port=6008

In [None]:
# batch_size=None turns off the DataLoader's automatic batching, so every item
# the dataset yields is passed through unchanged; shuffle=True randomizes the
# order in which those items are visited.
train_loader = DataLoader(
    train,
    batch_size=None,
    shuffle=True,
)
# val_loader = DataLoader(val, batch_size=None, shuffle=True)

# Trainer with all-default settings and a freshly initialized model.
trainer = pl.Trainer()
model = ClassifyGRN()

# Validation is disabled for now; the two-loader call is kept for reference.
# trainer.fit(model, train_loader, val_loader)
trainer.fit(model, train_loader)

In [None]:
# model.add(Conv2D(32, (2,5),
#           kernel_initializer='he_uniform', 
#           activation='relu')) 

# model.add(GlobalAveragePooling2D())

# model.add(Dense(32, activation='relu'))

# model.add(Dense(1, activation='sigmoid')) 

# opt = SGD(lr=0.01, momentum=0.9)

# model.compile(optimizer=opt, 
#               loss='binary_crossentropy', 
#               metrics=[AUC(curve='PR')])

In [None]:
# model.fit(train, validation_data=val, epochs=10)