<a href="https://www.kaggle.com/code/averma111/pytorch-hubmap-cnn?scriptVersionId=131277708" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [2]:
%%capture 
!pip install torchmetrics

In [3]:
%%capture
!pip install torchsummary

In [9]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from glob import glob
import json
from torch.utils.data import Dataset, DataLoader
from torchmetrics import AveragePrecision
from torchmetrics.classification import BinaryF1Score
from torchsummary import summary as torchsummary
import torch
import torchvision

In [8]:
class Config:
    """Hyper-parameters shared by the training cells of this notebook."""

    # Mini-batch size (presumably for the data loaders; none are built in the visible cells).
    batch_size = 128
    # Number of training epochs.
    n_epochs = 50
    # Learning rate passed to the optimizer.
    learning_rate = 1e-3
    # Optimizer class, not an instance; it is instantiated later by the training loop.
    opt_func = torch.optim.Adam


# Single settings object read by the later cells.
config = Config()

In [6]:
class acquisition:
    """Load the tabular, JSON-lines and image inputs used by the notebook.

    Attributes:
        test_path: glob pattern that matches the test image files.
        train_path: glob pattern that matches the training image files.
    """

    def __init__(self, test_path, train_path):
        """Store the two glob patterns; note that ``test_path`` comes first."""
        self.test_path = test_path
        self.train_path = train_path

    def get_datframe(self, path):
        """Read the CSV file at ``path`` into a ``pandas.DataFrame``.

        The misspelled method name is kept so existing callers keep working.
        """
        return pd.read_csv(path)

    def get_json_dataframe(self, json_file):
        """Load a JSON-lines file (one JSON document per line) into a DataFrame.

        Blank lines are skipped so that a trailing newline or an empty
        separator line does not abort the load.

        Raises:
            json.JSONDecodeError: if a non-blank line is not valid JSON.
        """
        with open(json_file, 'r', encoding='utf-8') as file:
            records = [json.loads(line) for line in file if line.strip()]
        return pd.DataFrame(records)

    def get_image_path(self):
        """Expand the stored glob patterns.

        Returns:
            A ``(train_image_paths, test_image_paths)`` tuple of lists of
            matching file paths, in the order ``glob`` yields them.
        """
        # The attributes are named *_path; the previous code read the
        # non-existent ``self.train`` / ``self.test`` and raised AttributeError.
        train_image_path = glob(self.train_path)
        test_image_path = glob(self.test_path)
        return train_image_path, test_image_path
        
        
        

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

In [None]:
class HuBMAPClassificationBaseNN(torch.nn.Module):
    """Training and validation helpers shared by the HuBMAP models.

    Subclasses provide ``forward``. Its output is fed straight into binary
    cross-entropy, so it must already be a probability in [0, 1] with the
    same shape as the labels, and the labels must be floating point.
    """

    def training_step(self, batch):
        """Return the binary cross-entropy loss for one ``(features, labels)`` batch."""
        features, labels = batch
        out = self(features)
        # Fully qualified on purpose: only ``torch`` itself is imported by the notebook.
        return torch.nn.functional.binary_cross_entropy(out, labels)

    def validation_step(self, batch):
        """Score one ``(features, labels)`` batch.

        Returns:
            A dict with ``'Validation_loss'`` (detached loss tensor) and
            ``'Validation_acc'`` (binary average precision tensor).
        """
        features, labels = batch
        out = self(features)                    # Generate predictions
        loss = torch.nn.functional.binary_cross_entropy(out, labels)   # Calculate loss
        # Compute the metric here instead of calling a helper that is not a
        # module-level name. torchmetrics expects integer targets, whereas the
        # loss above needs float targets, hence the cast.
        metric = AveragePrecision(task="binary").to(out.device)
        acc = metric(out, labels.long())
        return {'Validation_loss': loss.detach(), 'Validation_acc': acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch dicts from ``validation_step`` into plain floats."""
        epoch_loss = torch.stack([x['Validation_loss'] for x in outputs]).mean()
        epoch_acc = torch.stack([x['Validation_acc'] for x in outputs]).mean()
        return {'Validation_loss': epoch_loss.item(), 'Validation_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the epoch summary; only every fifth epoch is reported.

        ``result`` must carry ``'Train_loss'``, ``'Validation_loss'`` and
        ``'Validation_acc'`` when the epoch is one that gets printed.
        """
        if epoch % 5 == 0:
            print("Epoch [{}], Train_loss: {:.4f}, Validation_loss: {:.4f}, Validation_acc: {:.4f}".format(
                epoch, result['Train_loss'], result['Validation_loss'], result['Validation_acc']))

In [7]:
class HuBMAPClassificationNN(HuBMAPClassificationBaseNN):
    """Small convolutional network producing a one-channel probability map.

    Derives from ``HuBMAPClassificationBaseNN`` so that the model exposes the
    ``training_step`` / ``validation_step`` helpers. The previous
    ``super(HuBMAPClassificationBaseNN, self)`` call raised ``TypeError``
    because the class did not actually inherit from that base.

    The input must have 3 channels. ``network_1`` halves the spatial size
    (2x2 max-pool, stride 2) and ``network_2`` doubles it again (2x2 transposed
    convolution, stride 2), so for even height and width the output has the
    same spatial size as the input, with a single channel.
    """

    def __init__(self):
        super().__init__()

        # Fully qualified ``torch.nn`` on purpose: no ``nn`` alias is imported.
        self.network_1 = torch.nn.Sequential(
            torch.nn.Conv2d(3, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(128, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.network_2 = torch.nn.Sequential(
            torch.nn.Conv2d(128, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(128, 128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.ConvTranspose2d(128, 1, kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Run both stages and squash the result to (0, 1) with a sigmoid."""
        x = self.network_1(x)
        x = self.network_2(x)
        return torch.sigmoid(x)

In [None]:
# Build the network, then move its parameters to the selected device.
model = HuBMAPClassificationNN()
model = model.to(device)

In [None]:
# Print a layer-by-layer summary of the model.
# NOTE(review): `X_data` is not defined in the visible cells; confirm where it is created.
# NOTE(review): torchsummary documents `input_size` as the per-sample shape without the
#   batch dimension; confirm that `X_data.size()` matches that.
# NOTE(review): the device is hard-coded to 'cuda' while `model` was moved to `device`,
#   which may be the CPU; confirm this is intended.
torchsummary(model, X_data.size(), batch_size=-1, device='cuda')

In [None]:
class Trainer:
    """Stateless collection of the training-loop helpers.

    Every helper is a static method, so it can be called on the class or on
    an instance (``trainer.fit(epochs, lr, model, ...)``) with the same
    positional arguments. Without that, the instance was silently bound to
    the first parameter and every argument was shifted by one.
    """

    @staticmethod
    def aurpc(outputs, labels):
        """Return the binary average precision of ``outputs`` against ``labels``."""
        metric = AveragePrecision(task="binary").to(outputs.device)
        # torchmetrics expects integer class targets.
        return metric(outputs, labels.long())

    @staticmethod
    @torch.no_grad()
    def evaluate(model, val_loader):
        """Run ``model.validation_step`` over ``val_loader`` without gradients.

        Returns whatever ``model.validation_epoch_end`` builds from the
        per-batch results.
        """
        model.eval()
        outputs = [model.validation_step(batch) for batch in val_loader]
        return model.validation_epoch_end(outputs)

    @staticmethod
    def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.Adam):
        """Train ``model`` for ``epochs`` epochs and validate after each one.

        Args:
            epochs: number of passes over ``train_loader``.
            lr: learning rate passed to ``opt_func``.
            model: object exposing ``parameters``, ``train``, ``eval``,
                ``training_step``, ``validation_step``,
                ``validation_epoch_end`` and ``epoch_end``.
            train_loader: iterable of training batches.
            val_loader: iterable of validation batches.
            opt_func: optimizer class, called as ``opt_func(params, lr)``.

        Returns:
            A list with one result dict per epoch: the validation results
            plus a ``'Train_loss'`` entry (NaN when ``train_loader`` is empty).
        """
        # The progress bar is optional: use tqdm when it is installed,
        # otherwise iterate over the epochs silently.
        try:
            from tqdm import tqdm as progress
        except ImportError:
            def progress(iterable):
                return iterable

        history = []
        optimizer = opt_func(model.parameters(), lr)
        for epoch in progress(range(epochs)):
            model.train()
            train_losses = []
            for batch in train_loader:
                loss = model.training_step(batch)
                # Detach before storing so the autograd graph of every batch
                # is not kept alive until the end of the epoch.
                train_losses.append(loss.detach())
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            result = Trainer.evaluate(model, val_loader)
            if train_losses:
                result['Train_loss'] = torch.stack(train_losses).mean().item()
            else:
                result['Train_loss'] = float('nan')
            model.epoch_end(epoch, result)
            history.append(result)

        return history


trainer = Trainer()

In [None]:
# Run the training loop with the shared settings.
# `config.n_epochsn` was a typo for `config.n_epochs` and raised AttributeError.
# NOTE(review): `train_dl` and `val_dl` are not defined in the visible cells; confirm they are built earlier.
history = trainer.fit(config.n_epochs, config.learning_rate, model, train_dl, val_dl, config.opt_func)

In [None]:
def plot_accuracies(history):
    """Draw the per-epoch ``'Validation_acc'`` values as a line chart.

    ``history`` is an iterable of dicts that each carry a
    ``'Validation_acc'`` entry. The function draws through a global ``plt``.
    NOTE(review): ``plt`` (presumably matplotlib.pyplot) is not imported in
    the visible cells; confirm it is in scope.
    """
    per_epoch_scores = [record['Validation_acc'] for record in history]
    plt.plot(per_epoch_scores, '-x')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy vs. No. of epochs')


plot_accuracies(history)

In [None]:
def plot_losses(history):
    """Draw the training and validation loss of every epoch on one chart.

    ``history`` is an iterable of dicts. ``'Validation_loss'`` is required in
    each of them, while a missing ``'Train_loss'`` is passed on as ``None``.
    The function draws through a global ``plt``.
    NOTE(review): ``plt`` (presumably matplotlib.pyplot) is not imported in
    the visible cells; confirm it is in scope.
    """
    training_curve = [entry.get('Train_loss') for entry in history]
    validation_curve = [entry['Validation_loss'] for entry in history]
    # Training first, validation second, to match the legend order below.
    for curve, line_format in ((training_curve, '-bx'), (validation_curve, '-rx')):
        plt.plot(curve, line_format)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Training', 'Validation'])
    plt.title('Loss vs. No. of epochs')


plot_losses(history)