### Data preparation

In [None]:
# Mount Google Drive at /content/drive (this import only exists on Google Colab).
# NOTE(review): later cells read from /kaggle/input and write to /kaggle/working —
# confirm whether this Colab-only mount is still needed.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
!pip install neptune lightning pytorchvideo

In [None]:
import os
from getpass import getpass

import neptune

# Credentials must never be stored in the notebook: take the token from the
# NEPTUNE_API_TOKEN environment variable and fall back to an interactive prompt.
# SECURITY: the token that used to be hard-coded in this cell has been published
# together with the notebook and should be revoked in the Neptune account.
neptune_api_token = os.environ.get("NEPTUNE_API_TOKEN") or getpass("Neptune API token: ")

# Experiment-tracking handle used by the rest of the notebook (run[...] = ..., run[...].append(...)).
run = neptune.init_run(
    project="afonchikovd585/Cells-classification",
    api_token=neptune_api_token,
)

In [None]:
from pytorchvideo.data import LabeledVideoDataset, make_clip_sampler, labeled_video_dataset
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    UniformTemporalSubsample,
    Permute
)
from torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize
)
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo
)

import pandas as pd
import numpy as np
import os
import shutil
from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor

import torch
import torchvision
import torch.nn as nn
from pytorch_lightning import LightningModule, seed_everything, Trainer
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import classification_report
import torchmetrics


from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score, precision_score, recall_score, accuracy_score, roc_auc_score
from math import ceil
import time
#Plotting
import matplotlib.pyplot as plt

In [None]:
# --- Multi-view inference ---------------------------------------------------
# When enabled, 'num_views' augmented copies of each sample are used for predictions.
multiView = dict(isMultiView=True, num_views=7)

# --- Random resized crop scale ----------------------------------------------
RRCropScale = (0.1, 0.2)    # without multi-view (alternative that was tried: (0.1, 0.3))
RRCropScaleMV = (0.1, 0.2)  # with multi-view

# --- Reproducibility --------------------------------------------------------
# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# --- Visualization ----------------------------------------------------------
figSize = (3, 3)
nSamples = 16  # number of leading samples printed / shown by the helper cells

# --- Dataset constants ------------------------------------------------------
class_names = [str(class_id) for class_id in range(2)]
# Statistics passed to Normalize in the transform pipeline (three values each).
mean = np.array([0.9133, 0.2737, 0.2737])
std = np.array([0.1576, 0.2508, 0.2508])

In [None]:
def _scale_to_unit_range(x):
    """Divide raw pixel values by 255."""
    return x / 255.0


# Steps applied, in order, to the 'video' entry of every sample dict.
# Alternatives that were tried and are currently disabled:
#   UniformTemporalSubsample(20), RandomShortSideScale(min_size=248, max_size=256), CenterCropVideo(224)
_video_pipeline = Compose([
    RandomResizedCrop(size=64, scale=RRCropScaleMV, antialias=True),
    RandomHorizontalFlip(p=0.5),
    Lambda(_scale_to_unit_range),
    Normalize(mean=mean, std=std),
])

video_transforms = Compose([
    ApplyTransformToKey(key='video', transform=_video_pipeline),
])

In [None]:
def accuracy_mv(labels,pred):
    """Count exact matches between ``pred`` and ``labels``.

    Args:
        labels: tensor of reference labels; its first dimension is the sample count.
        pred: tensor of predicted labels, compared element-wise with ``labels``.

    Returns:
        dict with the untouched ``pred`` tensor ('pred'), the number of samples
        ('n_samples'), the number of matches ('n_correct') and their ratio
        ('acc_without_u').
    """
    with torch.no_grad():
        total = labels.shape[0]
        hits = (pred == labels).sum().item()
        ratio = hits / total

    return {
        'pred': pred,
        'n_samples': total,
        'n_correct': hits,
        'acc_without_u': ratio,
    }

In [None]:
def mvc(mode_template,predictions_mv,values_mv,labels):
    """Combine multi-view predictions by weighted voting.

    For every sample (column), the weights of all views voting for the same
    class are summed and the class with the largest total is selected.

    Args:
        mode_template: zero-like tensor with one entry per sample; fixes the
            length and dtype of the result.
        predictions_mv: per-view class predictions, indexed [view, sample].
        values_mv: per-view weights (maximum probabilities or certainties),
            indexed [view, sample].
        labels: reference labels, only used for the debug print-out.

    Returns:
        Tensor of weighted predictions, one per sample.
    """
    predictions_w = mode_template * 0  # times zero just in case
    for idx in range(mode_template.shape[0]):
        view_votes = predictions_mv[:, idx]
        view_weights = values_mv[:, idx]

        # Accumulated weight per class; the index of the maximum is the winning class.
        class_weight = torch.bincount(view_votes, weights=view_weights)
        predictions_w[idx] = torch.max(class_weight, 0)[1]

        # Show the first few samples for inspection.
        if idx < nSamples:
            print(view_votes, view_weights, predictions_w[idx].item(), labels[idx].item(),
                  class_names[predictions_w[idx].item()], class_names[labels[idx].item()])
    return predictions_w


def multiViewWeightedPred(labels_mv,predictions_mv,probabilities_mv):
    """Run probability-weighted multi-view voting.

    Args:
        labels_mv: labels indexed [view, sample]; the per-sample mode over views
            is used as the reference label.
        predictions_mv: predicted classes indexed [view, sample].
        probabilities_mv: voting weights indexed [view, sample].

    Returns:
        dict with a single entry 'predictionsPB' holding the weighted predictions.
    """
    consensus_labels = torch.mode(labels_mv, dim=0).values
    blank = torch.zeros_like(consensus_labels)

    print('probabilities')
    return {
        'predictionsPB': mvc(blank, predictions_mv, probabilities_mv, consensus_labels),
    }

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Fractions of the shuffled dataframe assigned to each split.
ns = dict(train=0.6, val=0.2, test=0.2)

# Seed used for shuffling and seed_everything; random seeds are 42, 0, 17, 9, 3.
seed = 42

# Dataframe column used as the classification target, and the number of classes.
target_label = 'high_rbc'
num_classes = 2

In [None]:
# Record the random seed on the Neptune run.
run['seed'] = seed

In [None]:
def test_visualization(model, dataloader, nc=num_classes):
    """Run one inference pass over ``dataloader`` and collect per-sample results.

    The model is put into eval mode for the pass, so predictions do not depend on
    train-time layer behaviour, and its previous train/eval mode is restored
    afterwards.

    When batches carry a 'video_index' entry, all returned tensors are reordered
    by that index so repeated passes line up sample by sample, as the multi-view
    voting requires.
    NOTE(review): this assumes pytorchvideo's LabeledVideoDataset adds
    'video_index' to every sample and may iterate videos in a different order
    on each pass — confirm against the installed version.

    Args:
        model: module called as ``model(inputs)``; its ``target_label`` attribute
            names the batch entry that holds the labels.
        dataloader: iterable of dict batches with a 'video' entry and a
            ``model.target_label`` entry.
        nc: number of leading logits passed to the softmax.

    Returns:
        dict with 'test_labels', 'test_predictions' (class with the highest
        softmax probability), 'test_probabilities' (that highest probability)
        and 'test_inputs' (the inputs fed to the model), each concatenated over
        all batches.
    """
    pred_chunks, prob_chunks, label_chunks, input_chunks, index_chunks = [], [], [], [], []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for group in dataloader:
                inputs = group['video'].to(DEVICE)
                labels = group[model.target_label].to(DEVICE)
                logits = model(inputs).to(DEVICE)

                prob = nn.functional.softmax(logits[:, :nc], 1)
                probv, pred = torch.max(prob, 1)

                pred_chunks.append(pred)
                prob_chunks.append(probv)
                label_chunks.append(labels)
                input_chunks.append(inputs)
                if 'video_index' in group:
                    index_chunks.append(torch.as_tensor(group['video_index']).reshape(-1).cpu())
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    test_attr = {
        'test_labels': torch.cat(label_chunks, dim=0),
        'test_predictions': torch.cat(pred_chunks, dim=0),
        'test_probabilities': torch.cat(prob_chunks, dim=0),
        'test_inputs': torch.cat(input_chunks, dim=0),
    }

    # Only reorder when every batch supplied an index; otherwise keep loader order.
    if len(index_chunks) == len(label_chunks):
        order = torch.sort(torch.cat(index_chunks), stable=True).indices
        test_attr = {key: value[order.to(value.device)] for key, value in test_attr.items()}

    return test_attr

In [None]:
# Directory holding the video files; CustomDataset joins it with the names in the dataframe's 'files' column.
dataset_path = "/kaggle/input/cells-classification/dataset"

In [None]:
# Make the parent of the dataset directory the working directory;
# the next cell reads "DataFrame.csv" through a relative path.
os.chdir(dataset_path)
os.chdir("..")

In [None]:
seed_everything(seed)

# Load the metadata table and shuffle it reproducibly.
dataframe = pd.read_csv("DataFrame.csv", index_col=0)
dataframe = dataframe.sample(frac=1, random_state=seed)

# Consecutive slices of the shuffled frame: train | val | test.
# The validation slice is sized from ns['val'] (it used to be sized from ns['test'],
# which only gave the intended split while both fractions were equal);
# the test split receives all remaining rows.
n_rows = dataframe.shape[0]
train_size = int(n_rows * ns['train'])
val_size = int(n_rows * ns['val'])

train_data = dataframe[0:train_size]
val_data = dataframe[train_size:train_size + val_size]
test_data = dataframe[train_size + val_size:]
test_size = test_data.shape[0]

train_data, val_data, test_data

In [None]:
class CustomDataset(LabeledVideoDataset):
    """LabeledVideoDataset built from a dataframe that has a 'files' column.

    Every row becomes one ``(path, info)`` pair: the path is
    ``dataset_path/<files value>`` and ``info`` holds the remaining columns of
    the row plus a 'label' entry copied from the ``target_name`` column.
    """

    def __init__(self, dataset_path, dataframe, target_name, transforms, clip_sampler_type='random', clip_duration=1):
        records = dataframe.reset_index()

        def build_entry(row_pos, file_name):
            # Row as a plain dict, with the target duplicated under 'label'.
            info = records.iloc[row_pos].to_dict()
            info['label'] = records[target_name][row_pos]
            # The file name and the old index are not part of the sample metadata.
            del info['files']
            del info['index']
            return (f"{dataset_path}/{file_name}", info)

        labeled_paths = [build_entry(row_pos, file_name)
                         for row_pos, file_name in enumerate(records['files'])]
        sampler = make_clip_sampler(clip_sampler_type, clip_duration)
        super().__init__(labeled_video_paths=labeled_paths,
                         clip_sampler=sampler,
                         transform=transforms,
                         decode_audio=False)

In [None]:
# Used for testing theory, ignore that
train_dataset = CustomDataset(dataset_path=dataset_path, dataframe=train_data,
                              target_name='high_rbc',
                              transforms=video_transforms)
train_dataset.num_videos

In [None]:
# Checkpointing: weights only, selected by the minimum of "val/loss" (the value
# TestModel logs at the end of each validation epoch), written to the relative
# directory "checkpoints" under the base name "file".
checkpoint_callback = ModelCheckpoint(save_weights_only=True, 
                                      mode="min", 
                                      monitor="val/loss",
                                      dirpath="checkpoints", 
                                      filename="file")
# Logs the learning rate once per epoch.
lr_monitor = LearningRateMonitor(logging_interval="epoch")

In [None]:
class CurriculumTrainer():
    """Competence-based curriculum training loop.

    Every sample receives a difficulty score in [0, 1]. Each epoch trains only on
    the samples whose difficulty does not exceed the current competence, which
    starts at ``c0`` and grows towards 1 over ``max_epochs`` epochs.
    """

    @staticmethod
    def _min_max(series):
        """Scale ``series`` to [0, 1]; a constant series maps to zeros instead of NaN."""
        low = series.min()
        span = series.max() - low
        if span == 0:
            return (series - low).astype(float)
        return (series - low) / span

    def set_difficulty(self, dataframe, target_cells, target_name, alpha, beta, gamma):
        """Return a copy of ``dataframe`` with 'distances' and 'difficulty' columns.

        Args:
            dataframe: frame with 'blur' and 'noise' columns and a ``target_cells`` column.
            target_cells: name of the numeric column whose distance from its mean
                contributes to the difficulty (the notebook passes 'rbc').
            target_name: unused here; kept so the signature matches ``fit``.
            alpha: weight of the min-max scaled 'blur' column.
            beta: weight of the 'noise' column (used as is, not rescaled).
            gamma: weight of the min-max scaled distance from the mean.

        Returns:
            Deep copy of ``dataframe`` where 'blur' is rescaled to [0, 1] and
            'distances' / 'difficulty' are added, both scaled to [0, 1].
        """
        df = dataframe.copy(deep=True)

        # Absolute distance between each sample's target value and the mean.
        df['distances'] = (df[target_cells] - df[target_cells].mean()).abs()

        # Normalizing blur and distances
        df['blur'] = self._min_max(df['blur'])
        df['distances'] = self._min_max(df['distances'])

        # Calculating and normalizing difficulty
        df['difficulty'] = alpha * df['blur'] + beta * df['noise'] + gamma * df['distances']
        df['difficulty'] = self._min_max(df['difficulty'])
        return df

    def evaluate_competence(self, max_epochs, current_epoch, c0, p):
        """Competence after ``current_epoch`` epochs, capped at 1.

        Computes ``(current_epoch * (1 - c0**p) / max_epochs + c0**p) ** (1 / p)``,
        which equals ``c0`` at epoch 0 and reaches 1 at ``max_epochs``.
        """
        return min(1, (current_epoch*((1-c0**p)/max_epochs)+c0**p)**(1/p))

    def fit(self, model, dataframe, target_cells, target_name, max_epochs, c0, p, alpha, beta, gamma):
        """Train ``model`` for ``max_epochs`` epochs on a growing subset of ``dataframe``.

        Args:
            model: LightningModule exposing ``batch_size`` and ``numworkers``.
            dataframe: training rows; scored with ``set_difficulty`` first.
            target_cells: column used for the distance term of the difficulty.
            target_name: column used as the label by CustomDataset.
            max_epochs: number of curriculum epochs.
            c0: initial competence.
            p: exponent of the competence schedule.
            alpha, beta, gamma: difficulty weights, see ``set_difficulty``.

        Side effects: logs competence and subset size to the Neptune ``run`` and
        stores the last Trainer on ``self.trainer``.
        """
        dataframe = self.set_difficulty(dataframe, target_cells, target_name, alpha, beta, gamma)
        self.competence = c0
        seed_everything(seed)
        for epoch in range(1, max_epochs+1):
            # Only samples at or below the current competence take part in this epoch.
            selected_data = dataframe[dataframe.difficulty <= self.competence]
            dataset = CustomDataset(dataset_path=dataset_path, dataframe=selected_data,
                                    target_name=target_name,
                                    transforms=video_transforms)
            loader = DataLoader(dataset, batch_size=model.batch_size, num_workers=model.numworkers, pin_memory=True)
            print(f"-----------------------\nEpoch {epoch}, competence = {self.competence}, dataset size = {dataset.num_videos}")
            run["model/competence"].append(self.competence)
            run["train/dataset_size"].append(dataset.num_videos)
            # NOTE(review): a new Trainer is created for every epoch, so the model's
            # configure_optimizers presumably runs again each time and optimizer state
            # and LR schedule restart — confirm this is intended.
            self.trainer = Trainer(max_epochs=1,
                                   precision='16-mixed',
                                   accumulate_grad_batches=2,
                                   enable_progress_bar=True,
                                   enable_model_summary=False,
                                   num_sanity_val_steps=0,
                                   callbacks=[lr_monitor, checkpoint_callback])
            self.trainer.fit(model, loader)
            self.competence = self.evaluate_competence(max_epochs, epoch, c0, p)

    def validate(self, model):
        """Validate ``model`` with the Trainer of the last epoch (requires a prior ``fit``).

        No dataloader is passed, so the data comes from the model itself.
        """
        self.trainer.validate(model)

    def test(self, model):
        """Test ``model`` with the Trainer of the last epoch (requires a prior ``fit``).

        No dataloader is passed, so the data comes from the model itself.
        """
        self.trainer.test(model)

### Model

In [None]:
class TestModel(LightningModule):
    """Video classifier: pretrained efficient_x3d_xs backbone followed by ReLU and a linear head.

    Batches are dicts; 'video' holds the input clip and 'label' the target
    (CustomDataset copies the target column into 'label').
    The training dataloader is supplied by the caller; validation and test
    dataloaders are built here from the module-level ``val_data`` / ``test_data``.
    """

    def __init__(self):
        super(TestModel, self).__init__()
        # model architecture
        self.video_model = torch.hub.load("facebookresearch/pytorchvideo", "efficient_x3d_xs", pretrained=True)
        self.relu = nn.ReLU()
        # The head maps the backbone's 400 outputs to the task's classes.
        self.linear = nn.Linear(400, num_classes)

        self.lr = 1e-3
        self.batch_size = 8
        self.numworkers = 0
        # evaluation metric
        self.metric = torchmetrics.Accuracy(task='multiclass', num_classes=num_classes)
        # loss function
        self.criterion = nn.CrossEntropyLoss()
        # helpers
        self.target_label = target_label
        # Per-step results collected during an epoch and reduced in the *_epoch_end hooks.
        self.training_step_outputs = []
        self.validation_step_outputs = []
        self.testing_step_outputs = []

    def forward(self, x):
        """Return class logits for a batch of clips."""
        x = self.video_model(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

    def configure_optimizers(self):
        """AdamW with a cosine-annealing learning-rate schedule."""
        opt = torch.optim.AdamW(params=self.parameters(), lr=self.lr)
        scheduler = CosineAnnealingLR(opt, T_max=10, eta_min=1e-6, last_epoch=-1)
        return {'optimizer': opt, 'lr_scheduler': scheduler}

    # This should remain commented out because in CurriculumTrainer dataloader created in every epoch.
    # Do not uncomment
    # def train_dataloader(self):
    #     dataset = CustomDataset(dataset_path=dataset_path, dataframe=train_data,
    #                           target_name=self.target_label,
    #                           transforms=video_transforms)
    #     loader = DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworkers, pin_memory=True)
    #     return loader

    def _shared_step(self, batch, store):
        """Compute loss and accuracy for one batch and remember them in ``store``."""
        video, label = batch['video'], batch['label']
        out = self.forward(video)
        loss = self.criterion(out, label)
        metric = self.metric(out, label.to(torch.int64))
        # Keep detached copies only: storing the live loss would hold every
        # batch's autograd graph in memory until the end of the epoch.
        store.append({'loss': loss.detach(), 'metric': metric.detach()})
        return {'loss': loss, 'metric': metric}

    @staticmethod
    def _epoch_mean(outputs, key):
        """Mean of ``key`` over the collected step outputs, rounded to 2 decimals."""
        return torch.stack([x[key] for x in outputs]).mean().cpu().detach().numpy().round(2)

    def _make_loader(self, frame):
        """Build a DataLoader over ``frame`` using this model's batch settings."""
        dataset = CustomDataset(dataset_path=dataset_path, dataframe=frame,
                                target_name=self.target_label,
                                transforms=video_transforms)
        return DataLoader(dataset, batch_size=self.batch_size, num_workers=self.numworkers, pin_memory=True)

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, self.training_step_outputs)

    def on_train_epoch_end(self):
        """Log the epoch's mean training loss and accuracy to Lightning and Neptune."""
        outputs = self.training_step_outputs
        loss = self._epoch_mean(outputs, 'loss')
        metric = self._epoch_mean(outputs, 'metric')
        self.training_step_outputs = []
        self.log('train/loss', loss)
        self.log('train/metric', metric)
        run["train/loss"].append(loss)
        run["train/metric"].append(metric)

    def val_dataloader(self):
        return self._make_loader(val_data)

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, self.validation_step_outputs)

    def on_validation_epoch_end(self):
        """Log and print the epoch's mean validation loss and accuracy.

        NOTE(review): 'val/loss' is rounded to 2 decimals before it is logged, and
        checkpoint_callback monitors that value — epochs whose losses differ only
        beyond the second decimal are indistinguishable to it.
        """
        outputs = self.validation_step_outputs
        loss = self._epoch_mean(outputs, 'loss')
        metric = self._epoch_mean(outputs, 'metric')
        self.validation_step_outputs = []
        self.log('val/loss', loss)
        self.log('val/metric', metric)
        run["val/loss"].append(loss)
        run["val/metric"].append(metric)
        print({'loss': loss, 'metric': metric})

    def test_dataloader(self):
        return self._make_loader(test_data)

    def test_step(self, batch, batch_idx):
        """Collect labels and logits of one test batch."""
        video, label = batch['video'], batch['label']
        out = self.forward(video)
        self.testing_step_outputs.append({'label': label, 'pred': out})
        return {'label': label, 'pred': out}

    def on_test_epoch_end(self):
        """Print a classification report over all collected test batches."""
        outputs = self.testing_step_outputs
        label = torch.cat([x['label'] for x in outputs]).cpu().detach().numpy()
        pred = torch.cat([x['pred'].argmax(dim=1) for x in outputs]).cpu().detach().numpy()
        self.testing_step_outputs = []
        print(classification_report(label, pred))

In [None]:
# Instantiate the classifier; its constructor loads the pretrained backbone through torch.hub.
model = TestModel()

In [None]:
# Hyper-parameters of the training run; logged to Neptune below.
params = dict(
    n_epochs=400,
    c0=0.05,  # initial competence of the curriculum
    p=2,      # exponent of the competence schedule
    # In difficulty function: alpha * df['blur'] + beta * df['noise'] + gamma * df['distances']
    alpha=0.5,
    beta=0,   # Do not use, used for testing theory
    gamma=0.5,
)

run["model/parameters"] = params
run["model/architecture"] = "efficient_x3d_xs"
run["model/difficulty_func"] = "alpha * df['blur'] + beta * df['noise'] + gamma * df['distances']"

In [None]:
# Switch to the Kaggle working directory; relative output paths such as the
# "checkpoints" directory of checkpoint_callback are resolved from here.
os.chdir("/kaggle/working/")

In [None]:
# Run this for trainer without curriculum
seed_everything(seed)
trainer = Trainer(max_epochs=params["n_epochs"],
                  precision='16-mixed',
                  accumulate_grad_batches=2,
                  enable_progress_bar=True,
                  num_sanity_val_steps=0,
                  callbacks=[lr_monitor, checkpoint_callback])

dataset = CustomDataset(dataset_path=dataset_path, dataframe=train_data,
                      target_name=model.target_label,
                      transforms=video_transforms)
loader = DataLoader(dataset, batch_size=model.batch_size, num_workers=model.numworkers, pin_memory=True)
trainer.fit(model, loader)

In [None]:
# Used for testing theory, ignore that
seed_everything(seed)
for epoch in range(1, params["n_epochs"]+1):
    dataset = CustomDataset(dataset_path=dataset_path, dataframe=train_data,
                          target_name=model.target_label,
                          transforms=video_transforms)
    loader = DataLoader(dataset, batch_size=model.batch_size, num_workers=model.numworkers, pin_memory=True)
    trainer = Trainer(max_epochs=1,
                           precision='16-mixed',
                           accumulate_grad_batches=2,
                           enable_progress_bar=True,
                           enable_model_summary=False,
                           num_sanity_val_steps=0,
                           callbacks=[lr_monitor, checkpoint_callback])
    trainer.fit(model, loader)

In [None]:
# Run this for curriculum learning
trainer = CurriculumTrainer()
start = time.time()
trainer.fit(model, train_data, 'rbc', 'high_rbc',
            params["n_epochs"], params["c0"], params["p"],
            params["alpha"], params["beta"], params["gamma"])
stop = time.time()

In [None]:
# Report the duration (seconds) measured around the curriculum run and log it to Neptune.
print(f"Elapsed time: {stop - start}")
run['elapsed_time'] = stop - start

In [None]:
# Replace the in-memory model with one restored from the best checkpoint recorded by checkpoint_callback.
model = TestModel.load_from_checkpoint(checkpoint_callback.best_model_path)

In [None]:
# Evaluate the restored model on the validation split (data comes from TestModel.val_dataloader).
trainer.validate(model)

In [None]:
# Evaluate the restored model on the test split; TestModel prints a classification report at the end.
trainer.test(model)

In [None]:
# Close the Neptune run; nothing below logs to it.
run.stop()

In [None]:
# Loader over the test split for the multi-view evaluation below. It uses the same
# random augmentations as training, so every pass yields a different view of each sample.
# NOTE(review): CustomDataset does not pass a video_sampler, so pytorchvideo's default
# applies — if that default shuffles, sample order differs between passes over this
# loader; confirm before relying on positional alignment across views.
dataset = CustomDataset(dataset_path=dataset_path, dataframe=test_data,
                              target_name=model.target_label,
                              transforms=video_transforms)
test_dataloader = DataLoader(dataset, batch_size=model.batch_size, num_workers=model.numworkers, pin_memory=True)

In [None]:
# Per-view results: one array of labels / predictions / probabilities per pass.
test_labels_mv = []
test_predictions_mv = []
test_probabilities_mv = []

# One full pass over the test loader per view.
for _ in range(multiView['num_views']):
    test_attr = test_visualization(model.to(DEVICE), test_dataloader)
    test_labels_mv.append(test_attr['test_labels'].to('cpu').numpy())
    test_predictions_mv.append(test_attr['test_predictions'].to('cpu').numpy())
    test_probabilities_mv.append(test_attr['test_probabilities'].to('cpu').numpy())

    # Release the collected inputs before the next pass.
    del test_attr

# One row per view. Stacking in NumPy first avoids torch's slow conversion of a
# Python list of ndarrays.
labels_mv = torch.as_tensor(np.stack(test_labels_mv))
predictions_mv = torch.as_tensor(np.stack(test_predictions_mv))
probabilities_mv = torch.as_tensor(np.stack(test_probabilities_mv))

print('labels',labels_mv[:,:nSamples])
print('predictions',predictions_mv[:,:nSamples])
print('probabilities', probabilities_mv[:,:nSamples])

In [None]:
# Reference label per sample: the most frequent label across the views (rows) of labels_mv.
mode_labels,_ = torch.mode(labels_mv,dim=0)

In [None]:
# Probability-weighted vote over all views, then accuracy against the mode labels.
weightedPred = multiViewWeightedPred(labels_mv,predictions_mv,probabilities_mv)
print(weightedPred)

mv_stats = accuracy_mv(mode_labels, weightedPred['predictionsPB'])
n_correct_MVS_PB = mv_stats['n_correct']
n_samples = mv_stats['n_samples']
accuracy_MVS_PB = mv_stats['acc_without_u']

print('Soft multi view')
print('n_samples',n_samples)
print(f'accuracy of prediction based soft MV = {accuracy_MVS_PB}')

In [None]:
# Confusion matrix of the soft multi-view predictions against the per-sample mode labels,
# plotted with the class names as tick labels.
cm = confusion_matrix(mode_labels.to('cpu'), weightedPred['predictionsPB'].to('cpu'))
print(f'Accuracy of prediction based soft MV is {accuracy_MVS_PB}')
cm_display = ConfusionMatrixDisplay(cm,display_labels=class_names).plot()