# Setup

In [None]:
# Mount Google Drive at /content/drive; later cells read and write paths under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Report the GPU(s) visible to this runtime.
!nvidia-smi

## Create zip to move to other drive

In [None]:
# Move into the NuCLS work directory on Drive; the zip command in the next cell uses relative paths.
%cd "/content/drive/MyDrive/0-FYP/codes/mmdetection-stuff/work_dir_NuCLS/"

In [None]:
# Recursively archive one model's output folder into a zip next to it.
!zip -r "maskrcnn_lymphocytenet3_cm3_18.zip" "maskrcnn_lymphocytenet3_cm3_18/"

## Create working dir and download zip

In [None]:
# Work from /content so the download below lands on the local runtime disk.
%cd "/content"

In [None]:
!mkdir "/content/drive/MyDrive/0-FYP/codes/mmdetection-stuff/work_dir_NuCLS"

In [None]:
# Download the Drive file with this id and save it as maskrcnn_stm_renet2.zip in the current directory.
!gdown --id "1-6TVU4glmtvVXLv8ApStWBZA34SFU0Hw" -O "maskrcnn_stm_renet2.zip"

In [None]:
# Extract the downloaded archive into the work directory on Drive.
!unzip "maskrcnn_stm_renet2.zip" -d "/content/drive/MyDrive/0-FYP/codes/mmdetection-stuff/work_dir_NuCLS/"

## Set Up GitHub Repo

In [None]:
# Start from a clean checkout: delete any existing clone under /content, then clone again.
%cd "/content"
!rm -rf NuCLS
!git clone "https://github.com/abdul2706/NuCLS.git"

In [None]:
# Make the repository root the working directory for the cells that follow.
%cd /content/NuCLS

In [None]:
# Print the current working directory as a sanity check.
!pwd

## Make Necessary Directories

In [None]:
!mkdir "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/"
!mkdir "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/QC/"
!mkdir "/content/NuCLS/results/"
!mkdir "/content/NuCLS/results/tcga-nucleus/"
!mkdir "/content/NuCLS/results/tcga-nucleus/models/"
!mkdir "/content/NuCLS/results/tcga-nucleus/models/v4_2020-04-05_FINAL_CORE_QC/"

## Install Packages

In [None]:
# Show the torch / torchvision versions currently installed in the runtime.
!pip show torchvision torch

In [None]:
# Install the extra dependencies for this notebook.
# NOTE(review): gitpython and histomicstk are not version-pinned, so a fresh
# runtime may resolve different versions than earlier runs did.
!pip install gitpython
# histomicstk is installed with an extra wheel index (--find-links).
!pip install histomicstk --find-links https://girder.github.io/large_image_wheels
# Pin the torch / torchvision / torchaudio versions (cu111 builds for torch and torchvision).
!pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html

# Versions

In [None]:
# cpu
# Print the interpreter and pip versions of this runtime.
!python --version
!pip --version

In [None]:
# library               system          colab
# python                3.7.3           3.7.10
# pip                   21.1.2          19.3.1
# numpy                 1.17.5          1.19.5
# openslide-python      1.1.2           1.1.2
# torch                 1.7.0           1.8.1
# histomicstk                           1.1.0

In [None]:
# cpu
# List every installed package with its exact version.
!pip freeze

# Download and Unzip Dataset

In [None]:
# Download the dataset archive by Drive file id into the current directory.
# No -O is given, so the saved file name is chosen by gdown; the following
# cells expect it to be "QC.zip" (TODO confirm against the Drive file).
!gdown --id "1k350VQeegN5hMxRK9Vpc65fdLe3wsqYy"

In [None]:
!rm -rf "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/QC/"
!unzip "QC.zip" -d "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/" &> /dev/null

In [None]:
!ls "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/QC/csv" -1 | wc -l
!ls "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/QC/mask" -1 | wc -l
!ls "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/QC/rgbs" -1 | wc -l
!ls "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/QC/rgbs_colorNormalized" -1 | wc -l
!ls "/content/NuCLS/data/tcga-nucleus/v4_2020-04-05_FINAL_CORE/CORE_SET/QC/train_test_splits" -1 | wc -l

In [None]:
# Remove the downloaded archive now that it has been extracted.
!rm -rf "QC.zip"

# Training

In [None]:
import sys
import os
from os.path import join as opj
import argparse
from pprint import pprint

# GPU allocation MUST happen before importing other modules
from GeneralUtils import save_configs, maybe_mkdir, AllocateGPU
# AllocateGPU(GPUs_to_use=args.g)

from nucls_model.MiscUtils import load_saved_otherwise_default_model_configs
from configs.nucleus_model_configs import CoreSetQC, CoreSetNoQC
from nucls_model.NucleusWorkflows import run_one_maskrcnn_fold
# from nucls_model.backbones import *


In [None]:
# Build the same command-line interface as the training script, then parse a
# fixed argument list (there is no real command line inside a notebook).
parser = argparse.ArgumentParser(description='Train nucleus model.')

# Options that accept one or more integers.
for flag, default, help_text in (
    ('-f', [1], 'fold(s) to run'),
    ('-g', [0], 'gpu(s) to use'),
):
    parser.add_argument(flag, type=int, default=default, nargs='+', help=help_text)

# 0/1 integer switches, all enabled by default.
for flag, help_text in (
    ('--qcd', 'use QCd data for training?'),
    ('--train', 'train?'),
    ('--vistest', 'visualize results on testing?'),
):
    parser.add_argument(flag, type=int, default=1, help=help_text)

args = parser.parse_args(['-f', '1', '-g', '0'])

# Convert the integer switches to real booleans.
for switch in ('qcd', 'train', 'vistest'):
    setattr(args, switch, bool(getattr(args, switch)))


In [None]:
# Log prefix and the Drive folder that holds one sub-folder per model.
TAG = '[train.py]'
BASEPATH = '/content/drive/MyDrive/0-FYP/codes/mmdetection-stuff/work_dir_NuCLS'
print(f'{TAG} [BASEPATH] {BASEPATH}')

# Model to run. Other names used previously, kept for quick switching:
#   'maskrcnn_lymphocytenet3_cm3_18'
#   'maskrcnn_resnet_cbam_18'
#   'maskrcnn_lymphocytenet3_cm1_18_s2'
#   'maskrcnn_stm_renet2'
model_name = 'maskrcnn_lymphocytenet3_cb1_18_s2'

# Pick the dataset name that matches the --qcd switch.
dataset_name = (CoreSetQC if args.qcd else CoreSetNoQC).dataset_name

all_models_root = BASEPATH
print(f'{TAG} [all_models_root] {all_models_root}')

model_root = opj(all_models_root, model_name)
print(f'{TAG} [model_root] {model_root}')
# presumably creates model_root when it does not exist yet; verify in GeneralUtils
maybe_mkdir(model_root)

# Load the configs for this model from its folder.
# NOTE(review): judging by the helper's name it falls back to default configs
# when no saved file exists; confirm in nucls_model.MiscUtils.
configs_path = opj(model_root, 'nucleus_model_configs.py')
cfg = load_saved_otherwise_default_model_configs(configs_path=configs_path)

print(f'{TAG} [cfg]')
pprint(cfg)


In [None]:
# `!VAR=value` only sets the variable inside a throwaway subshell, so it never
# reached the Python kernel. Set it in this process instead.
# NOTE: CUDA reads this when it is initialised, so the assignment must run
# before the first CUDA call made by this kernel.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Run every requested fold (args.f is [1] with the arguments parsed above).
for fold in args.f:
    run_one_maskrcnn_fold(
        fold=fold, cfg=cfg, model_root=model_root, model_name=model_name,
        qcd_training=args.qcd, train=args.train, vis_test=args.vistest)


# Only for Debugging

In [None]:
# Run the debugging cells from the repository root so `nucls_model` can be imported.
%cd /content/NuCLS

In [None]:
import torch
from nucls_model.backbones import LymphocyteNet3_CB1

# Smoke test: push a random batch through the backbone and print the shape of
# each output it returns.
x = torch.rand((2, 3, 224, 224))
print(f'x.shape -> {x.shape}')
model = LymphocyteNet3_CB1(depth=18, use_dropout=False, pretrained=False, conv_type='pooling', debug=True)
print(model)
model.eval()
# Only shapes are inspected, so skip autograd bookkeeping to save memory.
with torch.no_grad():
    y = model(x)
# assumes the backbone returns a sequence of tensors (one per level)
for i, level in enumerate(y):
    print(f'level{i} -> {level.shape}')


# Extra Stuff

In [None]:
import pandas as pd

# Load the per-category weights (path is relative to the current working
# directory) and print the sum of the `category_weights` column.
category_weights = pd.read_csv('train_dataset-category_weights.csv')
print(category_weights['category_weights'].sum())

In [None]:
import pandas as pd

# Load the per-FOV weights (path is relative to the current working
# directory) and print the sum of the `fov_weights` column.
fov_weights = pd.read_csv('train_dataset-fov_weights.csv')
print(fov_weights['fov_weights'].sum())

In [None]:
# Check current GPU status and memory use.
!nvidia-smi

In [None]:
from nucls_model.MaskRCNN import MaskRCNN
# Build the model from the parameters stored in the loaded configs
# (`cfg` comes from the configuration cell in the Training section).
model = MaskRCNN(**cfg.MaskRCNNConfigs.maskrcnn_params)

In [None]:
# Display the model's repr as the cell output.
model

In [None]:
import torch

# Run the model once on a random image as an inference sanity check.
model.eval()
# x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
x = [torch.rand(3, 700, 700)]
# Inference only: disable gradient tracking so no autograd graph is kept alive
# (printed tensors therefore carry no grad_fn).
with torch.no_grad():
    predictions = model(x)
print(predictions)

In [None]:
from torchvision.models import resnet18
from torch.nn import Sequential

In [None]:
# Randomly initialised ResNet-18 reduced to its convolutional trunk:
# the stem (conv1, bn1, relu, maxpool) followed by layer1..layer4.
resnet = resnet18(pretrained=False)
trunk_parts = ('conv1', 'bn1', 'relu', 'maxpool',
               'layer1', 'layer2', 'layer3', 'layer4')
backbone = Sequential(*(getattr(resnet, part) for part in trunk_parts))

In [None]:
# Push one large random image through the trunk to inspect the output shape.
x = torch.rand(1, 3, 1216, 1216)
# Only the shape is needed, so skip gradient tracking; otherwise every
# intermediate activation for this 1216x1216 input is kept for backward.
with torch.no_grad():
    y = backbone(x)

In [None]:
# Display the shape of the backbone output computed in the previous cell.
y.shape

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np

class SizeEstimator(object):
    '''Estimate how much memory a PyTorch model needs.

    `estimate_size()` counts the model's parameters only. The remaining
    helpers (`get_output_sizes`, `calc_forward_backward_bits`,
    `calc_input_bits`) can be called individually to compute the activation
    and input terms, but they are not part of the reported total.

    Args:
        model: the `torch.nn.Module` to inspect.
        input_size: shape of the sample input used by the activation/input
            helpers.
        bits: number of bits per stored value (32 for float32).
    '''

    def __init__(self, model, input_size=(1,1,32,32), bits=32):
        self.model = model
        self.input_size = input_size
        self.bits = bits

    def get_parameter_sizes(self):
        '''Store the shape of every parameter of `model` in `self.param_sizes`.'''
        # `model.parameters()` already recurses through all sub-modules and
        # yields each tensor once. Calling `.parameters()` on every entry of
        # `model.modules()` counted a parameter once per enclosing container
        # and skipped parameters owned directly by the top-level module.
        self.param_sizes = [np.array(p.size()) for p in self.model.parameters()]
        return

    def get_output_sizes(self):
        '''Run a sample input through each layer and store the output shapes
        in `self.out_sizes`.

        Layers are applied one after another in `model.modules()` order, so
        the result is only meaningful for models whose layers form a plain
        chain (no branches or skip connections).
        '''
        # Only leaf modules do work; a container would re-run its children.
        leaf_layers = [m for m in self.model.modules()
                       if next(m.children(), None) is None]
        # Zeros instead of an uninitialised tensor, so the run is deterministic.
        sample = torch.zeros(*self.input_size)
        out_sizes = []
        # torch.no_grad() replaces the removed `volatile=True` Variable flag.
        with torch.no_grad():
            for layer in leaf_layers:
                sample = layer(sample)
                out_sizes.append(np.array(sample.size()))

        self.out_sizes = out_sizes
        return

    def calc_param_bits(self):
        '''Store in `self.param_bits` the bits needed for all parameters.

        Requires `get_parameter_sizes()` to have been called first.
        '''
        self.param_bits = sum(
            int(np.prod(size, dtype=np.int64)) * self.bits
            for size in self.param_sizes)
        return

    def calc_forward_backward_bits(self):
        '''Store in `self.forward_backward_bits` the bits needed for the
        layer outputs of one forward and one backward pass.

        Requires `get_output_sizes()` to have been called first.
        '''
        forward_bits = sum(
            int(np.prod(size, dtype=np.int64)) * self.bits
            for size in self.out_sizes)
        # multiply by 2 for both forward AND backward
        self.forward_backward_bits = forward_bits * 2
        return

    def calc_input_bits(self):
        '''Store in `self.input_bits` the bits needed for the input tensor.'''
        n_values = int(np.prod(np.array(self.input_size), dtype=np.int64))
        self.input_bits = n_values * self.bits
        return

    def estimate_size(self):
        '''Estimate the size of the model's parameters.

        Returns:
            A `(megabytes, bits)` tuple, where megabytes is
            bits / 8 / 1024**2. Activations and the input are not included.
        '''
        self.get_parameter_sizes()
        self.calc_param_bits()
        total = self.param_bits

        total_megabytes = (total/8)/(1024**2)
        return total_megabytes, total


In [None]:
# Estimate the size of `model` (defined in an earlier cell) and print the
# (megabytes, bits) tuple returned by estimate_size().
se = SizeEstimator(model, input_size=(1, 3, 300, 300))
print(se.estimate_size())