In [1]:
# Path setup: make the project root the current working directory.
# The root project directory must be named 'VESUVIUS_Challenge' for this to work.

from pathlib import Path
import os

# Name of the directory that marks the project root.
PROJECT_ROOT_NAME = 'VESUVIUS_Challenge'

print('Starting path:' + os.getcwd())

_start_dir = Path().resolve()
if not str(_start_dir).endswith(PROJECT_ROOT_NAME):
    # Not at the root yet: jump to the closest ancestor named like the project
    # root, so the notebook also works when started from a nested sub-folder.
    # If no ancestor matches, fall back to the direct parent directory.
    PATH = next(
        (ancestor for ancestor in _start_dir.parents if ancestor.name == PROJECT_ROOT_NAME),
        _start_dir.parent,
    )
    os.chdir(PATH)

# make sure you are in the root folder of the project
print('Current path:' + os.getcwd())

Starting path:/home/gregory_maruss/VESUVIUS_Challenge/jupyter notebooks
Current path:/home/gregory_maruss/VESUVIUS_Challenge


In [2]:
import torch
import monai
from monai.visualize import matshow3d
import segmentation_models_pytorch as smp
import matplotlib.pyplot as plt
import tempfile
import shutil
import os
import glob
import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from typing import Tuple, List
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from Data_Modules.Vesuvius_Dataset import Vesuvius_Tile_Datamodule
from lit_models.Vesuvius_Lit_Model import Lit_Model
from pytorch_lightning.callbacks import ModelCheckpoint
import torch.nn as nn
from Models.PVT_model import PyramidVisionTransformerV2
import torch.nn as nn
from functools import partial


2023-05-15 19:09:41,008 - Created a temporary directory at /tmp/tmp2peb1e8k
2023-05-15 19:09:41,009 - Writing /tmp/tmp2peb1e8k/_remote_module_non_scriptable.py


In [3]:
# Side length of the square patches (also passed to the encoder as img_size).
PATCH_SIZE = 256

# Number of z-slices used per sample (also the encoder's input channel count).
Z_DIM = 16

# Relative path of the competition data.
COMPETITION_DATA_DIR_str = "kaggle/input/vesuvius-challenge-ink-detection/"

# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [4]:
class MyEncoder(torch.nn.Module, smp.encoders._base.EncoderMixin):
    """`PyramidVisionTransformerV2` wrapped as a segmentation_models_pytorch encoder.

    The wrapper exposes the attributes `EncoderMixin` relies on
    (`_out_channels`, `_depth`, `_in_channels`) and returns the feature
    pyramid in the order smp decoders expect (highest resolution first).

    Args:
        **kwargs: Encoder parameters supplied by smp. Only ``depth`` (number of
            downsampling stages to expose, default 5) is used; anything else
            is ignored.
    """

    def __init__(self, **kwargs):
        super().__init__()

        # Channel width of each backbone stage; reused below so the advertised
        # output channels cannot drift from the backbone configuration.
        embed_dims = [64, 128, 256, 512]

        self.model = PyramidVisionTransformerV2(
            img_size=PATCH_SIZE,
            patch_size=4,
            in_chans=Z_DIM,
            num_classes=1,
            embed_dims=embed_dims,
            num_heads=[1, 2, 4, 8],
            mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True,
            qk_scale=None,
            drop_rate=0.,
            attn_drop_rate=0.,
            drop_path_rate=0.1,
            norm_layer=partial(nn.LayerNorm, eps=1e-6),
            depths=[3, 4, 6, 3],
            sr_ratios=[8, 4, 2, 1],
        )

        # Number of channels of each feature returned by `forward`:
        # the raw input, a zero-channel placeholder, then one entry per
        # backbone stage.
        self._out_channels: List[int] = [Z_DIM, 0, *embed_dims]

        # Number of downsampling stages to expose. `forward` returns
        # `_depth + 1` features. Defaults to 5 (all features).
        self._depth: int = kwargs.get("depth", 5)

        # Number of input channels the backbone was built for.
        self._in_channels: int = Z_DIM

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        """Return the feature pyramid for `x`, highest resolution first.

        Args:
            x: Input batch of shape (B, C, H, W).

        Returns:
            The first ``self._depth + 1`` entries of
            ``[x, placeholder, *backbone_features]`` where ``placeholder`` is
            an empty tensor of shape (B, 0, H // 2, W // 2) with the dtype
            and device of `x`.
        """
        batch, _, height, width = x.shape

        # The backbone yields no half-resolution feature map, so a zero-channel
        # tensor fills that slot (matching the 0 in `_out_channels`).
        placeholder = torch.empty(
            [batch, 0, height // 2, width // 2], dtype=x.dtype, device=x.device
        )

        # assumes the backbone returns a sequence with one feature map per
        # stage, ordered from high to low resolution -- TODO confirm
        stage_features = self.model(x)

        features = [x, placeholder, *stage_features]
        return features[: self._depth + 1]
    

In [5]:
# Register the custom encoder in smp's encoder registry under the name 'PVT'.
# No pretrained weights and no extra constructor parameters are declared.
smp.encoders.encoders['PVT'] = dict(
    encoder=MyEncoder,
    pretrained_settings={},
    params={},
)



In [6]:


class CFG:
    """Configuration namespace shared by the data module, the Lightning model and the trainer."""

    device = DEVICE

    THRESHOLD = 0.4
    use_wandb = True

    # ---------------------------- Dataset ----------------------------

    # Stage: 'train' or 'test'.
    stage = 'train'

    # Location of the competition data.
    competition_data_dir = COMPETITION_DATA_DIR_str

    # Number of slices in the z dimension: 1 < z_dim < 65.
    z_dim = Z_DIM

    # Fragments used for training; available ids are [1, 2, 3].
    train_fragment_id = [2, 3]

    # Fragments used for validation.
    val_fragment_id = [1]

    batch_size = 16

    # Patch size and stride used when feeding the model (stride is half a patch).
    patch_size = PATCH_SIZE
    stride = patch_size // 2

    num_workers = 8
    on_gpu = True

    # ------------------ Model and Lightning parameters ------------------

    # NOTE(review): `in_channels` is 3 although the 'PVT' encoder is built for
    # Z_DIM input channels -- presumably intentional so that smp leaves the
    # encoder's first layer untouched; confirm before changing.
    model = smp.PSPNet(
        encoder_name='PVT',
        encoder_weights=None,
        encoder_depth=5,
        psp_out_channels=512,
        psp_use_batchnorm=True,
        psp_dropout=0.2,
        in_channels=3,
        classes=1,
        activation=None,
        upsampling=32,
        aux_params=None,
    )

    checkpoint = None
    save_directory = None

    # Gradient accumulation steps, giving batch_size * accumulate_grad_batches = 128.
    # NOTE(review): the previous comment said 192 was found to be optimal,
    # which does not match this expression -- confirm which value is intended.
    accumulate_grad_batches = 128 // batch_size
    learning_rate = 0.0001
    eta_min = 1e-8
    t_max = 80
    max_epochs = 120
    weight_decay = 0.0001
    precision = 16

    # Checkpointing.
    save_top_k = 5
    monitor = "FBETA"
    mode = "max"

    # -------------------------- Augmentations --------------------------

    # Training augmentations.
    train_transforms = [
        A.Resize(patch_size, patch_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.75),
        A.ShiftScaleRotate(p=0.75),
        A.OneOf(
            [
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
            ],
            p=0.4,
        ),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        A.CoarseDropout(
            max_holes=1,
            max_width=int(patch_size * 0.3),
            max_height=int(patch_size * 0.3),
            mask_fill_value=0,
            p=0.5,
        ),
        A.Normalize(mean=[0] * z_dim, std=[1] * z_dim),
        ToTensorV2(transpose_mask=True),
    ]

    # Validation augmentations: resize, normalize and convert to tensors.
    val_transforms = [
        A.Resize(patch_size, patch_size),
        A.Normalize(mean=[0] * z_dim, std=[1] * z_dim),
        ToTensorV2(transpose_mask=True),
    ]

    # Test augmentations: same pipeline as validation.
    test_transforms = [
        A.Resize(patch_size, patch_size),
        A.Normalize(mean=[0] * z_dim, std=[1] * z_dim),
        ToTensorV2(transpose_mask=True),
    ]
        
    
    

In [7]:
# Build the tile data module from the shared CFG settings.
# Note: despite its name, `dataset` is the data module later passed to
# `trainer.fit(..., datamodule=dataset)`.
dataset = Vesuvius_Tile_Datamodule(cfg=CFG)

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

In [8]:
# Build the Lightning module from the shared CFG settings.
lit_model = Lit_Model(cfg=CFG,)

# Set to True to replace the freshly built module with one restored from a checkpoint.
Checkpoint = False
if Checkpoint:
    # `load_from_checkpoint` is a classmethod, so it is called on the class
    # rather than on the instance (newer Lightning releases reject the
    # instance call).
    lit_model = Lit_Model.load_from_checkpoint(
        'logs/gcp_checkpoints/MoUB4_Bce015_Tver_alpha085epoch_64.ckpt',
        # Optional overrides, currently disabled:
        # learning_rate=7e-6,
        # t_max=70,
        # eta_min=1e-8,
        # weight_decay=0.0001,
    )


[34m[1mwandb[0m: Currently logged in as: [33mgmarus[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Directory that receives checkpoints and CSV logs for this run.
SAVE_DIR = 'logs/PVT_256'

# Checkpointing settings come from CFG so that the configuration class stays
# the single source of truth (previously the same values were repeated here).
checkpoint_callback = ModelCheckpoint(
    save_top_k=CFG.save_top_k,
    monitor=CFG.monitor,
    mode=CFG.mode,
    dirpath=SAVE_DIR,
    filename="PVT_256{epoch:02d}{FBETA:.2f}{val_loss:.2f}{fbeta_4:.2f}{recall:.2f}{precision:.2f}",
    save_last=True,
)

# NOTE(review): the accelerator is fixed to a single GPU even though DEVICE
# can fall back to the CPU -- confirm this notebook is only run on GPU hosts.
trainer = pl.Trainer(
    accelerator='gpu',
    devices=1,
    max_epochs=CFG.max_epochs,
    check_val_every_n_epoch=1,
    logger=pl.loggers.CSVLogger(save_dir=SAVE_DIR),
    log_every_n_steps=1,
    default_root_dir=SAVE_DIR,
    precision=CFG.precision,
    accumulate_grad_batches=CFG.accumulate_grad_batches,
    callbacks=[checkpoint_callback],
)

# To resume an earlier run, pass e.g.
# ckpt_path='logs/gcp_checkpoints/MoUB4_Bce015_Tver_alpha085epoch_64.ckpt'
trainer.fit(lit_model, datamodule=dataset)

2023-05-15 19:10:16,883 - Using 16bit None Automatic Mixed Precision (AMP)
2023-05-15 19:10:16,946 - GPU available: True (cuda), used: True
2023-05-15 19:10:16,947 - TPU available: False, using: 0 TPU cores
2023-05-15 19:10:16,948 - IPU available: False, using: 0 IPUs
2023-05-15 19:10:16,949 - HPU available: False, using: 0 HPUs


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


2023-05-15 19:10:18,182 - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Adjusting learning rate of group 0 to 1.0000e-04.
2023-05-15 19:10:18,195 - 
  | Name                  | Type                  | Params
----------------------------------------------------------------
0 | metrics               | ModuleDict            | 0     
1 | model                 | PSPNet                | 21.1 M
2 | loss_dice             | DiceLoss              | 0     
3 | loss_tversky          | TverskyLoss           | 0     
4 | loss_focal            | FocalLoss             | 0     
5 | loss_bce              | SoftBCEWithLogitsLoss | 0     
6 | loss_monai_focal_dice | DiceFocalLoss         | 0     
----------------------------------------------------------------
21.1 M    Trainable params
0         Non-trainable params
21.1 M    Total params
42.201    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]