In [1]:
# Here we take care of paths.
# Make sure root project directory is named 'VESUVIUS_Challenge' for this to work

from pathlib import Path
import os
# Report where the kernel started so path problems are easy to diagnose.
print('Starting path:' + os.getcwd())

# Unless the working directory already ends with the root folder's name,
# move up exactly one level (this assumes the notebook is launched from a
# direct subfolder of the project root -- verify if the layout changes).
if not os.getcwd().endswith('VESUVIUS_Challenge'):
    PATH = Path().resolve().parents[0]
    os.chdir(PATH)

# Relative paths used later in the notebook are resolved against this directory.
print('Current path:' + os.getcwd())

Starting path:/home/gregory_maruss/VESUVIUS_Challenge/jupyter notebooks
Current path:/home/gregory_maruss/VESUVIUS_Challenge


In [2]:
import torch
import monai
from monai.visualize import matshow3d
import segmentation_models_pytorch as smp
import matplotlib.pyplot as plt
import tempfile
import shutil
import os
import glob
import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from typing import Tuple, List
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from Data_Modules.Vesuvius_Dataset import Vesuvius_Tile_Datamodule
from lit_models.Vesuvius_Lit_Model import Lit_Model
from pytorch_lightning.callbacks import ModelCheckpoint
import torch.nn as nn



2023-05-13 17:58:46,225 - Created a temporary directory at /tmp/tmpun52wtmd
2023-05-13 17:58:46,226 - Writing /tmp/tmpun52wtmd/_remote_module_non_scriptable.py


In [3]:
# Use a CUDA device when PyTorch reports one as available, otherwise the CPU.
# NOTE(review): an earlier comment here offered an alternative line for
# machines without Apple silicon, but that line was identical to the active
# one; no Apple-specific (MPS) branch exists in this cell.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")


In [4]:
# Notebook-wide constants.
# PATCH_SIZE is reused as CFG.patch_size; Z_DIM is reused as CFG.z_dim and as
# the number of input channels of the 2D network in Model.
PATCH_SIZE, Z_DIM = 256, 16

# Relative location of the competition data; reused as CFG.competition_data_dir.
COMPETITION_DATA_DIR_str = "kaggle/input/vesuvius-challenge-ink-detection/"


In [5]:
class Model(nn.Module):
    """Two-stage segmentation network: a 3D UNet feeding a 2D UNet.

    ``pre_model`` is a MONAI ``UNet`` built with ``spatial_dims=3`` that maps
    one input channel to one output channel. Axis 1 of its output is squeezed
    away and the result is passed to ``model``, a MONAI ``FlexibleUNet`` with
    ``spatial_dims=2``, an ``efficientnet-b3`` backbone, ``Z_DIM`` input
    channels and a single output channel.
    """

    def __init__(self):
        """Create both sub-networks; every hyper-parameter is fixed here."""
        super().__init__()

        # Stage 1: single-channel 3D UNet.
        unet3d_kwargs = dict(
            spatial_dims=3,
            in_channels=1,
            out_channels=1,
            channels=(8, 16, 32, 64, 128),
            strides=(2, 2, 2, 2),
            num_res_units=2,
            dropout=0,
            norm='batch',
            bias=False,
        )
        self.pre_model = monai.networks.nets.UNet(**unet3d_kwargs)

        # Stage 2: 2D UNet whose input channel count is the module-level Z_DIM.
        unet2d_kwargs = dict(
            in_channels=Z_DIM,
            out_channels=1,
            backbone='efficientnet-b3',
            pretrained=True,
            decoder_channels=(256, 128, 64, 32, 16),
            spatial_dims=2,
            norm=('batch', {'eps': 0.001, 'momentum': 0.1}),
            act=('relu', {'inplace': True}),
            dropout=0.0,
            decoder_bias=False,
            upsample='deconv',
            interp_mode='nearest',
            is_pad=False,
        )
        self.model = monai.networks.nets.FlexibleUNet(**unet2d_kwargs)

    def forward(self, x):
        """Run the 3D stage, drop axis 1, then run the 2D stage.

        Args:
            x: input tensor for the 3D UNet.
               # assumes x is (batch, 1, Z_DIM, height, width) -- TODO confirm
               # against the datamodule.

        Returns:
            The output of the 2D ``FlexibleUNet``.
        """
        volume = self.pre_model(x)
        # squeeze(1) removes axis 1 only when it has size 1 (the 3D UNet is
        # configured with out_channels=1).
        planes = volume.squeeze(1)
        return self.model(planes)
    
    


In [6]:

class CFG:
    """Configuration namespace for this experiment.

    Holds plain class attributes only. The class object itself is passed
    around as ``cfg=CFG`` rather than an instance. Note that ``model`` is
    instantiated while this class body executes.
    """

    # ---------------- Runtime ----------------
    device = DEVICE
    accelerator = 'gpu'
    THRESHOLD = 0.4
    use_wandb = True

    # ---------------- Dataset ----------------
    # stage: 'train' or 'test'
    stage = 'train'

    # Location of the competition data
    competition_data_dir = COMPETITION_DATA_DIR_str

    # Number of slices along the z dimension: 1 < z_dim < 65
    z_dim = Z_DIM

    # Fragments used for training; available ids are [1, 2, 3]
    train_fragment_id = [2, 3]

    # Fragments used for validation
    val_fragment_id = [1]

    batch_size = 8

    # Patch size and stride used when feeding the model
    patch_size = PATCH_SIZE
    stride = patch_size // 2

    num_workers = 8
    on_gpu = True

    # ---------------- Model and Lightning model parameters ----------------
    # The network is built once, at class-definition time.
    model = Model()

    checkpoint = None
    save_directory = None

    # Effective batch = batch_size * accumulate_grad_batches = 96.
    # NOTE(review): the original comment stated that 192 was found optimal,
    # which does not match 96 // batch_size -- confirm which value is intended.
    accumulate_grad_batches = 96 // batch_size
    learning_rate = 0.0001
    eta_min = 1e-8
    t_max = 75
    max_epochs = 120
    weight_decay = 0.0001
    precision = 16

    # Checkpointing
    save_top_k = 5
    monitor = "FBETA"
    mode = "max"

    # ---------------- Augmentations ----------------
    # Training augmentations (applied in list order)
    train_transforms = [
        A.Resize(patch_size, patch_size),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.75),
        A.ShiftScaleRotate(p=0.75),
        A.OneOf(
            [
                A.GaussNoise(var_limit=[10, 50]),
                A.GaussianBlur(),
                A.MotionBlur(),
            ],
            p=0.4,
        ),
        A.ElasticTransform(
            alpha=120,
            sigma=120 * 0.05,
            alpha_affine=120 * 0.03,
            interpolation=1,  # same integer value as cv2.INTER_LINEAR
            border_mode=cv2.BORDER_CONSTANT,
            value=0,
            mask_value=0,
            always_apply=False,
            approximate=False,
            same_dxdy=False,
            p=0.3,
        ),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        A.CoarseDropout(
            max_holes=1,
            max_width=int(patch_size * 0.3),
            max_height=int(patch_size * 0.3),
            mask_fill_value=0,
            p=0.5,
        ),
        # One mean/std entry per z slice.
        A.Normalize(mean=[0] * z_dim, std=[1] * z_dim),
        ToTensorV2(transpose_mask=True),
    ]

    # Validation augmentations: resize, normalize, convert to tensor
    val_transforms = [
        A.Resize(patch_size, patch_size),
        A.Normalize(mean=[0] * z_dim, std=[1] * z_dim),
        ToTensorV2(transpose_mask=True),
    ]

    # Test augmentations: same steps as validation, built as separate objects
    test_transforms = [
        A.Resize(patch_size, patch_size),
        A.Normalize(mean=[0] * z_dim, std=[1] * z_dim),
        ToTensorV2(transpose_mask=True),
    ]
        
    
    

In [7]:
# Build the tile datamodule from the CFG class object (the class itself is
# passed, not an instance). It is later handed to trainer.fit as `datamodule`.
dataset = Vesuvius_Tile_Datamodule(cfg=CFG)

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

In [8]:
# Create the Lightning module from the CFG class object.
lit_model = Lit_Model(cfg=CFG)

# Set to True to replace the fresh module above with one restored from the
# checkpoint file below, overriding the listed hyper-parameters.
Checkpoint = False
if Checkpoint:
    # load_from_checkpoint is a classmethod, so it is called on the class.
    # Calling it on an instance works on older Lightning releases but is
    # rejected with a TypeError by Lightning 2.x.
    lit_model = Lit_Model.load_from_checkpoint(
        'logs/EFB3_SMPUplus_Bce025_Tver_alpha08/last.ckpt',
        learning_rate=7e-6,
        t_max=70,
        eta_min=1e-8,
        weight_decay=0.0001,
    )


[34m[1mwandb[0m: Currently logged in as: [33mgmarus[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# Directory that receives this run's checkpoints; the trainer cell also uses
# it for the CSV logger and as default_root_dir.
SAVE_DIR = 'logs/3D_MoUB2_Bce50_Tver_alpha07_16_256'

# Keep the top-k checkpoints ranked by the monitored metric, plus the most
# recent one (save_last). The selection settings are read from CFG, which
# already declares them under "checkpointing", so they live in one place.
checkpoint_callback = ModelCheckpoint(
    save_top_k=CFG.save_top_k,
    monitor=CFG.monitor,
    mode=CFG.mode,
    dirpath=SAVE_DIR,
    # The {name:format} fields are filled in by ModelCheckpoint from logged metrics.
    filename="3D_MoUB2_Bce50_Tver_alpha07_16_256{epoch:02d}{FBETA:.2f}{val_loss:.2f}{fbeta_4:.2f}{recall:.2f}{precision:.2f}",
    save_last=True,
)


# Single-device trainer configured from CFG. Metrics go to a CSV logger under
# SAVE_DIR, and validation runs after every epoch.
trainer = pl.Trainer(
    # Hardware and numerics
    accelerator=CFG.accelerator,
    devices=1,
    precision=CFG.precision,
    # Schedule
    max_epochs=CFG.max_epochs,
    check_val_every_n_epoch=1,
    accumulate_grad_batches=CFG.accumulate_grad_batches,
    # Logging and checkpointing
    logger=pl.loggers.CSVLogger(save_dir=SAVE_DIR),
    log_every_n_steps=1,
    default_root_dir=SAVE_DIR,
    callbacks=[checkpoint_callback],
)





# Start training. To resume an earlier run instead, additionally pass
# ckpt_path='<path to .ckpt file>'.
trainer.fit(model=lit_model, datamodule=dataset)

2023-05-13 17:59:23,354 - Using 16bit None Automatic Mixed Precision (AMP)
2023-05-13 17:59:23,416 - GPU available: True (cuda), used: True
2023-05-13 17:59:23,418 - TPU available: False, using: 0 TPU cores
2023-05-13 17:59:23,419 - IPU available: False, using: 0 IPUs
2023-05-13 17:59:23,419 - HPU available: False, using: 0 HPUs


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


2023-05-13 17:59:24,686 - LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Adjusting learning rate of group 0 to 1.0000e-04.
2023-05-13 17:59:24,704 - 
  | Name                  | Type                  | Params
----------------------------------------------------------------
0 | metrics               | ModuleDict            | 0     
1 | model                 | Model                 | 14.7 M
2 | loss_dice             | DiceLoss              | 0     
3 | loss_tversky          | TverskyLoss           | 0     
4 | loss_focal            | FocalLoss             | 0     
5 | loss_bce              | SoftBCEWithLogitsLoss | 0     
6 | loss_monai_focal_dice | DiceFocalLoss         | 0     
----------------------------------------------------------------
14.7 M    Trainable params
0         Non-trainable params
14.7 M    Total params
29.364    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
