In [1]:
# Base reference (Kaggle discussion): https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/discussion/327166

In [2]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes, so edits to the helper .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
!pip -q install segmentation-models-pytorch
!pip -q install albumentations --upgrade
!pip -q install opencv-python --upgrade

[K     |████████████████████████████████| 88 kB 3.3 MB/s 
[K     |████████████████████████████████| 58 kB 5.1 MB/s 
[K     |████████████████████████████████| 376 kB 25.1 MB/s 
[?25h  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 113 kB 4.3 MB/s 
[K     |████████████████████████████████| 48.3 MB 1.3 MB/s 
[K     |████████████████████████████████| 60.9 MB 119 kB/s 
[?25h

In [4]:
# Mount Google Drive at /content/drive; the input archives, helper modules and
# experiment outputs referenced below all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# --- Files to stage from Google Drive ---------------------------------------
# Zip archives that are copied to WORK_DIR and unpacked there (label -> file).
# Disabled entry kept for reference: 'mask': 'uwmgi-mask-dataset.zip'
DATASET = dict(
    masks='masks.zip',
    train='uw-madison-gi-tract-image-segmentation.zip',
)
# Plain files that are copied to WORK_DIR as-is (label -> file).
# Disabled entries kept for reference: 'train_csv': 'train.csv',
# 'train_processed': 'train_processed.csv', 'sample_sub': 'sample_submission.csv'
METADATA = dict(config='config.yaml')

# --- Locations on the mounted Drive ------------------------------------------
INPUT_DIR = '/content/drive/MyDrive/kaggle/gi_t_is/input/'
OUTPUT_DIR = '/content/drive/MyDrive/kaggle/gi_t_is/output/'
IMPORT_DIR = '/content/drive/MyDrive/kaggle/gi_t_is/nbs/py/'

# Archives and metadata are both read from the input directory.
DATASET_DIR = INPUT_DIR
METADATA_DIR = INPUT_DIR

# --- Local working area and experiment naming --------------------------------
WORK_DIR = '/content/'
EXPERIMENT = 'lum_unetb3_320_384'
# Every directory constant ends with '/', so plain concatenation yields paths.
EXPERIMENT_DIR = f'{OUTPUT_DIR}{EXPERIMENT}/'

# Directory with the project's helper modules (appended to sys.path later).
LUMINIDE_PY = f'{IMPORT_DIR}luminide_code/'

In [6]:
def copy_dataset(ds_dict, ds_dir, work_dir):
  """Copy each zip archive into ``work_dir``, unpack it there and delete the copy.

  Implemented with the standard library instead of ``!cp`` / ``!unzip`` /
  ``!rm`` so that paths containing spaces work and a failed copy or a corrupt
  archive raises instead of being silently ignored.

  Args:
    ds_dict: mapping of label -> archive file name; only the values are used.
    ds_dir: directory that holds the archives.
    work_dir: directory that receives the archive copy and its extracted content.

  Raises:
    OSError: if an archive cannot be copied or the staged copy cannot be removed.
    zipfile.BadZipFile: if a staged file is not a valid zip archive.
  """
  # Imported locally because this cell runs before the notebook's import cell.
  import os
  import shutil
  import zipfile

  for archive_name in ds_dict.values():
    source = os.path.join(ds_dir, archive_name)
    staged = os.path.join(work_dir, archive_name)
    print('copy', source, ' to', work_dir)
    shutil.copy(source, staged)
    try:
      print ('unzip -q ', staged, ' -d ', work_dir)
      with zipfile.ZipFile(staged) as archive:
        archive.extractall(work_dir)
    finally:
      # Remove the staged copy even when extraction fails, so it never lingers.
      print ('rm ', staged)
      os.remove(staged)
def copy_metadata(md_dict,md_dir,work_dir):
  """Copy each metadata file from ``md_dir`` into ``work_dir`` unchanged.

  Uses ``shutil.copy`` instead of ``!cp`` so that paths containing spaces work
  and a missing source file raises instead of being silently ignored.

  Args:
    md_dict: mapping of label -> file name; only the values are used.
    md_dir: directory that holds the files.
    work_dir: destination directory.

  Raises:
    OSError: if a file cannot be copied.
  """
  # Imported locally because this cell runs before the notebook's import cell.
  import os
  import shutil

  for file_name in md_dict.values():
    source = os.path.join(md_dir, file_name)
    print('copy ', source,' to ',work_dir)
    shutil.copy(source, work_dir)

# Stage the archives (copied, unpacked, copy removed) and the metadata files
# in WORK_DIR, then list /content/ to confirm what is available locally.
copy_dataset(DATASET,DATASET_DIR, WORK_DIR)
copy_metadata(METADATA,METADATA_DIR,WORK_DIR)
!ls /content/

copy /content/drive/MyDrive/kaggle/gi_t_is/input/masks.zip  to /content/
unzip -q  /content/masks.zip  -d  /content/
rm  /content/masks.zip
copy /content/drive/MyDrive/kaggle/gi_t_is/input/uw-madison-gi-tract-image-segmentation.zip  to /content/
unzip -q  /content/uw-madison-gi-tract-image-segmentation.zip  -d  /content/
rm  /content/uw-madison-gi-tract-image-segmentation.zip
copy  /content/drive/MyDrive/kaggle/gi_t_is/input/config.yaml  to  /content/
config.yaml  drive  masks  sample_data	sample_submission.csv  train  train.csv


In [7]:
!mkdir {EXPERIMENT_DIR}

mkdir: cannot create directory ‘/content/drive/MyDrive/kaggle/gi_t_is/output/lum_unetb3_320_384/’: File exists


In [8]:
# Keep a copy of the configuration file alongside the experiment's outputs.
!cp {INPUT_DIR+'config.yaml'} {EXPERIMENT_DIR}

In [9]:
import sys
# Make the modules stored under LUMINIDE_PY importable.
# NOTE(review): augment, dataset, models, config and util (imported in the next
# cell) presumably live in that directory — confirm.
sys.path.append(LUMINIDE_PY)

In [10]:
import os
import random
from tqdm.notebook import tqdm
import multiprocessing as mp
from datetime import datetime
import numpy as np
import pandas as pd
import segmentation_models_pytorch as smp

import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter
import torch
from torch import nn
import torch.utils.data as data
from torch import autocast
from torch.cuda.amp import GradScaler

from augment import make_train_augmenter
from dataset import VisionDataset
from models import ModelWrapper
from config import Config
import util

In [11]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# String form of the device kind ('cuda' or 'cpu'); passed to torch.autocast.
device_type = device.type

In [12]:
# Display the accelerator provided by the runtime. Guarded so the cell also
# runs on a CPU-only runtime, where get_device_name(0) would raise.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU only (no CUDA device)'

'Tesla P100-PCIE-16GB'

In [13]:
class Trainer:
    """Training / validation driver for the model built by ``ModelWrapper``.

    Construction builds the train and validation data loaders from
    ``train.csv`` in ``input_dir``, creates the model, the optimizer and an
    exponential learning-rate scheduler, and optionally restores model and
    optimizer state from a checkpoint. ``fit`` then runs the epoch loop with
    TensorBoard logging, best-checkpoint saving and early stopping.

    The class relies on two notebook-level names that must exist before the
    corresponding methods run: ``EXPERIMENT_DIR`` (where checkpoints, loss
    history and TensorBoard runs are written) and ``worker_init_fn`` (passed
    to the training ``DataLoader``).
    """

    def __init__(
            self, conf, input_dir, device, num_workers,
            checkpoint, print_interval=100, subset=100):
        """Set up data, model, optimizer, scheduler and loss functions.

        Args:
            conf: configuration object; this class reads ``optim``, ``lr``,
                ``weight_decay``, ``gamma`` and ``batch_size`` from it and
                calls ``as_dict()`` when saving checkpoints.
            input_dir: directory containing ``train.csv`` (and the data that
                ``util.process_files`` / ``VisionDataset`` read).
            device: ``torch.device`` or device string the model and batches
                are moved to.
            num_workers: number of ``DataLoader`` worker processes.
            checkpoint: ``None`` or a dict with ``'model'`` and ``'optimizer'``
                state dicts to restore.
            print_interval: print the running training loss every this many
                batches.
            subset: forwarded to ``VisionDataset`` as ``subset``.
        """
        self.conf = conf
        self.input_dir = input_dir
        # Normalise to a torch.device so `.type` is available whether the
        # caller passed a device object or a plain string such as 'cuda'.
        self.device = torch.device(device)
        self.device_type = self.device.type
        self.max_patience = 10
        self.print_interval = print_interval
        # Mixed precision is tied to the device actually used by this trainer,
        # not merely to whether some CUDA device exists on the machine.
        self.use_amp = self.device_type == 'cuda'
        if self.use_amp:
            self.scaler = GradScaler()

        self.create_dataloaders(num_workers, subset)

        self.model = ModelWrapper(conf, self.num_classes)
        self.model = self.model.to(self.device)
        self.optimizer = self.create_optimizer(conf, self.model)
        assert  self.optimizer is not None, f'Unknown optimizer {conf.optim}'
        if checkpoint:
            self.model.load_state_dict(checkpoint['model'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        # Created after the optimizer state has been restored.
        self.scheduler = torch.optim.lr_scheduler.ExponentialLR(
            self.optimizer, gamma=conf.gamma)
        # The training objective is the sum of these losses (see `criterion`).
        self.loss_funcs = [
            smp.losses.SoftBCEWithLogitsLoss(),
            smp.losses.TverskyLoss(mode='multilabel', log_loss=False),
        ]
        self.history = None

    def create_dataloaders(self, num_workers, subset):
        """Build ``self.train_loader`` / ``self.val_loader`` and set ``self.num_classes``.

        The metadata is shuffled with a fixed seed and split 90% / 10% into
        training and validation frames.

        Args:
            num_workers: number of worker processes for both loaders.
            subset: forwarded to ``VisionDataset`` as ``subset``.
        """
        conf = self.conf
        meta_file = os.path.join(self.input_dir, 'train.csv')
        assert os.path.exists(meta_file), f'{meta_file} not found on Compute Server'
        meta_df = pd.read_csv(meta_file, dtype=str)
        class_names = util.get_class_names(meta_df)
        self.num_classes = len(class_names)

        df = util.process_files(self.input_dir, 'train', meta_df, class_names)
        # shuffle (fixed random_state keeps the train/validation split stable)
        df = df.sample(frac=1, random_state=0).reset_index(drop=True)
        train_aug = make_train_augmenter(conf)
        test_aug = util.make_test_augmenter(conf)

        # split into train and validation sets
        split = df.shape[0]*90//100
        train_df = df.iloc[:split].reset_index(drop=True)
        val_df = df.iloc[split:].reset_index(drop=True)
        train_dataset = VisionDataset(
            train_df, conf, self.input_dir, 'train',
            class_names, train_aug, subset=subset)
        val_dataset = VisionDataset(
            val_df, conf, self.input_dir, 'train',
            class_names, test_aug, subset=subset)
        # Drop the last training batch only when it would hold a single sample.
        # NOTE(review): presumably because a one-sample batch is a problem for
        # batch-norm layers in training mode — confirm.
        drop_last = (len(train_dataset) % conf.batch_size) == 1
        self.train_loader = data.DataLoader(
            train_dataset, batch_size=conf.batch_size, shuffle=True,
            num_workers=num_workers, pin_memory=False,
            worker_init_fn=worker_init_fn, drop_last=drop_last)
        self.val_loader = data.DataLoader(
            val_dataset, batch_size=conf.batch_size, shuffle=False,
            num_workers=num_workers, pin_memory=False)

    def create_optimizer(self, conf, model):
        """Return the optimizer selected by ``conf.optim``.

        ``'sgd'`` gives SGD with momentum 0.9, ``'adam'`` gives AdamW; both use
        ``conf.lr`` and ``conf.weight_decay``.

        Returns:
            The optimizer, or ``None`` when ``conf.optim`` is not recognised.
        """
        if conf.optim == 'sgd':
            return torch.optim.SGD(
                model.parameters(), lr=conf.lr, momentum=0.9,
                weight_decay=conf.weight_decay)
        if conf.optim == 'adam':
            return torch.optim.AdamW(
                model.parameters(), lr=conf.lr,
                weight_decay=conf.weight_decay)
        return None

    def fit(self, epochs):
        """Train for up to ``epochs`` epochs with early stopping.

        After every epoch the metrics are written to TensorBoard, the scheduler
        is stepped and the loss history is saved. Whenever the validation loss
        improves, a checkpoint (epoch, model, optimizer, conf) is written to
        ``EXPERIMENT_DIR``. Training stops early once the validation loss has
        not improved for ``self.max_patience`` consecutive epochs.

        Args:
            epochs: maximum number of epochs to run.
        """
        best_loss = None
        patience = self.max_patience
        self.sample_count = 0
        self.history = util.LossHistory(save_dir=EXPERIMENT_DIR)

        print(f'Running on {self.device}')
        print(f'{len(self.train_loader.dataset)} examples in training set')
        print(f'{len(self.val_loader.dataset)} examples in validation set')
        trial = os.environ.get('TRIAL')
        suffix = f"-trial{trial}" if trial is not None else ""
        log_dir = EXPERIMENT_DIR+f"runs/{datetime.now().strftime('%b%d_%H-%M-%S')}{suffix}"
        writer = SummaryWriter(log_dir=log_dir)

        print('Training in progress...')
        try:
            for epoch in range(epochs):
                # train for one epoch
                print(f'Epoch {epoch}:')
                train_loss = self.train_epoch(epoch)
                val_loss, val_score = self.validate()
                writer.add_scalar('Training loss', train_loss, epoch)
                writer.add_scalar('Validation loss', val_loss, epoch)
                writer.add_scalar('Validation F1 score', val_score, epoch)
                writer.add_scalar('LR',self.optimizer.param_groups[0]["lr"],epoch)
                writer.flush()
                self.scheduler.step()
                print(f'training loss {train_loss:.5f}')
                print(f'Validation F1 score {val_score:.4f} loss {val_loss:.4f}\n')
                self.history.add_epoch_val_loss(epoch, self.sample_count, val_loss)

                stop_early = False
                if best_loss is None or val_loss < best_loss:
                    best_loss = val_loss
                    state = {
                        'epoch': epoch, 'model': self.model.state_dict(),
                        'optimizer' : self.optimizer.state_dict(),
                        'conf': self.conf.as_dict()
                    }
                    torch.save(state, EXPERIMENT_DIR+f"best_valid-l{val_loss:0.4f}_s{val_score:0.4f}.pth")
                    patience = self.max_patience
                else:
                    patience -= 1
                    stop_early = patience == 0

                # Save before a possible early stop so the history of the
                # final epoch is not lost.
                self.history.save()
                if stop_early:
                    print(
                        f'Validation loss did not improve for '
                        f'{self.max_patience} epochs')
                    break
        finally:
            # Close the TensorBoard writer even if training raises.
            writer.close()

    def criterion(self, outputs, labels):
        """Return the sum of every loss in ``self.loss_funcs`` for one batch."""
        result = 0
        for func in self.loss_funcs:
            result += func(outputs, labels)
        return result

    def train_epoch(self, epoch):
        """Run one pass over the training loader and return the mean batch loss.

        Every ``val_interval`` training steps a single validation batch is
        evaluated and recorded in ``self.history``, so that validation loss is
        sampled throughout the epoch rather than only at its end.

        Args:
            epoch: index of the current epoch (recorded in the loss history).

        Returns:
            Mean of the per-batch training losses.
        """
        model = self.model
        optimizer = self.optimizer

        val_iter = iter(self.val_loader)
        dataloader_len = len(self.train_loader)
        val_interval = dataloader_len//len(self.val_loader)
        assert val_interval > 0
        train_loss_list = []
        model.train()
        pbar = tqdm(enumerate(self.train_loader), total=dataloader_len, desc='Train ')
        for step, (images, labels) in pbar:
            if (step + 1) % val_interval == 0:
                model.eval()
                # collect validation history for tuning
                try:
                    with torch.no_grad():
                        val_images, val_labels = next(val_iter)
                        val_images = val_images.to(self.device)
                        val_labels = val_labels.to(self.device)
                        with autocast(self.device_type, enabled=self.use_amp):
                            val_outputs = model(val_images)
                        val_loss = self.criterion(val_outputs, val_labels)
                        self.history.add_val_loss(epoch, self.sample_count, val_loss.item())
                except StopIteration:
                    # The validation loader ran out of batches for this epoch;
                    # sampling is best-effort, so simply skip it.
                    pass
                # switch back to training mode
                model.train()

            images = images.to(self.device)
            labels = labels.to(self.device)
            # compute output
            # use AMP
            with autocast(self.device_type, enabled=self.use_amp):
                outputs = model(images)
                loss = self.criterion(outputs, labels)

            # Read the scalar once; each .item() call synchronises with the device.
            loss_value = loss.item()
            train_loss_list.append(loss_value)
            self.sample_count += images.shape[0]
            self.history.add_train_loss(epoch, self.sample_count, loss_value)
            if (step + 1) % self.print_interval == 0:
                print(f'Batch {step + 1}: training loss {loss_value:.5f} lr: {optimizer.param_groups[0]["lr"]:.8f}')
            # compute gradient and do the optimizer step
            if self.use_amp:
                self.scaler.scale(loss).backward()
                self.scaler.step(optimizer)
                self.scaler.update()
            else:
                loss.backward()
                optimizer.step()
            optimizer.zero_grad()

        mean_train_loss = np.array(train_loss_list).mean()
        return mean_train_loss

    def validate(self):
        """Evaluate the model on the whole validation loader.

        Predictions are the rounded sigmoid of the model outputs and are scored
        with ``util.dice_coeff``.

        Returns:
            Tuple ``(mean loss, mean score)`` averaged over validation batches.
        """
        sigmoid = nn.Sigmoid()
        losses = []
        scores = []
        self.model.eval()
        with torch.no_grad():
            pbar = tqdm(self.val_loader, total=len(self.val_loader), desc='Valid ')
            for images, labels in pbar:
                images = images.to(self.device)
                labels = labels.to(self.device)
                with autocast(self.device_type, enabled=self.use_amp):
                    outputs = self.model(images)
                preds = sigmoid(outputs).round().to(torch.float32)
                scores.append(util.dice_coeff(labels, preds).item())
                losses.append(self.criterion(outputs, labels).item())
        return np.mean(losses), np.mean(scores)

In [14]:
def worker_init_fn(worker_id):
    """Seed Python's ``random`` and NumPy's global RNG for a DataLoader worker.

    Both seeds are drawn from the current ``random`` state and offset by
    ``worker_id``.

    Args:
        worker_id: index of the worker, as passed by ``DataLoader``.
    """
    random.seed(random.randint(0, 2**32) + worker_id)
    # np.random.seed only accepts values in [0, 2**32 - 1]; the draw plus the
    # worker offset can exceed that, so wrap it instead of raising ValueError.
    np.random.seed((random.randint(0, 2**32) + worker_id) % 2**32)

In [15]:
class CFG:
    """Run settings read by ``main()``."""

    # --- data ---
    # Directory passed to the Trainer as its input directory.
    input = WORK_DIR
    # Percentage of the data to use; main() prints a warning when it is not 100.
    subset = 100

    # --- reproducibility ---
    # Seed applied in main(); None skips seeding.
    seed = 66

    # --- schedule / logging ---
    epochs = 20
    print_interval = 100
    num_workers = mp.cpu_count()

    # --- checkpoint ---
    # Checkpoint to resume from; set to None to start from a fresh Config().
    resume = EXPERIMENT_DIR+'best_valid-l0.1062_s0.8990.pth'

In [16]:
def main():
    """Build a ``Trainer`` from the ``CFG`` settings and run training.

    Seeds Python's ``random``, NumPy and PyTorch when ``CFG.seed`` is set. When
    ``CFG.resume`` names a checkpoint, the configuration stored in it is used
    and the checkpoint is handed to the ``Trainer``; otherwise a default
    ``Config()`` is used.
    """
    if CFG.subset != 100:
        print(f'\nWARNING: {CFG.subset}% of the data will be used for training\n')
    if CFG.seed is not None:
        random.seed(CFG.seed)
        # Seed NumPy's global RNG as well, so NumPy-based randomness in the
        # main process is reproducible too.
        np.random.seed(CFG.seed)
        torch.manual_seed(CFG.seed)
        cudnn.deterministic = True
    input_dir = CFG.input
    model_file = CFG.resume
    if model_file:
        print(f'Loading model from {model_file}')
        # map_location lets a checkpoint saved on a GPU be restored on whatever
        # device this session uses (including a CPU-only runtime).
        checkpoint = torch.load(model_file, map_location=device)
        conf = Config(checkpoint['conf'])
    else:
        checkpoint = None
        conf = Config()

    print(conf)
    trainer = Trainer(
        conf, input_dir, device, CFG.num_workers,
        checkpoint, CFG.print_interval, CFG.subset)
    trainer.fit(CFG.epochs)

In [17]:
# Entry point: run the training job and print a marker once it has finished.
if __name__ == '__main__':
    main()
    print('Done')

Loading model from /content/drive/MyDrive/kaggle/gi_t_is/output/lum_unetb3_320_384/best_valid-l0.1062_s0.8990.pth
{arch: Unet, aug_prob: 0.4, backbone: efficientnet-b3, batch_size: 32, crop_size: 1,
  gamma: 0.96, h_image_size: 384, lr: 0.001, max_cutout: 0, optim: adam, pretrained: true,
  strong_aug: true, w_image_size: 320, weight_decay: 0.01}



Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b3-5fb5a3c3.pth


  0%|          | 0.00/47.1M [00:00<?, ?B/s]

Running on cuda
34646 examples in training set
3850 examples in validation set
Training in progress...
Epoch 0:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08818 lr: 0.00007334
Batch 200: training loss 0.09262 lr: 0.00007334
Batch 300: training loss 0.16754 lr: 0.00007334
Batch 400: training loss 0.07760 lr: 0.00007334
Batch 500: training loss 0.10281 lr: 0.00007334
Batch 600: training loss 0.11217 lr: 0.00007334
Batch 700: training loss 0.09128 lr: 0.00007334
Batch 800: training loss 0.08576 lr: 0.00007334
Batch 900: training loss 0.08939 lr: 0.00007334
Batch 1000: training loss 0.11575 lr: 0.00007334


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09760
Validation F1 score 0.8989 loss 0.1064

Epoch 1:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.09359 lr: 0.00007041
Batch 200: training loss 0.10404 lr: 0.00007041
Batch 300: training loss 0.07978 lr: 0.00007041
Batch 400: training loss 0.09055 lr: 0.00007041
Batch 500: training loss 0.08803 lr: 0.00007041
Batch 600: training loss 0.08145 lr: 0.00007041
Batch 700: training loss 0.09417 lr: 0.00007041
Batch 800: training loss 0.08753 lr: 0.00007041
Batch 900: training loss 0.08073 lr: 0.00007041
Batch 1000: training loss 0.09048 lr: 0.00007041


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09745
Validation F1 score 0.8970 loss 0.1082

Epoch 2:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.09634 lr: 0.00006759
Batch 200: training loss 0.13474 lr: 0.00006759
Batch 300: training loss 0.11371 lr: 0.00006759
Batch 400: training loss 0.09295 lr: 0.00006759
Batch 500: training loss 0.07402 lr: 0.00006759
Batch 600: training loss 0.08481 lr: 0.00006759
Batch 700: training loss 0.08295 lr: 0.00006759
Batch 800: training loss 0.09679 lr: 0.00006759
Batch 900: training loss 0.09187 lr: 0.00006759
Batch 1000: training loss 0.07955 lr: 0.00006759


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09652
Validation F1 score 0.8985 loss 0.1066

Epoch 3:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.09797 lr: 0.00006489
Batch 200: training loss 0.07730 lr: 0.00006489
Batch 300: training loss 0.10338 lr: 0.00006489
Batch 400: training loss 0.14920 lr: 0.00006489
Batch 500: training loss 0.08278 lr: 0.00006489
Batch 600: training loss 0.08560 lr: 0.00006489
Batch 700: training loss 0.09028 lr: 0.00006489
Batch 800: training loss 0.10138 lr: 0.00006489
Batch 900: training loss 0.10750 lr: 0.00006489
Batch 1000: training loss 0.09015 lr: 0.00006489


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09655
Validation F1 score 0.8981 loss 0.1073

Epoch 4:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.10514 lr: 0.00006229
Batch 200: training loss 0.08305 lr: 0.00006229
Batch 300: training loss 0.09638 lr: 0.00006229
Batch 400: training loss 0.09000 lr: 0.00006229
Batch 500: training loss 0.08490 lr: 0.00006229
Batch 600: training loss 0.08421 lr: 0.00006229
Batch 700: training loss 0.10366 lr: 0.00006229
Batch 800: training loss 0.09511 lr: 0.00006229
Batch 900: training loss 0.11398 lr: 0.00006229
Batch 1000: training loss 0.10502 lr: 0.00006229


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09703
Validation F1 score 0.8964 loss 0.1083

Epoch 5:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08907 lr: 0.00005980
Batch 200: training loss 0.09877 lr: 0.00005980
Batch 300: training loss 0.12854 lr: 0.00005980
Batch 400: training loss 0.08975 lr: 0.00005980
Batch 500: training loss 0.13977 lr: 0.00005980
Batch 600: training loss 0.11557 lr: 0.00005980
Batch 700: training loss 0.09646 lr: 0.00005980
Batch 800: training loss 0.06669 lr: 0.00005980
Batch 900: training loss 0.08220 lr: 0.00005980
Batch 1000: training loss 0.07473 lr: 0.00005980


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09644
Validation F1 score 0.8969 loss 0.1080

Epoch 6:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.12947 lr: 0.00005741
Batch 200: training loss 0.08398 lr: 0.00005741
Batch 300: training loss 0.09283 lr: 0.00005741
Batch 400: training loss 0.08668 lr: 0.00005741
Batch 500: training loss 0.09240 lr: 0.00005741
Batch 600: training loss 0.12835 lr: 0.00005741
Batch 700: training loss 0.09582 lr: 0.00005741
Batch 800: training loss 0.07820 lr: 0.00005741
Batch 900: training loss 0.07560 lr: 0.00005741
Batch 1000: training loss 0.10471 lr: 0.00005741


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09647
Validation F1 score 0.8983 loss 0.1068

Epoch 7:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08633 lr: 0.00005511
Batch 200: training loss 0.10032 lr: 0.00005511
Batch 300: training loss 0.08966 lr: 0.00005511
Batch 400: training loss 0.09408 lr: 0.00005511
Batch 500: training loss 0.08056 lr: 0.00005511
Batch 600: training loss 0.08411 lr: 0.00005511
Batch 700: training loss 0.08870 lr: 0.00005511
Batch 800: training loss 0.08813 lr: 0.00005511
Batch 900: training loss 0.07446 lr: 0.00005511
Batch 1000: training loss 0.07607 lr: 0.00005511


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09502
Validation F1 score 0.8990 loss 0.1063

Epoch 8:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.09456 lr: 0.00005291
Batch 200: training loss 0.09807 lr: 0.00005291
Batch 300: training loss 0.11858 lr: 0.00005291
Batch 400: training loss 0.08495 lr: 0.00005291
Batch 500: training loss 0.09397 lr: 0.00005291
Batch 600: training loss 0.09328 lr: 0.00005291
Batch 700: training loss 0.08081 lr: 0.00005291
Batch 800: training loss 0.09048 lr: 0.00005291
Batch 900: training loss 0.08718 lr: 0.00005291
Batch 1000: training loss 0.09669 lr: 0.00005291


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09491
Validation F1 score 0.8998 loss 0.1055

Epoch 9:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.09632 lr: 0.00005079
Batch 200: training loss 0.10734 lr: 0.00005079
Batch 300: training loss 0.11338 lr: 0.00005079
Batch 400: training loss 0.08870 lr: 0.00005079
Batch 500: training loss 0.08352 lr: 0.00005079
Batch 600: training loss 0.10351 lr: 0.00005079
Batch 700: training loss 0.13273 lr: 0.00005079
Batch 800: training loss 0.09379 lr: 0.00005079
Batch 900: training loss 0.10847 lr: 0.00005079
Batch 1000: training loss 0.09036 lr: 0.00005079


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09501
Validation F1 score 0.9006 loss 0.1048

Epoch 10:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08282 lr: 0.00004876
Batch 200: training loss 0.12821 lr: 0.00004876
Batch 300: training loss 0.12964 lr: 0.00004876
Batch 400: training loss 0.11570 lr: 0.00004876
Batch 500: training loss 0.09127 lr: 0.00004876
Batch 600: training loss 0.08938 lr: 0.00004876
Batch 700: training loss 0.10726 lr: 0.00004876
Batch 800: training loss 0.07924 lr: 0.00004876
Batch 900: training loss 0.11851 lr: 0.00004876
Batch 1000: training loss 0.09337 lr: 0.00004876


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09446
Validation F1 score 0.8998 loss 0.1056

Epoch 11:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.14159 lr: 0.00004681
Batch 200: training loss 0.08613 lr: 0.00004681
Batch 300: training loss 0.14834 lr: 0.00004681
Batch 400: training loss 0.09608 lr: 0.00004681
Batch 500: training loss 0.09119 lr: 0.00004681
Batch 600: training loss 0.09070 lr: 0.00004681
Batch 700: training loss 0.08478 lr: 0.00004681
Batch 800: training loss 0.09355 lr: 0.00004681
Batch 900: training loss 0.07850 lr: 0.00004681
Batch 1000: training loss 0.14971 lr: 0.00004681


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09561
Validation F1 score 0.8994 loss 0.1059

Epoch 12:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.16046 lr: 0.00004494
Batch 200: training loss 0.08704 lr: 0.00004494
Batch 300: training loss 0.08700 lr: 0.00004494
Batch 400: training loss 0.08178 lr: 0.00004494
Batch 500: training loss 0.09455 lr: 0.00004494
Batch 600: training loss 0.10232 lr: 0.00004494
Batch 700: training loss 0.08934 lr: 0.00004494
Batch 800: training loss 0.09496 lr: 0.00004494
Batch 900: training loss 0.12719 lr: 0.00004494
Batch 1000: training loss 0.08082 lr: 0.00004494


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09520
Validation F1 score 0.9010 loss 0.1043

Epoch 13:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.09456 lr: 0.00004314
Batch 200: training loss 0.11054 lr: 0.00004314
Batch 300: training loss 0.10268 lr: 0.00004314
Batch 400: training loss 0.09169 lr: 0.00004314
Batch 500: training loss 0.07560 lr: 0.00004314
Batch 600: training loss 0.08687 lr: 0.00004314
Batch 700: training loss 0.08193 lr: 0.00004314
Batch 800: training loss 0.09200 lr: 0.00004314
Batch 900: training loss 0.10780 lr: 0.00004314
Batch 1000: training loss 0.08982 lr: 0.00004314


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09459
Validation F1 score 0.9004 loss 0.1048

Epoch 14:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08896 lr: 0.00004141
Batch 200: training loss 0.08021 lr: 0.00004141
Batch 300: training loss 0.09511 lr: 0.00004141
Batch 400: training loss 0.13203 lr: 0.00004141
Batch 500: training loss 0.07421 lr: 0.00004141
Batch 600: training loss 0.10703 lr: 0.00004141
Batch 700: training loss 0.12217 lr: 0.00004141
Batch 800: training loss 0.09524 lr: 0.00004141
Batch 900: training loss 0.09043 lr: 0.00004141
Batch 1000: training loss 0.15489 lr: 0.00004141


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09463
Validation F1 score 0.8998 loss 0.1049

Epoch 15:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08836 lr: 0.00003976
Batch 200: training loss 0.08527 lr: 0.00003976
Batch 300: training loss 0.10250 lr: 0.00003976
Batch 400: training loss 0.09116 lr: 0.00003976
Batch 500: training loss 0.08475 lr: 0.00003976
Batch 600: training loss 0.08591 lr: 0.00003976
Batch 700: training loss 0.11278 lr: 0.00003976
Batch 800: training loss 0.09115 lr: 0.00003976
Batch 900: training loss 0.08674 lr: 0.00003976
Batch 1000: training loss 0.09640 lr: 0.00003976


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09485
Validation F1 score 0.8992 loss 0.1058

Epoch 16:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.07945 lr: 0.00003817
Batch 200: training loss 0.10675 lr: 0.00003817
Batch 300: training loss 0.09161 lr: 0.00003817
Batch 400: training loss 0.09134 lr: 0.00003817
Batch 500: training loss 0.10078 lr: 0.00003817
Batch 600: training loss 0.09497 lr: 0.00003817
Batch 700: training loss 0.11374 lr: 0.00003817
Batch 800: training loss 0.14527 lr: 0.00003817
Batch 900: training loss 0.10041 lr: 0.00003817
Batch 1000: training loss 0.10537 lr: 0.00003817


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09524
Validation F1 score 0.8995 loss 0.1052

Epoch 17:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08308 lr: 0.00003664
Batch 200: training loss 0.07704 lr: 0.00003664
Batch 300: training loss 0.11854 lr: 0.00003664
Batch 400: training loss 0.08520 lr: 0.00003664
Batch 500: training loss 0.09439 lr: 0.00003664
Batch 600: training loss 0.08188 lr: 0.00003664
Batch 700: training loss 0.09537 lr: 0.00003664
Batch 800: training loss 0.07912 lr: 0.00003664
Batch 900: training loss 0.09166 lr: 0.00003664
Batch 1000: training loss 0.13268 lr: 0.00003664


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09676
Validation F1 score 0.9003 loss 0.1047

Epoch 18:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08036 lr: 0.00003518
Batch 200: training loss 0.10747 lr: 0.00003518
Batch 300: training loss 0.09383 lr: 0.00003518
Batch 400: training loss 0.14970 lr: 0.00003518
Batch 500: training loss 0.11419 lr: 0.00003518
Batch 600: training loss 0.08734 lr: 0.00003518
Batch 700: training loss 0.07838 lr: 0.00003518
Batch 800: training loss 0.07852 lr: 0.00003518
Batch 900: training loss 0.09021 lr: 0.00003518
Batch 1000: training loss 0.10192 lr: 0.00003518


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09655
Validation F1 score 0.9018 loss 0.1034

Epoch 19:


Train :   0%|          | 0/1083 [00:00<?, ?it/s]

Batch 100: training loss 0.08561 lr: 0.00003377
Batch 200: training loss 0.09482 lr: 0.00003377
Batch 300: training loss 0.07851 lr: 0.00003377
Batch 400: training loss 0.13993 lr: 0.00003377
Batch 500: training loss 0.06639 lr: 0.00003377
Batch 600: training loss 0.08645 lr: 0.00003377
Batch 700: training loss 0.07935 lr: 0.00003377
Batch 800: training loss 0.08848 lr: 0.00003377
Batch 900: training loss 0.08226 lr: 0.00003377
Batch 1000: training loss 0.09812 lr: 0.00003377


Valid :   0%|          | 0/121 [00:00<?, ?it/s]

training loss 0.09691
Validation F1 score 0.9010 loss 0.1040

Done


In [18]:
# List the current working directory after the run.
!ls

config.yaml  masks	  sample_submission.csv  train.csv
drive	     sample_data  train			 train_processed.csv
