In [None]:
!nvidia-smi

Sun Feb 27 14:29:04 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-40GB      Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    49W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install wandb
!pip install -q --upgrade timm
!pip install -q --upgrade pytorch-lightning
!pip install --upgrade albumentations
!pip install --upgrade opencv-python
!pip install python-box

# BASE_PATH = '/content/drive/MyDrive/ml competitions/zindi/sign language/'
# !cp -r "{BASE_PATH}" ./data/
# !unzip -q "data/Images.zip" -d ./data

In [None]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mdevnikhilmishra[0m (use `wandb login --relogin` to force relogin)


True

In [None]:
import numpy as np
import pandas as pd
import random
import os
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import log_loss
from timm import create_model
import albumentations as A
from albumentations.pytorch import ToTensorV2
from box import Box
import pytorch_lightning as pl
import timm
from PIL import Image
from scipy.special import softmax
import gc
from pathlib import Path
from albumentations.core.transforms_interface import ImageOnlyTransform
from timm.optim import MADGRAD
from torch.optim import AdamW

In [None]:
[c for c in timm.list_models() if 'eff' in c and 'ns' in c and 'l2' in c]

['tf_efficientnet_l2_ns', 'tf_efficientnet_l2_ns_475']

In [None]:
class Config:
    """Notebook-wide experiment constants.

    The class is used purely as a namespace: attributes are read as
    ``Config.<NAME>`` and the class is never instantiated in this notebook.
    """

    # --- experiment identity / filesystem layout ---
    NAME = 'conv_v7'            # used for output folders and the W&B run name
    DATA_DIR = './data/'        # folder holding the CSV files and the Images directory
    OUTPUT_PATH = './outputs'   # root folder for per-fold prediction CSVs

    # --- dataframe column names ---
    ID_COL = 'img_IDS'
    LABEL_COL = 'Label'

    # --- cross-validation ---
    N_SPLITS = 30               # number of stratified folds
    LAST_FOLD = 6               # the fold loop stops when it reaches this fold index
    RANDOM_STATE = 125          # NOTE(review): not read by the visible cells (the splitter uses config.seed)

    # --- training ---
    N_EPOCHS = 5
    BATCH_SIZE = 4              # train loader batch size
    IMAGE_SIZE = 600            # side length of the square model input

    # --- debugging ---
    DEBUG = False               # when True, train on a random subsample
    DEBUG_FRAC = 0.15           # fraction of the training rows kept in debug mode

    # --- inference ---
    N_TTA_AUGS = 9              # number of augmented copies per image at prediction time

In [None]:
# Load the three competition tables from Config.DATA_DIR.
train = pd.read_csv(os.path.join(Config.DATA_DIR, 'Train.csv'))
test = pd.read_csv(os.path.join(Config.DATA_DIR, 'Test.csv'))
# Sample submission: save_preds() uses its columns (all but the first) to order the class columns.
ss = pd.read_csv(os.path.join(Config.DATA_DIR, 'SampleSubmission.csv'))

In [None]:
# Run configuration consumed by the data module, the Lightning module and the Trainer.
# It is wrapped in a Box at the end so entries can be read with attribute access
# (e.g. config.trainer, config.model.name).
config = {'seed': 2021,  # used as random_state of the StratifiedKFold splitter
          'root': os.path.join(Config.DATA_DIR, 'Images'),  # folder containing the <id>.jpg files
          'n_splits': Config.N_SPLITS,
          'epoch': Config.N_EPOCHS,  # passed to the Trainer as max_epochs
          # Keyword arguments forwarded verbatim to pl.Trainer(**config.trainer).
          # NOTE(review): the captured logs report `progress_bar_refresh_rate` as deprecated in v1.5.
          'trainer': {
              'gpus': [0],
              'accumulate_grad_batches': 4,
              'progress_bar_refresh_rate': 1,
              'fast_dev_run': False,
              'num_sanity_val_steps': 0,
              'resume_from_checkpoint': None,
          },
          # Keyword arguments forwarded to the training DataLoader.
          'train_loader':{
              'batch_size': Config.BATCH_SIZE,
              'shuffle': True,
              'num_workers': 4,
              'pin_memory': True,
              'drop_last': True,
          },
          # Keyword arguments forwarded to the validation, test and TTA DataLoaders.
          'val_loader': {
              'batch_size': 64,
              'shuffle': False,
              'num_workers': 4,
              'pin_memory': True,
              'drop_last': False
         },
          'model':{
              'name': 'convnext_xlarge_384_in22ft1k',  # timm model name given to create_model
              # NOTE(review): output_dim is not read by the visible code; the head size comes from Config.N_LABELS.
              'output_dim': 9
          },
          # The optimizer / scheduler names are resolved with eval() in Model.configure_optimizers,
          # so they must be valid expressions in the notebook's global namespace.
          'optimizer':{
              'name': 'AdamW',
              'params':{
                  'lr': 5e-5
              },
          },
          'scheduler':{
              'name': 'optim.lr_scheduler.CosineAnnealingWarmRestarts',
              'params':{
                  'T_0': 160,
                  'eta_min': 1e-5,
              }
          },
         }

config = Box(config)

In [None]:
# Per-channel ImageNet statistics (RGB order).
# NOTE(review): these constants are not referenced by the visible cells; the pipelines call
# A.Normalize() without explicit mean/std arguments.
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB

In [None]:
# Square target resolution; used below to size the cutout holes of the training pipeline.
HEIGHT, WIDTH = Config.IMAGE_SIZE, Config.IMAGE_SIZE

In [None]:
class CutoutV3(A.DualTransform):
    """Cutout variant whose holes are centred near the image borders.

    For every hole a centre is drawn independently per axis from either the
    first 10% or the last 10% of that axis, so the holes land in the border
    bands / corners rather than in the middle of the picture.

    Args:
        num_holes: number of rectangles to cut per call.
        max_h_size: height of each rectangle in pixels (before clipping).
        max_w_size: width of each rectangle in pixels (before clipping).
        fill_value: stored on the instance as ``self.fill_value``.
        always_apply, p: forwarded to the albumentations base class.
    """

    def __init__(
        self,
        num_holes=8,
        max_h_size=8,
        max_w_size=8,
        fill_value=0,
        always_apply=False,
        p=0.5,
    ):
        super().__init__(always_apply, p)
        self.num_holes, self.fill_value = num_holes, fill_value
        self.max_h_size, self.max_w_size = max_h_size, max_w_size

    def apply(self, image, fill_value=0, holes=(), **params):
        """Fill the given ``holes`` of ``image`` with the received ``fill_value``."""
        return A.functional.cutout(image, holes, fill_value)

    @staticmethod
    def _border_coord(extent):
        """Draw one coordinate from the first or the last 10% of ``[0, extent]``."""
        # Both candidates are drawn before choosing, in this order.
        near_start = random.randint(0, int(extent * 0.1))
        near_end = random.randint(int(extent * 0.9), extent)
        return random.choice([near_start, near_end])

    def get_params_dependent_on_targets(self, params):
        """Sample ``num_holes`` boxes as ``(x1, y1, x2, y2)`` clipped to the image."""
        height, width = params["image"].shape[:2]
        half_h, half_w = self.max_h_size // 2, self.max_w_size // 2

        boxes = []
        for _ in range(self.num_holes):
            # The vertical coordinate is sampled before the horizontal one.
            centre_y = self._border_coord(height)
            centre_x = self._border_coord(width)

            top = np.clip(centre_y - half_h, 0, height)
            bottom = np.clip(top + self.max_h_size, 0, height)
            left = np.clip(centre_x - half_w, 0, width)
            right = np.clip(left + self.max_w_size, 0, width)
            boxes.append((left, top, right, bottom))

        return {"holes": boxes}

    @property
    def targets_as_params(self):
        """The image itself is needed to size the holes."""
        return ["image"]

    def get_transform_init_args_names(self):
        return ("num_holes", "max_h_size", "max_w_size")

In [None]:
class PadAndResize(ImageOnlyTransform):
    """Centre the image on a black square canvas, then resize to the model input size.

    The image is converted to RGB, pasted in the middle of a
    ``canvas_size x canvas_size`` black canvas and the canvas is resized to
    ``Config.IMAGE_SIZE x Config.IMAGE_SIZE`` (read at call time).

    Args:
        always_apply: forwarded to the albumentations base class (defaults to True).
        p: forwarded to the albumentations base class.
        canvas_size: side length in pixels of the intermediate square canvas.

    NOTE(review): images larger than ``canvas_size`` produce a paste box that
    extends beyond the canvas - confirm the data never exceeds it.
    """

    def __init__(self, always_apply: bool = True, p: float = 0.5, canvas_size: int = 1280):
        # Let the base class set up its own bookkeeping (probability, replay state, ...)
        # instead of copying its private attributes by hand.
        super().__init__(always_apply=always_apply, p=p)
        self.canvas_size = canvas_size

    def apply(self, img, **params):
        """Return the padded and resized image as a numpy array."""
        side = self.canvas_size
        img = Image.fromarray(np.uint8(img)).convert('RGB')
        new_im = Image.new("RGB", (side, side))
        w, h = img.size
        # (left, upper, right, lower) box that centres the image on the canvas.
        box_coords = ((side - w) // 2, (side - h) // 2, (side + w) // 2, (side + h) // 2)
        new_im.paste(img, box_coords)
        new_im = new_im.resize((Config.IMAGE_SIZE, Config.IMAGE_SIZE))
        return np.array(new_im)

    def get_transform_init_args_names(self):
        """Constructor arguments reported by the transform's repr / serialisation."""
        return ("canvas_size",)

In [None]:
def apply(img):
    """Pad ``img`` onto a centred 1280x1280 black canvas and resize it.

    Stand-alone counterpart of ``PadAndResize.apply``: the input array is
    converted to an RGB PIL image, pasted in the middle of the canvas, and the
    canvas is resized to ``Config.IMAGE_SIZE x Config.IMAGE_SIZE``.

    Returns:
        The resized canvas as a numpy array.
    """
    canvas_side = 1280
    source = Image.fromarray(np.uint8(img)).convert('RGB')
    canvas = Image.new("RGB", (canvas_side, canvas_side))

    width, height = source.size
    left, right = (canvas_side - width) // 2, (canvas_side + width) // 2
    top, bottom = (canvas_side - height) // 2, (canvas_side + height) // 2
    canvas.paste(source, (left, top, right, bottom))

    target_size = (Config.IMAGE_SIZE, Config.IMAGE_SIZE)
    return np.array(canvas.resize(target_size))

In [None]:
# Training pipeline: geometric + colour augmentation, border cutout, then pad/resize,
# normalisation and tensor conversion.
# A single HorizontalFlip(p=0.5) is kept: the pipeline previously listed it twice, and two
# independent flips with p=0.5 still give a net flip probability of 0.5, so the second
# one only added work.
albu_train_transforms = A.Compose([
    A.Perspective(scale=(0.05, 0.12), p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightness(limit=0.1, p=0.75),
    A.RandomContrast(limit=0.1, p=0.75),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=40, val_shift_limit=0, p=0.75),
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.3, rotate_limit=35, border_mode=0, p=0.75),
    # Hole size is derived from the target resolution, but the cutout runs before PadAndResize,
    # i.e. on the image at its original size.
    CutoutV3(max_h_size=int(HEIGHT * 0.03), max_w_size=int(WIDTH * 0.03), num_holes=4, p=0.75),
    PadAndResize(),
    A.Normalize(always_apply=True),
    ToTensorV2(always_apply=True),
])


# Validation / plain test pipeline: deterministic pad/resize + normalisation only.
albu_val_transforms = A.Compose([
    PadAndResize(),
    A.Normalize(always_apply=True),
    ToTensorV2(),
])


# Test-time-augmentation pipeline: a milder, lower-probability version of the training
# augmentations, applied to repeated copies of each image in get_tta_preds().
albu_test_tta_transforms = A.Compose([
    A.Perspective(scale=(0.05, 0.12), p=0.1),
    A.HorizontalFlip(p=0.1),
    A.RandomBrightness(limit=0.01, p=0.15),
    A.RandomContrast(limit=0.01, p=0.15),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=0, p=0.3),
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=15, border_mode=0, p=0.5),
    PadAndResize(),
    A.Normalize(always_apply=True),
    ToTensorV2(),
])



In [None]:
class SignDataset(torch.utils.data.Dataset):
    """Dataset of sign images addressed by the ids stored in a dataframe.

    Args:
        df: dataframe with a ``Config.ID_COL`` column and, optionally, a
            ``Config.LABEL_COL`` column.
        augmentations: albumentations-style callable invoked as
            ``augmentations(image=array)["image"]``; ``None`` disables it.

    Items are ``(image, label)`` when the dataframe has a label column and
    ``image`` alone otherwise. Images are read from
    ``{config.root}/{id}.jpg``.
    """

    def __init__(self, df, augmentations=None):
        if augmentations is None:
            # Replaces a leftover debug print: without a pipeline the items are raw numpy
            # arrays (no resize, normalisation or tensor conversion).
            import warnings
            warnings.warn(
                "SignDataset created without augmentations; items will be raw numpy images."
            )
        self._X = df[Config.ID_COL].values
        self._y = df[Config.LABEL_COL].values if Config.LABEL_COL in df.keys() else None
        self.augmentations = augmentations

    def __len__(self):
        return len(self._X)

    def __getitem__(self, idx):
        image_path = self._X[idx]
        # The context manager closes the file handle promptly; converting to RGB guarantees a
        # 3-channel array, so colour augmentations never receive grayscale or RGBA input.
        with Image.open(f'{config.root}/{image_path}.jpg') as handle:
            image = np.array(handle.convert('RGB'))

        if self.augmentations is not None:
            image = self.augmentations(image=image)['image']

        if self._y is not None:
            return image, self._y[idx]
        return image

class SignDataModule(pl.LightningDataModule):
    """Lightning data module serving the train / validation / test dataframes of one fold.

    Args:
        train_df: rows used for training (training augmentations).
        val_df: rows used for validation (validation transforms).
        test_df: rows used for testing (validation transforms).
        cfg: configuration exposing ``train_loader`` and ``val_loader`` keyword
            dictionaries for ``torch.utils.data.DataLoader``.
    """

    def __init__(
        self,
        train_df,
        val_df,
        test_df,
        cfg,
    ):
        super().__init__()
        self._cfg = cfg
        self._train_df, self._val_df = train_df, val_df
        self.test_df = test_df

    def __create_dataset(self, train=True):
        """Build the training dataset when ``train`` is truthy, else the validation one."""
        if train:
            return SignDataset(self._train_df, albu_train_transforms)
        return SignDataset(self._val_df, albu_val_transforms)

    def train_dataloader(self):
        train_set = self.__create_dataset(True)
        return torch.utils.data.DataLoader(train_set, **self._cfg.train_loader)

    def val_dataloader(self):
        val_set = self.__create_dataset(False)
        return torch.utils.data.DataLoader(val_set, **self._cfg.val_loader)

    def test_dataloader(self):
        # The test set reuses the validation transforms and the validation loader settings.
        test_set = SignDataset(self.test_df, albu_val_transforms)
        return torch.utils.data.DataLoader(test_set, **self._cfg.val_loader)

In [None]:
def tta_test_dataloader(test_df):
    """Return a DataLoader over ``test_df`` that applies the TTA augmentation pipeline.

    The loader is configured with the global ``config.val_loader`` settings.
    """
    tta_dataset = SignDataset(test_df, albu_test_tta_transforms)
    loader_options = config.val_loader
    return torch.utils.data.DataLoader(tta_dataset, **loader_options)

In [None]:
# Encode the class names as integer ids: mapper is {class name -> id}, inv_mapper the reverse.
# Ids follow the order in which the classes first appear in `train`.
mapper = pd.Series(index=train[Config.LABEL_COL].unique(), data=np.arange(train[Config.LABEL_COL].nunique())).to_dict()
inv_mapper = {val: key for key, val in mapper.items()}
# NOTE(review): this overwrites the label column in place, so the cell is not idempotent -
# re-running it without reloading `train` would build the mapper from the already-encoded
# values and inv_mapper would no longer hold the original class names.
# The ids are stored as float64 here; Model casts them to torch.long before computing the loss.
train[Config.LABEL_COL] = train[Config.LABEL_COL].map(mapper).astype(np.float64)
Config.N_LABELS = len(mapper)
print(f'Total Labels in train data are: {Config.N_LABELS}')
# Placeholder label so SignDataset also yields (image, label) pairs for the test set,
# which Model.predict_step relies on when it reads batch[0].
test[Config.LABEL_COL] = 0

Total Labels in train data are: 9


In [None]:

# Debug mode: keep only a fixed random fraction of the training rows to shorten runs.
# NOTE(review): this rebinds `train`, so re-running the cell with DEBUG on shrinks it again.
if Config.DEBUG:
    train = train.sample(frac=Config.DEBUG_FRAC, random_state=1).reset_index(drop=True)

In [None]:
def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
    """Blend every sample of a batch with a randomly chosen partner from the same batch.

    Args:
        x: batch of inputs, indexed along the first dimension.
        y: batch of targets aligned with ``x``.
        alpha: both shape parameters of the Beta distribution the mixing
            weight is drawn from; must be positive.

    Returns:
        ``(mixed_x, target_a, target_b, lam)`` where
        ``mixed_x = lam * x + (1 - lam) * x[perm]``, ``target_a`` is ``y``,
        ``target_b`` is ``y[perm]`` and ``lam`` is the drawn weight.

    Raises:
        AssertionError: if ``alpha`` is not positive or the batch holds a single item.
    """
    assert alpha > 0, "alpha should be larger than 0"
    batch_size = x.size(0)
    assert batch_size > 1, "Mixup cannot be applied to a single instance."

    # The weight is drawn from numpy's global RNG, the permutation from torch's.
    lam = np.random.beta(alpha, alpha)
    shuffle = torch.randperm(batch_size)

    partner_x = x[shuffle, :]
    partner_y = y[shuffle]
    blended = lam * x + (1 - lam) * partner_x
    return blended, y, partner_y, lam

class Model(pl.LightningModule):
    """Image classifier: a pretrained timm backbone followed by a dropout + linear head.

    Args:
        cfg: configuration object exposing ``model.name``, ``optimizer`` and
            ``scheduler`` entries (``name`` plus ``params``).

    The number of output classes is read from ``Config.N_LABELS``. Training
    uses cross-entropy and applies mixup to roughly 30% of the training batches.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.__build_model()
        self.save_hyperparameters(cfg)
        self.criteria = torch.nn.CrossEntropyLoss(reduction='mean')

    def __build_model(self):
        """Create the backbone (built with ``num_classes=0``) and the classification head."""
        self.backbone = create_model(
            self.cfg.model.name, pretrained=True, num_classes=0, in_chans=3
        )
        num_features = self.backbone.num_features
        self.fc = nn.Sequential(
            nn.Dropout(0.5), nn.Linear(num_features, Config.N_LABELS)
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        return self.fc(self.backbone(x))

    @staticmethod
    def _batch_images(batch):
        """Extract the float image tensor from an ``(images, labels)`` batch or a bare tensor."""
        images = batch[0] if isinstance(batch, (list, tuple)) else batch
        return images.to(torch.float)

    def training_step(self, batch, batch_idx):
        loss, pred, labels = self.__share_step(batch, 'train')
        # Log a detached view; the loss itself stays on its device (no per-step .cpu() transfer).
        logged = loss.detach()
        self.log(
            name="train_loss",
            value=logged,
            prog_bar=True,
            on_step=True,
            sync_dist=True,
            rank_zero_only=True,
        )
        self.log(
            name='train_metric',
            value=logged,
            prog_bar=True,
            on_step=True,
            on_epoch=True,
            rank_zero_only=True,
            sync_dist=True,
        )
        return {'loss': loss, 'pred': pred, 'labels': labels}

    def validation_step(self, batch, batch_idx):
        loss, pred, labels = self.__share_step(batch, 'val')
        logged = loss.detach()
        # Both names carry the same per-batch cross-entropy, averaged over the epoch.
        for metric_name in ("val_metric", "val_loss"):
            self.log(
                name=metric_name,
                value=logged,
                prog_bar=True,
                on_step=False,
                on_epoch=True,
                sync_dist=True,
                rank_zero_only=True,
            )
        return {'loss': loss, 'pred': pred, 'labels': labels}

    def __share_step(self, batch, mode):
        """Run a forward pass and return ``(loss, logits on CPU, labels on CPU)``.

        In ``'train'`` mode mixup is applied with probability 0.3 and the loss
        is the ``lam``-weighted sum of the losses against both targets.
        """
        images, labels = batch
        labels = labels.to(torch.long)

        if torch.rand(1)[0] < 0.3 and mode == 'train':
            mix_images, target_a, target_b, lam = mixup(images, labels, alpha=0.5)
            # squeeze(1) only has an effect when the head has a single output.
            logits = self(mix_images).squeeze(1)
            loss = self.criteria(logits, target_a) * lam + \
                (1 - lam) * self.criteria(logits, target_b)
        else:
            logits = self(images).squeeze(1)
            loss = self.criteria(logits, labels)

        # Keep the raw logits: the epoch-end loss feeds them to CrossEntropyLoss, which expects
        # unnormalised scores. Storing sigmoid outputs here made that loss meaningless.
        pred = logits.detach().cpu()
        labels = labels.detach().cpu()
        return loss, pred, labels

    def training_epoch_end(self, outputs):
        self.__share_epoch_end(outputs, 'train')

    def validation_epoch_end(self, outputs):
        self.__share_epoch_end(outputs, 'val')

    def __share_epoch_end(self, outputs, mode):
        """Log the cross-entropy over all logits collected during the epoch as ``{mode}_loss``.

        For ``'train'`` the collected logits may come from mixed-up inputs
        while the labels are the original ones.
        """
        preds = torch.cat([out['pred'] for out in outputs])
        labels = torch.cat([out['labels'] for out in outputs])
        metrics = self.criteria(preds, labels.to(torch.long))
        del preds, labels
        _ = gc.collect()
        self.log(f'{mode}_loss', metrics)

    def test_step(self, batch, batch_idx):
        # Previously called the non-existent `self.model` and indexed the batch as a dict,
        # although the datasets in this notebook yield tuples.
        preds = self(self._batch_images(batch))

        return {'preds': preds}

    def predict_step(self, batch, batch_idx):
        preds = self(self._batch_images(batch))

        return preds

    def test_epoch_end(self, outputs):
        preds = torch.cat([x['preds'] for x in outputs])
        return preds

    def configure_optimizers(self):
        """Instantiate the optimizer and scheduler named in the configuration."""
        # NOTE(review): eval() resolves the configured names against this module's globals.
        # That is acceptable for a locally defined config but must never be fed untrusted
        # strings.
        optimizer = eval(self.cfg.optimizer.name)(
            self.parameters(), **self.cfg.optimizer.params
        )
        scheduler = eval(self.cfg.scheduler.name)(
            optimizer,
            **self.cfg.scheduler.params
        )
        # NOTE(review): returned as a bare scheduler, so the stepping interval is whatever
        # Lightning uses by default - confirm that matches the intended T_0.
        return [optimizer], [scheduler]

In [None]:
def get_preds(dl):
    """Predict over ``dl`` with the global ``trainer`` and return class probabilities.

    The per-batch outputs are concatenated, moved to a numpy array and passed
    through a softmax over the last axis.
    """
    batch_outputs = trainer.predict(dataloaders=dl)
    logits = torch.cat(batch_outputs).squeeze(-1)
    logits_np = logits.cpu().detach().numpy()

    # Drop the tensor references before the softmax, then collect.
    del batch_outputs, logits
    gc.collect()

    return softmax(logits_np, axis=-1)

def save_preds(preds, path, ids=None, target=None):
    """Write class probabilities to ``path`` as CSV.

    Args:
        preds: 2-D array of probabilities whose column ``i`` belongs to class id ``i``.
        path: destination CSV path.
        ids: optional Series of sample ids, inserted as the first column.
        target: optional Series of encoded labels; stored decoded in the label column.

    Columns are renamed through the global ``inv_mapper`` and ordered like
    the sample submission's columns (all but the first).
    """
    frame = pd.DataFrame(preds)
    class_names = [inv_mapper[class_id] for class_id in frame.columns]
    frame.columns = class_names
    submission_order = ss.columns[1:]
    frame = frame[submission_order]

    if ids is not None:
        frame.insert(0, Config.ID_COL, ids.values)

    if target is not None:
        # Attach the encoded labels first, then decode them back to class names.
        frame[Config.LABEL_COL] = target.values
        frame[Config.LABEL_COL] = frame[Config.LABEL_COL].map(inv_mapper)

    frame.to_csv(path, index=False)
    

def get_tta_preds(test_df):
    """Predict with test-time augmentation and average the copies per image id.

    ``test_df`` is repeated ``Config.N_TTA_AUGS`` times, each copy is predicted
    through the TTA pipeline, and the probabilities of the copies sharing an id
    are combined with a geometric mean (``exp(mean(log(p)))``).

    Returns:
        A 2-D numpy array with one row per row of ``test_df`` (same order) and
        one column per class.

    NOTE(review): a geometric mean of probability vectors does not sum to one;
    renormalise downstream if calibrated probabilities are required.
    """
    test_df_aug = pd.concat([test_df] * Config.N_TTA_AUGS).reset_index(drop=True)
    dl = tta_test_dataloader(test_df_aug)
    preds = get_preds(dl)

    # Average the log-probabilities per id with an explicit column-wise group mean.
    # The previous np.mean(<DataFrame>) inside groupby.apply depended on how the installed
    # pandas version interprets axis=None.
    aug_ids = test_df_aug[Config.ID_COL].values
    log_probs = np.log(pd.DataFrame(preds))
    mean_log_probs = log_probs.groupby(aug_ids).mean()

    # Bring the per-id averages back to the row order of the input dataframe.
    avg_preds = np.exp(mean_log_probs).reindex(test_df[Config.ID_COL].values).values

    del dl, preds, test_df_aug, log_probs, mean_log_probs
    _ = gc.collect()
    return avg_preds

In [None]:
# Folder for this experiment's prediction CSVs: <OUTPUT_PATH>/<NAME>, created if missing.
VERSION_OUTPUT_PATH = Config.OUTPUT_PATH + os.sep + Config.NAME
Path(VERSION_OUTPUT_PATH).mkdir(parents=True, exist_ok=True)

In [None]:
# Folder for this experiment's checkpoints: model_checkpoints/<NAME>; one sub-folder per fold
# is added by the checkpoint callback in the training loop.
MODEL_SAVE_PATH = 'model_checkpoints' + os.sep + Config.NAME
Path(MODEL_SAVE_PATH).mkdir(parents=True, exist_ok=True)

In [None]:
# Stratified K-fold training: one fresh model per fold, followed by TTA predictions on the
# fold's validation rows and on the test set, both written to CSV.
# NOTE(review): no global seeding (random / numpy / torch) is visible in this notebook, so
# only the fold split itself is reproducible.
skf = StratifiedKFold(
    n_splits=Config.N_SPLITS, shuffle=True, random_state=config.seed
)

# NOTE(review): these two accumulators are allocated but never written to in this cell.
all_val_preds = np.zeros((train.shape[0], train[Config.LABEL_COL].nunique()))
all_test_preds = np.zeros((test.shape[0], train[Config.LABEL_COL].nunique()))

for fold, (train_idx, val_idx) in enumerate(skf.split(train['img_IDS'], train['Label'])):

    # Only folds 0 .. LAST_FOLD-1 are trained; the remaining splits are skipped.
    if fold == Config.LAST_FOLD:
        break

    train_df = train.loc[train_idx].reset_index(drop=True)
    val_df = train.loc[val_idx].reset_index(drop=True)
    datamodule = SignDataModule(train_df, val_df, test, config)

    model = Model(config)
    # NOTE(review): this early-stopping callback is created but not passed to the Trainer below.
    earystopping = pl.callbacks.early_stopping.EarlyStopping(monitor="val_loss")
    lr_monitor = pl.callbacks.LearningRateMonitor()
    
    
    
    # Keep only the checkpoint with the lowest val_metric for this fold.
    loss_checkpoint = pl.callbacks.ModelCheckpoint(
        dirpath=MODEL_SAVE_PATH+os.sep+str(fold),
        monitor="val_metric",
        save_top_k=1,
        mode="min",
        filename='model-{epoch}-{val_loss:.4f}-{val_metric:.4f}'
    )
    # NOTE(review): every fold uses the same run name; the captured logs warn that new
    # WandbLogger instances reuse the run already in progress.
    logger = pl.loggers.WandbLogger(project='zindi_sign_language', name=Config.NAME)

    
    
    swa_callback = pl.callbacks.StochasticWeightAveraging(swa_epoch_start=2)
    

    trainer = pl.Trainer(
        logger=logger,
        max_epochs=config.epoch,
        callbacks=[lr_monitor, loss_checkpoint, swa_callback],
        **config.trainer,
    )
    trainer.fit(model, datamodule=datamodule)
    gc.collect()
    
    # get_tta_preds -> get_preds uses the global `trainer` and calls predict without a model;
    # per the captured logs the weights are restored from the fold's saved checkpoint.
    val_preds = get_tta_preds(val_df)
    metric = log_loss(val_df['Label'], val_preds, labels=np.arange(Config.N_LABELS))
    print(f'Fold {fold} logloss: {metric}')
    

    test_preds = get_tta_preds(test)

    _ = datamodule.teardown()
    

    path = VERSION_OUTPUT_PATH + os.sep + f'val_preds_fold_{fold}.csv'
    save_preds(val_preds, path, ids=val_df[Config.ID_COL], target=val_df[Config.LABEL_COL])

    path = VERSION_OUTPUT_PATH + os.sep + f'test_preds_fold_{fold}.csv'
    save_preds(test_preds, path, ids=test[Config.ID_COL])


    # Release the fold's objects and cached GPU memory before the next fold starts.
    del model, trainer, test_preds, val_preds, datamodule, train_df, val_df
    _ = gc.collect()
    _ = torch.cuda.empty_cache()
    _ = gc.collect()

  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name     | Type             | Params
----------------------------------------------
0 | backbone | ConvNeXt         | 348 M 
1 | fc       | Sequential       | 18.4 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
348 M     Trainable params
0         Non-trainable params
348 M     Total params
1,392.666 Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/0/model-epoch=3-val_loss=1.6702-val_metric=0.1889.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/0/model-epoch=3-val_loss=1.6702-val_metric=0.1889.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/0/model-epoch=3-val_loss=1.6702-val_metric=0.1889.ckpt


Fold 0 logloss: 0.1832200572558603


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/0/model-epoch=3-val_loss=1.6702-val_metric=0.1889.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | ConvNeXt         | 348 M 
1 | fc       | Sequential       | 18.4 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
348 M     Trainable params
0         Non-trainable params
348 M     Total params
1,392.666 Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/1/model-epoch=1-val_loss=1.6624-val_metric=0.1647.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/1/model-epoch=1-val_loss=1.6624-val_metric=0.1647.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/1/model-epoch=1-val_loss=1.6624-val_metric=0.1647.ckpt


Fold 1 logloss: 0.16183606365753525


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/1/model-epoch=1-val_loss=1.6624-val_metric=0.1647.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | ConvNeXt         | 348 M 
1 | fc       | Sequential       | 18.4 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
348 M     Trainable params
0         Non-trainable params
348 M     Total params
1,392.666 Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/2/model-epoch=2-val_loss=1.6547-val_metric=0.1367.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/2/model-epoch=2-val_loss=1.6547-val_metric=0.1367.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/2/model-epoch=2-val_loss=1.6547-val_metric=0.1367.ckpt


Fold 2 logloss: 0.14309505279216042


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/2/model-epoch=2-val_loss=1.6547-val_metric=0.1367.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | ConvNeXt         | 348 M 
1 | fc       | Sequential       | 18.4 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
348 M     Trainable params
0         Non-trainable params
348 M     Total params
1,392.666 Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/3/model-epoch=3-val_loss=1.6514-val_metric=0.1212.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/3/model-epoch=3-val_loss=1.6514-val_metric=0.1212.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/3/model-epoch=3-val_loss=1.6514-val_metric=0.1212.ckpt


Fold 3 logloss: 0.11965383194625359


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/3/model-epoch=3-val_loss=1.6514-val_metric=0.1212.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | ConvNeXt         | 348 M 
1 | fc       | Sequential       | 18.4 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
348 M     Trainable params
0         Non-trainable params
348 M     Total params
1,392.666 Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/4/model-epoch=1-val_loss=1.6622-val_metric=0.1389.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/4/model-epoch=1-val_loss=1.6622-val_metric=0.1389.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/4/model-epoch=1-val_loss=1.6622-val_metric=0.1389.ckpt


Fold 4 logloss: 0.1398378653740332


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/4/model-epoch=1-val_loss=1.6622-val_metric=0.1389.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | ConvNeXt         | 348 M 
1 | fc       | Sequential       | 18.4 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
348 M     Trainable params
0         Non-trainable params
348 M     Total params
1,392.666 Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/5/model-epoch=4-val_loss=1.6366-val_metric=0.0830.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/5/model-epoch=4-val_loss=1.6366-val_metric=0.0830.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/conv_v7/5/model-epoch=4-val_loss=1.6366-val_metric=0.0830.ckpt


Fold 5 logloss: 0.08401187924160199


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/conv_v7/5/model-epoch=4-val_loss=1.6366-val_metric=0.0830.ckpt


Predicting: 1510it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
