# Imports

In [61]:
import wandb
import pandas as pd
import numpy as np
import plotly.express as px
import torch
import wandb
from matplotlib import pyplot as plt

In [None]:
# Kaggle cell
#
# Kaggle-only bootstrap, kept commented out for local runs. When enabled it:
#   1. installs/upgrades the pip dependencies,
#   2. re-clones the project repository and puts it on sys.path,
#   3. logs in to Weights & Biases with the "WANDB" Kaggle secret, falling
#      back to an anonymous login when the secret cannot be read.
# NOTE(review): the fallback uses a bare `except:`, which also hides
# unrelated failures (e.g. network errors) — consider narrowing it if this
# cell is ever re-enabled.
#
# !pip install wandb timm numpy --upgrade
# !rm -r kaggle_happywhale_2022
# !git clone https://github.com/btseytlin/kaggle_happywhale_2022.git
# import sys
# sys.path.append('kaggle_happywhale_2022')
#
# import wandb
#
# try:
#     from kaggle_secrets import UserSecretsClient
#     user_secrets = UserSecretsClient()
#     api_key = user_secrets.get_secret("WANDB")
#     wandb.login(key=api_key)
#     anonymous = None
# except:
#     anonymous = "must"
#     wandb.login(anonymous=anonymous)
#     print('wand secret missing')

In [62]:
from happywhale import (ImageDataMoodule,
                        ImageBackbone,
                        Classifier,
                        seed_torch,
                        load_train_test_dfs,
                        get_cv_splits)

In [63]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

In [64]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the local `happywhale`
# package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [65]:
# Seed the RNGs before any splitting or training so the cross-validation
# folds and model initialisation are repeatable across kernel restarts.
# `seed_torch` is the project's seeding helper imported from `happywhale`.
seed_torch(42)

# SEEDING DONE


# Config

In [66]:
# --- Experiment identity -------------------------------------------------
EXP_NAME = 'baseline_classifier'
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


# --- Input locations (Kaggle dataset layout) -----------------------------
TRAIN_IMG_DIR = '../input/jpeg-happywhale-256x256/train_images-256-256/train_images-256-256'
TEST_IMG_DIR = '../input/jpeg-happywhale-256x256/test_images-256-256/test_images-256-256'
TRAIN_CSV_PATH = '../input/happy-whale-and-dolphin/train.csv'
TEST_CSV_PATH = '../input/happy-whale-and-dolphin/sample_submission.csv'


# --- Model / optimisation hyper-parameters -------------------------------
BACKBONE = 'efficientnetv2_s'
LR = 1e-3
EMBEDDING_DIM = 1000

N_EPOCHS = 20
BATCH_SIZE = 64
NUM_WORKERS = 2


# --- W&B tags --------------------------------------------------------------
# The backbone tag is derived from BACKBONE so logged runs can never carry a
# stale architecture label (it previously said 'backbone_effnet_b0' while the
# model actually trained was 'efficientnetv2_s').
# NOTE(review): 'dataset_base_128' is kept as-is, but the image directories
# above point at a 256x256 dataset — confirm which resolution is really used.
tags = ['dataset_base_128', f'backbone_{BACKBONE}']


# --- Trainer options -------------------------------------------------------
# Passed through to the Classifier as `trainer_kwargs`.
TRAINER_KWARGS = dict(
    max_epochs=N_EPOCHS,
    devices="auto",
    accelerator="auto",
    gradient_clip_val=1,
    accumulate_grad_batches=2,
    # stochastic_weight_avg=True,
    # fast_dev_run=True,
)

# Half precision is only requested when a CUDA device is available.
if DEVICE != 'cpu':
    TRAINER_KWARGS.update(
        dict(
            # amp_backend='apex',
            # amp_level='O2',
            precision=16,
        )
    )

# CV: split and prepare datamodules

In [67]:
# Load the train and test frames from the configured CSV files and image
# directories, then report both shapes on one line.
train_df, test_df = load_train_test_dfs(
    train_images_path=TRAIN_IMG_DIR,
    train_csv_path=TRAIN_CSV_PATH,
    test_images_path=TEST_IMG_DIR,
    test_csv_path=TEST_CSV_PATH,
)
print(f'{train_df.shape} {test_df.shape}')

(51033, 5) (27956, 4)


In [68]:
# Build the cross-validation splits over the training frame. Each split
# exposes `.train`, `.val` and `.test` index collections, which later cells
# use positionally via `train_df.iloc[...]`.
cv_splits = get_cv_splits(train_df)



In [69]:
# Sanity check: sizes of the train / val / test index sets of the first split.
tuple(getattr(cv_splits[0], part).shape for part in ('train', 'val', 'test'))

((36743,), (4083,), (10207,))

In [70]:
# One ready-to-use datamodule per CV split. The "test" part of every split is
# a held-out slice of the labelled training frame, not the competition test set.
split_datamodules = []
for split in cv_splits:
    # Slice the training frame positionally with the split's index sets.
    split_train_df, split_val_df, split_test_df = (
        train_df.iloc[indices]
        for indices in (split.train, split.val, split.test)
    )
    datamodule = ImageDataMoodule(
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        train_df=split_train_df,
        val_df=split_val_df,
        test_df=split_test_df,
    )
    # Set up eagerly: the training cell reads `datamodule.label_encoder`
    # and `datamodule.train` before calling fit.
    datamodule.setup()
    split_datamodules.append(datamodule)

  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(
  rank_zero_deprecation(


# Train model

In [72]:
# Train and evaluate one classifier per CV fold, logging each fold as its own
# W&B run, and keep the fitted models for later use.
models = []
for i, datamodule in enumerate(split_datamodules):
    backbone = ImageBackbone(model_name=BACKBONE)
    model = Classifier(
        backbone=backbone,
        lr=LR,
        num_labels=len(datamodule.label_encoder.classes_),
        # NOTE(review): this counts dataloader batches (dropping a partial
        # last batch) times epochs. TRAINER_KWARGS sets
        # accumulate_grad_batches=2, so if Classifier uses this value to size
        # an LR schedule in optimizer steps it is roughly 2x too large —
        # confirm against the Classifier implementation.
        num_training_steps=len(datamodule.train)//datamodule.batch_size * N_EPOCHS,
        trainer_kwargs=TRAINER_KWARGS,
        embedding_dim=EMBEDDING_DIM,
        exp_name=EXP_NAME,
    )
    # Using the run as a context manager guarantees it is finished even when
    # fit/test raises, so a failed fold does not leave a dangling W&B run
    # that the next `wandb.init` would collide with.
    with wandb.init(
        project='kaggle_happywhale',
        name=EXP_NAME + f'_fold_{i}',
        tags=tags,
    ) as run:
        model.fit(datamodule)
        model.test(datamodule)
    # Only folds that trained and tested successfully are collected
    # (previously `models` was created but never filled).
    models.append(model)

No pretrained weights exist for this model. Using random initialization.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Running in fast_dev_run mode: will run a full train, val, test and prediction loop using 1 batch(es).
  rank_zero_deprecation(

  | Name       | Type             | Params
------------------------------------------------
0 | backbone   | ImageBackbone    | 21.5 M
1 | classifier | Sequential       | 16.6 M
2 | loss       | CrossEntropyLoss | 0     
------------------------------------------------
38.1 M    Trainable params
0         Non-trainable params
38.1 M    Total params
152.248   Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[Errno 21] Is a directory: '/Users/btseytlin/Documents/kaggle_happywhale_2022/notebooks'


  rank_zero_deprecation(
