## Imports and Config

In [None]:
from pathlib import Path

import albumentations as A
import madgrad
import numpy as np
import pandas as pd
import timm
from albumentations.pytorch import ToTensorV2
from fastai.vision.all import *
from sklearn.model_selection import train_test_split


In [None]:
# --- Run switches -----------------------------------------------------------
# When True, rows from BEST_SUBMISSION are appended to the training frame.
USE_TEST_IN_TRAIN: bool = True
# When True, test predictions come from learn.tta instead of learn.get_preds.
TTA_PREDICT: bool = False

# --- Locations --------------------------------------------------------------
# Tab-separated annotation table for the training images.
TRAIN_CSV: Path = Path("train.csv")
# Image folders the dataloaders read from (presumably 256px copies -- confirm).
R_TRAIN_DATASET: Path = Path("./data/train_256/")
R_TEST_DATASET: Path = Path("./data/test_256/")
# Test folder whose file names are written into the submission.
O_TEST_DATASET: Path = Path("./data/test/")
# Earlier submission file, read back as extra labelled rows.
BEST_SUBMISSION: Path = Path("./submissoins/subm_best6.csv")
# Destination directory for the exported learner.
SAVE_MODELS_DIR: Path = Path("./models/")

In [None]:
# Single seed shared by every random number generator used in the notebook.
SEED = 42

# Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with the same value.
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)

# Ask cuDNN for deterministic kernels and turn off its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Data Preparation

In [None]:
def _load_image_table(csv_path, image_dir):
    """Read a tab-separated table and attach an image path to every row.

    ``image_name`` is rewritten to the text before its first dot followed by
    ``.jpg``; ``path`` is that name joined onto ``image_dir``.
    """
    table = pd.read_csv(csv_path, delimiter="\t")
    table.image_name = table.image_name.apply(
        lambda name: name.split(".")[0] + ".jpg"
    )
    table["path"] = table["image_name"].apply(lambda name: image_dir / name)
    return table


# Labelled training rows, shuffled once so the index is a clean 0..n-1 range.
df = _load_image_table(TRAIN_CSV, R_TRAIN_DATASET)
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Stratified 80/20 split over row positions; only the validation part is used.
X_train, X_val = train_test_split(
    np.arange(len(df)),
    test_size=0.2,
    stratify=df["label_id"],
    random_state=SEED,
)
# Row positions equal index labels here, so membership in X_val is the flag.
df["isVal"] = df.index.isin(X_val)

if USE_TEST_IN_TRAIN:
    # Rows of an earlier submission are added as training-only samples.
    df_test = _load_image_table(BEST_SUBMISSION, R_TEST_DATASET)
    df_test["isVal"] = False

    df_full = pd.concat([df, df_test])
    df = df_full.sample(frac=1, random_state=SEED).reset_index(drop=True)


In [None]:
class AlbumentationsTransform(DisplayedTransform):
    """Item transform that runs an Albumentations pipeline on a PIL image."""

    # split_idx 0 restricts the transform to the training subset.
    split_idx = 0
    order = 2

    def __init__(self, train_aug):
        """Keep ``train_aug`` (a callable taking ``image=``) on the instance."""
        store_attr()

    def encodes(self, img: PILImage):
        """Augment ``img`` and hand the result back as a ``PILImage``."""
        pixels = np.array(img)
        augmented = self.train_aug(image=pixels)
        return PILImage.create(augmented['image'])

def A_augs():
    """Build the Albumentations pipeline used for training images.

    The pipeline applies, each with its own probability: colour jitter, RGB
    channel shifts, Gaussian noise, coarse and per-pixel dropout, a
    down/up-scaling round trip and JPEG re-compression.

    Returns:
        An ``A.Compose`` object; call it as ``pipeline(image=array)["image"]``.
    """
    return A.Compose(
        [
            A.ColorJitter(hue=0.1, p=0.5),
            A.RGBShift(r_shift_limit=20, g_shift_limit=20, b_shift_limit=20, p=0.5),
            A.GaussNoise(var_limit=(50.0, 150.0), p=0.6),
            A.CoarseDropout(p=0.5),
            A.PixelDropout(dropout_prob=0.005, p=0.5),
            # interpolation=4 is OpenCV's INTER_LANCZOS4 code.
            A.Downscale(scale_min=0.75, scale_max=0.9, interpolation=4, p=0.2),
            # ImageCompression defaults to JPEG; it replaces the deprecated
            # JpegCompression alias while keeping the same quality bounds.
            A.ImageCompression(quality_lower=45, quality_upper=85, p=0.3),
        ]
    )


In [None]:
# Image -> category pipeline driven by the columns of `df`:
#   path     -> image file to open
#   label_id -> target class
#   isVal    -> True rows form the validation subset
memes = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_x=ColReader('path'),
    get_y=ColReader('label_id'),
    splitter=ColSplitter('isVal'),
    # Per-item Albumentations augmentations.
    item_tfms=AlbumentationsTransform(A_augs()),
    # fastai batch augmentations at reduced strength, output size 224.
    batch_tfms=aug_transforms(mult=0.3, min_scale=1, size=224),
)
dls = memes.dataloaders(df, bs=32)

In [None]:
# Visual sanity check: display up to 18 training samples in two rows.
dls.train.show_batch(max_n=18, nrows=2)

In [None]:
# Same check for the validation subset.
dls.valid.show_batch(max_n=18, nrows=2)

## Train Model

In [None]:
# File name used when the trained learner is exported.
model_save_name = 'hardaugs_v2_swin_large_patch4_window7_224_in22k_mixedtrain_qhadam.pkl'

# Macro-averaged F1 is the only reported metric.
macro_f1 = F1Score(average="macro")

# NOTE(review): pretrained=False builds the architecture without loading
# weights, although the name refers to an ImageNet-22k checkpoint and the
# model is later trained with fine_tune -- confirm this is intended.
learn = vision_learner(
    dls,
    "swin_large_patch4_window7_224_in22k",
    pretrained=False,
    opt_func=QHAdam,
    metrics=[macro_f1],
)

In [None]:
# Run the learning-rate finder and keep its "valley" suggestion.
# NOTE(review): `lr` is only printed; the visible training call passes a
# hard-coded learning rate instead.
lr_suggestion = learn.lr_find()
lr = lr_suggestion.valley
print(lr)

In [None]:
# Checkpoint name used by SaveModelCallback.
filename = "model"

# One frozen epoch followed by ten unfrozen epochs at base learning rate 1e-3.
learn.fine_tune(
    10,
    1e-3,
    freeze_epochs=1,
    cbs=[
        # Keep the checkpoint with the highest validation F1 and reload it
        # when each fit finishes. `at_end=True` was removed: with it the
        # callback overwrites that checkpoint with the final-epoch weights
        # and skips the reload, so the monitored metric had no effect on
        # the model that gets exported.
        SaveModelCallback(monitor="f1_score", fname=filename),
    ],
)


In [None]:
# Serialise the trained learner under the models directory.
export_target = SAVE_MODELS_DIR / model_save_name
learn.export(export_target)

## Submission

In [None]:
# Test images in sorted path order; predictions come back in this order.
test_files = [str(image_path) for image_path in sorted(R_TEST_DATASET.iterdir())]
test_dl = learn.dls.test_dl(test_files)

if TTA_PREDICT:
    # Test-time augmentation: 7 augmented passes blended with beta=0.3.
    preds, _ = learn.tta(dl=test_dl, beta=0.3, n=7)
else:
    preds, _ = learn.get_preds(dl=test_dl)

In [None]:
# Original test file names; they are paired with predictions purely by sorted
# order, so both test folders must list the same images in the same order.
original_names = [image_path.name for image_path in sorted(O_TEST_DATASET.iterdir())]
if len(original_names) != len(preds):
    raise ValueError(
        f"{len(original_names)} files in {O_TEST_DATASET} but {len(preds)} predictions"
    )

# argmax yields positions in the dataloaders' class vocabulary, not label
# values; translate them so the file holds real `label_id`s even when the
# labels are not exactly 0..C-1.
class_vocab = learn.dls.vocab
predicted_labels = [class_vocab[idx] for idx in preds.argmax(-1).tolist()]

subm = pd.DataFrame(
    {
        "image_name": original_names,
        "label_id": predicted_labels,
    }
)
subm.to_csv('submissoins/subm1.csv', index=False, sep='\t')