In [1]:



import pandas as pd
import numpy as np
import jsonlines
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import torch.nn as nn
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch_optimizer as optim


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from importlib import reload
pd.set_option('display.max_rows', 500)
pd.set_option('display.float_format', '{:0.3f}'.format)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.options.display.width = 0
import warnings
import torchvision
warnings.filterwarnings('ignore')

from facebook_hateful_memes_detector.utils.globals import set_global, get_global
set_global("cache_dir", "/home/ahemf/cache/cache")
set_global("dataloader_workers", 4)
set_global("use_autocast", True)
set_global("models_dir", "/home/ahemf/cache/")

from facebook_hateful_memes_detector.utils import read_json_lines_into_df, in_notebook, set_device
get_global("cache_dir")
from facebook_hateful_memes_detector.models import Fasttext1DCNNModel, MultiImageMultiTextAttentionEarlyFusionModel, LangFeaturesModel, AlbertClassifer, TransformerImageModel

from facebook_hateful_memes_detector.preprocessing import TextImageDataset, my_collate, get_datasets, get_image2torchvision_transforms, TextAugment
from facebook_hateful_memes_detector.preprocessing import DefinedRotation, QuadrantCut, ImageAugment, DefinedAffine, HalfSwap
from facebook_hateful_memes_detector.training import *
import facebook_hateful_memes_detector
reload(facebook_hateful_memes_detector)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
set_device(device)


https://discuss.pytorch.org/t/how-to-implement-torch-optim-lr-scheduler-cosineannealinglr/28797/11

# Params

In [2]:
choice_probas = {
    "keyboard": 0.1,
    "char_substitute": 0.0,
    "char_insert": 0.1,
    "char_swap": 0.0,
    "ocr": 0.0,
    "char_delete": 0.1,
    "fasttext": 0.0,
    "glove_twitter": 0.0,
    "glove_wiki": 0.0,
    "word2vec": 0.0,
    "split": 0.1,
    "stopword_insert": 0.4,
    "word_join": 0.1,
    "word_cutout": 0.8,
    "text_rotate": 0.,
    "sentence_shuffle": 0.5,
    "one_third_cut": 0.4,
    "half_cut": 0.
}
preprocess_text = TextAugment([0.0, 0.1, 0.05, 0.35, 0.3, 0.2],
                              choice_probas,
                              fasttext_file="wiki-news-300d-1M-subword.bin")

augs_dict = dict(
    grayscale=transforms.Grayscale(num_output_channels=3),
    hflip=transforms.RandomHorizontalFlip(p=1.0),
    rc2=transforms.Compose(
        [transforms.Resize(480),
         transforms.CenterCrop(400)]),
    rotate=DefinedRotation(15),
    affine=DefinedAffine(0, scale=(0.6, 0.6)),
    translate1=DefinedAffine(0, translate=(0.25, 0.25)),
    swap=HalfSwap(),
)
im_transform = ImageAugment(count_proba=[0.0, 1.0],
                            augs_dict=augs_dict,
                            choice_probas="uniform")

torchvision_pre_image_transform = transforms.Compose([
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.RandomPerspective(distortion_scale=0.25, p=0.2),
    transforms.ColorJitter(brightness=0.1,
                           contrast=0.1,
                           saturation=0.1,
                           hue=0.1),
    transforms.RandomChoice([
        transforms.RandomRotation(15),
        DefinedRotation(90),
        transforms.RandomAffine(
            0,
            translate=(0.25, 0.25),
            scale=(0.6, 1.4),
            shear=None,
        ),
        transforms.RandomResizedCrop(480, scale=(0.6, 1.0))  # Zoom in
    ]),
])

data = get_datasets(
    data_dir="../data/",
    train_text_transform=preprocess_text,
    train_image_transform=im_transform,
    test_text_transform=None,
    test_image_transform=None,
    train_torchvision_image_transform=transforms.RandomErasing(p=0.5,
                                                               scale=(0.05,
                                                                      0.2),
                                                               ratio=(0.3,
                                                                      3.3),
                                                               value=0,
                                                               inplace=False),
    test_torchvision_image_transform=None,
    train_torchvision_pre_image_transform=torchvision_pre_image_transform,
    test_torchvision_pre_image_transform=None,
    cache_images=True,
    use_images=True,
    dev=False,
    test_dev=True,
    keep_original_text=False,
    keep_original_image=False,
    keep_processed_image=True,
    keep_torchvision_image=True,
    train_mixup_config=dict(proba=0.0),
)


In [3]:
sgd = torch.optim.SGD
sgd_params = dict(lr=2e-2, momentum=0.9, dampening=0, weight_decay=0, nesterov=False)

rangerQH = optim.RangerQH
rangerQHparams = dict(lr=1e-3, betas=(0.9, 0.999), nus=(.7, 1.0),
    weight_decay=0.0,
    k=6,
    alpha=.5,
    decouple_weight_decay=True,
    eps=1e-8,)

adam = torch.optim.Adam
adam_params = params=dict(lr=1e-4, weight_decay=1e-6)

adamw = torch.optim.AdamW
adamw_params = dict(lr=1e-4, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-2)

novograd = optim.NovoGrad
novograd_params = dict(lr= 1e-3,
    betas=(0.9, 0.999),
    eps=1e-8,
    weight_decay=0,
    grad_averaging=False,
    amsgrad=False,)

qhadam = optim.QHAdam
qhadam_params = dict(lr= 1e-3,
    betas=(0.9, 0.999),
    nus=(1.0, 1.0),
    weight_decay=0,
    decouple_weight_decay=False,
    eps=1e-8,)

radam = optim.RAdam
radam_params = dict(lr= 1e-3,
    betas=(0.9, 0.999),
    eps=1e-8,
    weight_decay=0,)

yogi = optim.Yogi
yogi_params = dict(lr= 1e-2,
    betas=(0.9, 0.999),
    eps=1e-3,
    initial_accumulator=1e-6,
    weight_decay=0)




In [4]:
batch_size=256
epochs = 25

adamw = torch.optim.AdamW
adamw_params = dict(lr=1e-4, betas=(0.9, 0.99), eps=1e-08, weight_decay=1e-3)
optimizer = adamw
optimizer_params = adamw_params

scheduler_init_fn = get_multistep_lr([5, 7, 10, 17], gamma=0.1) # get_cosine_schedule_with_warmup # get_cosine_with_hard_restarts_schedule_with_warmup
scheduler_init_fn = get_cosine_schedule_with_warmup()
reg_sched = get_regularizer_scheduler()


# {"lr": optimizer_params["lr"]/500}



# Transformer Image Model

In [None]:
lr_strategy = {
    "finetune": True,
    "im_models": {
        "lr": optimizer_params["lr"] / 10,
        "torchvision_resnet18_ssl-contrastive": {
            "lambd": {
                "7": {
                    "finetune": True
                },
                "8": {
                    "finetune": True
                }
            },
            "lr": optimizer_params["lr"] / 10,
            "finetune": False,
        },
        "vgg_face": {
            "lr": optimizer_params["lr"] / 10,
            "lambd": {
                "0": {
                    "feat_extract": {
                        "finetune": True
                    }
                }
            },
            "finetune": False,
        }
    }
}

adam = torch.optim.Adam
adam_params = params=dict(lr=5e-4, weight_decay=1e-6)
adamw = torch.optim.AdamW
adamw_params = dict(lr=1e-4, betas=(0.9, 0.99), eps=1e-08, weight_decay=1e-3)
optimizer = adamw
optimizer_params = adamw_params

model_fn = model_builder(
    TransformerImageModel,
    dict(
        image_models=[
            #             {
            #                 "model": 'caption_features',
            #                 "gaussian_noise": 0.0,
            #                 "dropout": 0.0
            #             },
            {
                "model": 'vgg_face',
                "gaussian_noise": 0.0, # 0.0
                "dropout": 0.1,
            },
            {
                "model": 'detr',
                "gaussian_noise": 0.0,
                "dropout": 0.0
            },
            {
                "model": "torchvision_resnet18_ssl-contrastive",
                "large_rf": True,
                "dropout": 0.1,
                "gaussian_noise": 0.0, # 0.0
            },
        ],
        classifier_dims=768,
        num_classes=2,
        gaussian_noise=0.0,
        dropout=0.0,
        word_masking_proba=0.0,
        internal_dims=768,
        final_layer_builder=fb_1d_loss_builder,
        n_layers=2,
        n_encoders=0,
        n_decoders=1,
        n_tokens_in=160,
        n_tokens_out=32,
        featurizer="transformer",
        model='distilbert-nsp',
        loss="focal",
        classification_head="decoder_ensemble",  # decoder_ensemble
        dice_loss_coef=0.0,
        auc_loss_coef=0.5,
        attention_drop_proba=0.0,
        finetune=False,
    ),
    per_param_opts_fn=lr_strategy,
    optimiser_class=optimizer,
    optimiser_params=optimizer_params)

batch_size=40
epochs = 20
kfold = False
results, prfs = train_validate_ntimes(
    model_fn,
    data,
    batch_size,
    epochs,
    kfold=kfold,
    scheduler_init_fn=scheduler_init_fn,
    accumulation_steps=4,
    model_call_back=reg_sched, # reg_sched
    validation_epochs=[4, 7, 9, 11, 14, 17, 19, 23, 27, 31, 34, 37, 41, 44, 47, 51, 54],
    show_model_stats=False,
    sampling_policy="without_replacement",
    prediction_iters=1, evaluate_in_train_mode=True
)
r2, p2 = results, prfs
results
prfs

# 0.854	0.654 (0.765	0.620) dropout=0.05 lr=1e-4
# 0.827	0.638 (0.732	0.590) dropout=0.1 lr=1e-4

# 0.871	0.641 (0.871	0.641) gaussian_noise=0.05, dropout=0.05, word_masking_proba=0.15,


In [2]:
# 16 sized outputs were best for decoder.
# For ASIN tasks of AMLC train image model with other supervised tasks like GL/Category etc predictions. 
# MLM for transformerImage Model needed


# Multi-Text Multi Image

In [4]:
lr_strategy = {
    "im_models": {
        "lr": optimizer_params["lr"] / 10,
        "torchvision_resnet18_ssl-contrastive": {
            "lambd": {
                "8": {
                    "finetune": True
                }
            },
            "lr": optimizer_params["lr"] / 10,
            "finetune": False,
        },
        "vgg_face": {
            "lr": optimizer_params["lr"] / 10,
            "lambd": {
                "0": {
                    "feat_extract": {
                        "finetune": True
                    }
                }
            },
            "finetune": False,
        }
    }
}

model_fn = model_builder(
    MultiImageMultiTextAttentionEarlyFusionModel,
    dict(
        image_models=[
            #             {
            #                 "model": 'caption_features',
            #                 "gaussian_noise": 0.0,
            #                 "dropout": 0.0
            #             },
            {
                "model": 'vgg_face',
                "gaussian_noise": 0.0,
                "dropout": 0.0,
            },
            #             {
            #                 "model": 'detr_resnet50',
            #                 "gaussian_noise": 0.0,
            #                 "dropout": 0.0
            #             },
            #             {
            #                 "model": 'detr_resnet50_panoptic',
            #                 "gaussian_noise": 0.0,
            #                 "dropout": 0.0
            #             },
            {
                "model": "torchvision_resnet18_ssl-contrastive",
                "large_rf": True,
                "dropout": 0.0,
                "gaussian_noise": 0.0,
            },
        ],
        num_classes=2,
        text_models=[
            dict(
                cls=Fasttext1DCNNModel,
                params=dict(
                    classifier_dims=256,
                    num_classes=2,
                    n_tokens_in=64,
                    n_tokens_out=16,
                    n_layers=2,
                    final_layer_builder=lambda *args: None,
                    gaussian_noise=0.0,
                    dropout=0.0,
                    embedding_dims=256,
                    internal_dims=256,
                    featurizer="gru",
                ),
                in_channels=256,
                in_tokens=64,
                forward="get_word_vectors",
                dropout=0.2,
                gaussian_noise=0.25,
            ),
            dict(
                cls=AlbertClassifer,
                params=dict(classifier_dims=256,
                            num_classes=2,
                            embedding_dims=768,
                            gaussian_noise=0.0,
                            dropout=0.0,
                            word_masking_proba=0.25,
                            internal_dims=512,
                            final_layer_builder=fb_1d_loss_builder,
                            n_layers=2,
                            n_encoders=2,
                            n_decoders=2,
                            n_tokens_in=96,
                            n_tokens_out=16,
                            featurizer="transformer",
                            model='./distilbert-nsp',
                            finetune=False),
                in_channels=768,
                in_tokens=96,
                forward="get_word_vectors",
                dropout=0.2,
                gaussian_noise=0.25,
            )
        ],
        internal_dims=256,
        classifier_dims=256,
        n_tokens_out=32,
        n_layers=2,
        n_encoders=2,
        n_decoders=2,
        final_layer_builder=fb_1d_loss_builder,
        gaussian_noise=0.75,
        dropout=0.3,  # 0.3
        loss="focal",
        dice_loss_coef=0.0,
        auc_loss_coef=0.0,
    ),
    per_param_opts_fn=lr_strategy,
    optimiser_class=optimizer,
    optimiser_params=optimizer_params)

data["metadata"]["use_images"] = True
batch_size = 64
epochs = 5
kfold = False
results, prfs = train_validate_ntimes(
    model_fn,
    data,
    batch_size,
    epochs,
    kfold=kfold,
    scheduler_init_fn=scheduler_init_fn,
    validation_epochs=[7, 11, 14, 17, 20, 23, 27],
    show_model_stats=False,
    sampling_policy="without_replacement",
    accumulation_steps=4,
    model_call_back=reg_sched,
)
r1, p1 = results, prfs
results
prfs

# 0.824	0.750 (0.761	0.711)

# "detr_demo", 'detr_resnet50', 'detr_resnet50_panoptic', 'detr_resnet101', 'detr_resnet101_panoptic', "caption_features"

NameError: name 'optimizer_params' is not defined

# Predict

In [None]:
batch_size=512
epochs = 1

submission, text_model = train_and_predict(model_fn, data, batch_size, epochs, scheduler_init_fn=scheduler_init_fn)
submission.to_csv("submission.csv",index=False)
submission.sample(3)


In [None]:
submission.sample(10)