In [1]:
# !pip install torch==2.2.2+cu121 torchvision==0.17.2+cu121 torchaudio==2.2.2+cu121 -f https://pypi.tuna.tsinghua.edu.cn/simple -f https://download.pytorch.org/whl/torch_stable.html
# !pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# TASK 1

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import logging
import random
from functools import partial
import sklearn
from sklearn.model_selection import StratifiedKFold
import datasets
import transformers
import torch
import copy
from torch import nn
import gc
import torch.nn.functional as F
from tqdm.auto import tqdm, trange
from scipy.special import softmax, expit
logger = logging.getLogger(__name__)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def set_seeds(self, seed=2023): 
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

In [3]:
# Source: https://huggingface.co/l3cube-pune/assamese-bert
params = dict(learning_rate=1.0e-5, seed=2023, data_seed=2023, n_splits=10,
              model_id='models--l3cube-pune--bengali-bert', tensorboard_dir='tensorboard', label_col='task_1',
              patience=5, metric='f1', metric_mode='max', train_batch_size=32, eval_batch_size=32,
              train_max_length=512, eval_max_length=512, learning_rate_scheduling='cosine_annealing',
              gradient_accumulation_steps=1, warmup_proportion=50, num_train_epochs=100, max_seq=512,
              fp16=False, logging_steps=None, model_dir='/model/', password='Bangla')

y_map = {0.0: 'Negative', 1.0: 'Neutral', 2.0: 'Positive'}

In [4]:
set_seeds(params['seed'])

# Data Preparation

In [5]:
import getpass
import zipfile
from pathlib import Path
def prepare_data(path, password, train='Bangla_train_data.csv', test='bengali_test_data_wo_label.csv'):
    zfs = [zipfile.ZipFile(path / f.replace('.csv', '.zip')) for f in [train, test]]
    return [pd.read_csv(zfs[i].open(f, 'r', password)) for i, f in enumerate([train, test])]

def split(train_df, y, n_splits=5, data_seed=2023, shuffle=True):
    skf = StratifiedKFold(n_splits=n_splits, random_state=data_seed, shuffle=shuffle)
    train_df['split'] = -1
    for i, (train_index, test_index) in enumerate(skf.split(train_df.index, y)):
        train_df.loc[train_df.index.isin(test_index), 'split'] = i
    return train_df        

path = Path('Bangla')
password =  bytes(getpass.getpass(f"Input file password") if 'password' not in params else params['password'], 'utf-8')
train_df, test_df = prepare_data(path, password=password)
train_df = split(train_df, train_df['Sentiment'], n_splits=params['n_splits'], data_seed=params['data_seed'])
train_df['task_1'] = train_df['Sentiment'].map({v: k for k, v in y_map.items()})
print(len(train_df), len(test_df))

2693 1821


In [6]:
def prepare_feature(df, y_col, tokenizer, max_length):
    texts = df['OCR'].astype(str).tolist()
    features = tokenizer(texts, padding=False, truncation=True, return_tensors=None, max_length=max_length).data
    if y_col in df.columns:
        features['labels'] = df[y_col].tolist()
    return features

In [7]:
train_df.head(10)

Unnamed: 0,Ids,Sentiment,Sarcasm,Vulgar,Abuse,Target,OCR,split,task_1
0,image_ben_3635.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,I don't need any fucking GIRL বাঁরা কি সাইলেন...,8,1.0
1,image_ben_1586.png,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"After a little problem in relationship Boys, G...",1,0.0
2,image_ben_4040.jpg,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"মা জানো, পাশের বাড়ির চুলবুলি আমার থেকে বেশি অ...",1,0.0
3,image_ben_5074.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,Gf কে ফিঙ্গারং করে দেওয়ার পর যখন সে চরমসুখ অনু...,0,1.0
4,image_ben_7351.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,টিপতে টিপতে যখন ব্যাথ্যা অনুভব হয়,5,0.0
5,image_ben_3945.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,দাদু হবে নাকি..? টমি ও মিচেল যথাক্রমে 20 ও 25 ...,6,1.0
6,image_ben_4715.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,Dildo:- I am the best sex-toy for women লে নার...,9,0.0
7,image_ben_4801.jpg,Positive,Sarcastic,Non Vulgar,Abusive,Gender,এক রাউন্ড খেলার পর ছেলেরা পর পর দুই রাউন্ড খেল...,9,2.0
8,image_ben_7196.jpg,Negative,Sarcastic,Non Vulgar,Abusive,Gender,ফেচবুকে ডিপিতে ময়দা মেখেই রাখব সে রিয়ালিটিতে আ...,9,0.0
9,image_ben_4743.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,মন্দিরে গিয়ে ঠাকুর দেখার সময় যখন ফাঁকা রুমে ...,0,0.0


In [8]:
train_df.loc[train_df['task_1'] == 1.0, 'task_1'] = 0.0
train_df.loc[train_df['task_1'] == 2.0, 'task_1'] = 1.0
print(train_df['task_1'].unique())

[0. 1.]


In [9]:
train_df.head(10)

Unnamed: 0,Ids,Sentiment,Sarcasm,Vulgar,Abuse,Target,OCR,split,task_1
0,image_ben_3635.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,I don't need any fucking GIRL বাঁরা কি সাইলেন...,8,0.0
1,image_ben_1586.png,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"After a little problem in relationship Boys, G...",1,0.0
2,image_ben_4040.jpg,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"মা জানো, পাশের বাড়ির চুলবুলি আমার থেকে বেশি অ...",1,0.0
3,image_ben_5074.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,Gf কে ফিঙ্গারং করে দেওয়ার পর যখন সে চরমসুখ অনু...,0,0.0
4,image_ben_7351.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,টিপতে টিপতে যখন ব্যাথ্যা অনুভব হয়,5,0.0
5,image_ben_3945.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,দাদু হবে নাকি..? টমি ও মিচেল যথাক্রমে 20 ও 25 ...,6,0.0
6,image_ben_4715.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,Dildo:- I am the best sex-toy for women লে নার...,9,0.0
7,image_ben_4801.jpg,Positive,Sarcastic,Non Vulgar,Abusive,Gender,এক রাউন্ড খেলার পর ছেলেরা পর পর দুই রাউন্ড খেল...,9,1.0
8,image_ben_7196.jpg,Negative,Sarcastic,Non Vulgar,Abusive,Gender,ফেচবুকে ডিপিতে ময়দা মেখেই রাখব সে রিয়ালিটিতে আ...,9,0.0
9,image_ben_4743.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,মন্দিরে গিয়ে ঠাকুর দেখার সময় যখন ফাঁকা রুমে ...,0,0.0


# Prepare Transformer Trainer

In [10]:
class Model(nn.Module):
    def __init__(self, transformer):
        super().__init__()
        self.transformer = transformer 
        
    def m_loss(self, logits, labels):
        return F.binary_cross_entropy_with_logits(logits.squeeze(-1), labels)
        
    def forward(self, **kwargs):
        labels = kwargs.pop("labels", None)
        output = self.transformer(**kwargs)
        output.loss = self.m_loss(output.logits, labels) if labels != None else output.loss
        output.logits = F.sigmoid(output.logits)
        return output
    
    def load(self, model_dir):
        self.transformer.from_pretrained(model_dir)
        print(f"Model loaded: {model_dir}")
    
    def save(self, model_dir):
        self.transformer.save_pretrained(model_dir)
    
class PTTrainer(transformers.Trainer):
    def __init__(self, **kwargs): 
        super().__init__(**kwargs)
        self.compute_metrics = self._metrics if self.eval_dataset is not None else None
        self._patience = 0

    def _metrics(self, eval_preds):
        """
        Evaluation metrics
        """
        threshold = 0.5
        thresholds = np.arange(0.01, 1.0, 0.01) 
        scores, labels = eval_preds
        acc, f1, thr = -1, -1, 0
        auc = sklearn.metrics.roc_auc_score(labels, scores)
        for threshold in thresholds:
            predictions = (scores >= threshold).astype(int)
            new_f1 = sklearn.metrics.f1_score(labels, predictions, average='macro')
            if new_f1 > f1:
                acc = sklearn.metrics.accuracy_score(labels, predictions)
                f1 = new_f1
                thr = threshold
        return {'auc': auc, 'accuracy': acc, 'f1': f1, 'thr': thr}
        
    def compute_loss(self, model, inputs, return_outputs=False):
        """
        forward/backward step
        """
        output = model(**inputs)
        return (output.loss, {'logits': output.logits}) if return_outputs else output.loss

    def _save_checkpoint(self, model, trial, metrics=None):
        # Determine the new best metric / best model checkpoint
        if metrics is not None and self.args.metric_for_best_model is not None:
            metric_to_check = self.args.metric_for_best_model
            if not metric_to_check.startswith("eval_"):
                metric_to_check = f"eval_{metric_to_check}"
            metric_value = metrics[metric_to_check]

            operator = np.greater if self.args.greater_is_better else np.less
            if (
                self.state.best_metric is None
                # or self.state.best_model_checkpoint is None
                or operator(metric_value, self.state.best_metric)
            ):
                self.state.best_metric = metric_value
                # self.state.best_model_checkpoint = self.args.output_dir
                model.save(self.args.output_dir)
                print(f'Model Saved: {metric_value}')
                self._patience = 0
            else:
                print(f"No improvments: {metric_value}, patience: {self._patience}, lr: {self.optimizer.param_groups[0]['lr']}")
                self._patience += 1
            # LR annealing
            if params['learning_rate_scheduling'].lower() == 'cosine_annealing':
                if (self._patience - 1) == int(round(params['patience'] / 2)):
                    if not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.CosineAnnealingLR):
                        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=10, eta_min=max(self.args.learning_rate * 0.01, 1.0e-7), verbose=False)
                        print(f'Starting CosineAnnealingLR schedular.')
                        self._patience -= 1
        else:
            model.save(self.args.output_dir)
            print('Model Saved')


class PTDefaultFlowCallback(transformers.DefaultFlowCallback):
    def on_epoch_end(self, args, state, control, **kwargs):
        control = super().on_epoch_end(args, state, control, **kwargs)
        # Evaluate
        if args.evaluation_strategy != transformers.IntervalStrategy.EPOCH \
            and state.epoch == args.num_train_epochs:
            control.should_evaluate = state.best_metric is not None
            control.should_save = True
        return control
    
def get_best_thr(history):
    max_i = -1
    for i, record in enumerate(history):
        if f"eval_{params['metric']}" in record:
            if max_i == -1 or record[f"eval_{params['metric']}"] > history[max_i][f"eval_{params['metric']}"]:
                max_i = i
    return history[max_i]['eval_thr'], history[max_i]['eval_f1']

def apply_thr(y, threshold, mapper=None):
    if mapper is None:
        return (y > threshold).astype(int)
    else:
        return [mapper[x] for x in (y > threshold).astype(int)]

def inference(pred_features, threshold=None):
    print(f"Threshold: {threshold}")
    result = trainer.predict(datasets.Dataset.from_dict(dict(pred_features))).predictions
    if threshold is None:
        test_df[params['label_col']] = result[:, 0]
    else:
        test_df[params['label_col']] = apply_thr(result[:, 0], threshold, y_map)
    return test_df[[col for col in test_df.columns if col != 'text']]

In [11]:
transformer = transformers.AutoModelForSequenceClassification.from_pretrained(params['model_id'], num_labels=1)
_tokenizer = transformers.AutoTokenizer.from_pretrained(params['model_id'], model_max_length=params['max_seq'])
callbacks = [PTDefaultFlowCallback(), 
             transformers.EarlyStoppingCallback(early_stopping_patience=params['patience'])]

if isinstance(params['logging_steps'], int):
    save_strategy = transformers.IntervalStrategy.STEPS
else:
    save_strategy = transformers.IntervalStrategy.EPOCH 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at models--l3cube-pune--bengali-bert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
def get_trainer(valid_split=0):
    _model = Model(copy.deepcopy(transformer))
    optimizer = torch.optim.AdamW(_model.parameters(), lr=params['learning_rate'])
    model_dir = f"{params['model_dir']}{valid_split}"
    t_df, v_df = train_df[train_df['split'] != valid_split], train_df[train_df['split'] == valid_split]
    print(f"Train: {len(t_df)}, Valid: {len(v_df)}")
    train_features = prepare_feature(t_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    valid_features = prepare_feature(v_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    trainer = PTTrainer(model=_model,
        train_dataset=datasets.Dataset.from_dict(dict(train_features)),
        eval_dataset=datasets.Dataset.from_dict(dict(valid_features)) if valid_features is not None else None,
        args=transformers.TrainingArguments(
            label_names=['labels'],
            seed=params['seed'],
            data_seed=params['data_seed'],
            remove_unused_columns=False,
            report_to="tensorboard" if params['tensorboard_dir'] is not None else "none",
            logging_dir=params['tensorboard_dir'],
            per_device_train_batch_size=params['train_batch_size'], 
            per_device_eval_batch_size=params['eval_batch_size'],
            gradient_accumulation_steps=params['gradient_accumulation_steps'],
            warmup_steps=params['warmup_proportion'], 
            # max_steps=20,
            num_train_epochs=params['num_train_epochs'],
            learning_rate=params['learning_rate'], 
            fp16=params['fp16'],
            metric_for_best_model=params['metric'],
            # gradient_checkpointing=fp16,
            # evaluate_during_training=True,
            logging_steps=params['logging_steps'], 
            output_dir=model_dir,
            evaluation_strategy=save_strategy,
            save_strategy=save_strategy,
            logging_strategy=save_strategy,
            save_total_limit=1,
            greater_is_better=params['metric_mode'], 
            load_best_model_at_end=True,
            eval_steps=params['logging_steps'],
            save_steps=params['logging_steps'],
            overwrite_output_dir=True,
            max_grad_norm=1.0,
        ),
        optimizers=[optimizer, transformers.get_constant_schedule(optimizer)],
        callbacks=callbacks,
        data_collator=transformers.DataCollatorWithPadding(_tokenizer),
    )
    return _model, trainer, model_dir

# Train / Inference

In [13]:
pred_features = prepare_feature(test_df, params['label_col'], _tokenizer, max_length=params['eval_max_length'])
subm_df = None
thresholds = []
scores = []
cols = []
for fold in range(params['n_splits']):
    print(f"Fold: {fold + 1} out of {params['n_splits']}")
    col = f'y_{fold}'
    model, trainer, model_dir = get_trainer(fold)
    trainer.train()
    trainer.model.load(model_dir)
    subm = inference(pred_features)
    t, f1 = get_best_thr(trainer.state.log_history)
    thresholds.append(t)
    scores.append(f1)
    if subm_df is None:
        subm_df = subm
    subm_df[col] = subm[params['label_col']]
    del model, trainer
    torch.cuda.empty_cache()
    gc.collect()
    cols.append(col)

Fold: 1 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6815,0.661997,0.576125,0.662963,0.398664,0.45
2,0.6559,0.647622,0.572718,0.662963,0.398664,0.41
3,0.6441,0.639545,0.612376,0.644444,0.567308,0.38
4,0.6364,0.634742,0.585303,0.655556,0.575923,0.35
5,0.6271,0.63118,0.625821,0.692593,0.586004,0.33
6,0.6154,0.60043,0.701885,0.677778,0.657954,0.38
7,0.594,0.605611,0.656332,0.696296,0.656403,0.29
8,0.5661,0.583585,0.692492,0.725926,0.661218,0.5
9,0.5229,0.60117,0.682853,0.722222,0.677028,0.63
10,0.5058,0.598528,0.686875,0.7,0.661555,0.32


Model Saved: 0.3986636971046771
No improvments: 0.3986636971046771, patience: 0, lr: 1e-05
Model Saved: 0.5673076923076923
Model Saved: 0.5759233926128591
Model Saved: 0.5860043228464281
Model Saved: 0.6579541317801237
No improvments: 0.6564032528400273, patience: 0, lr: 1e-05
Model Saved: 0.6612181226261531
Model Saved: 0.6770283417598366
No improvments: 0.6615546511088071, patience: 0, lr: 1e-05
No improvments: 0.6612181226261531, patience: 1, lr: 1e-05
No improvments: 0.6681361543125912, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.6497145822522055, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.651070316124329, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/0
Threshold: None


Fold: 2 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6804,0.661246,0.432715,0.662963,0.398664,0.45
2,0.655,0.648252,0.490392,0.640741,0.530132,0.41
3,0.6454,0.642407,0.514519,0.6,0.458556,0.38
4,0.6397,0.641039,0.527473,0.618519,0.480953,0.37
5,0.6303,0.636593,0.576186,0.603704,0.574264,0.38
6,0.6187,0.638344,0.565873,0.62963,0.542187,0.31
7,0.5985,0.628032,0.620756,0.659259,0.591447,0.49
8,0.5658,0.645192,0.643471,0.62963,0.615363,0.56
9,0.5227,0.637683,0.63902,0.655556,0.606888,0.51
10,0.4787,0.669998,0.621677,0.625926,0.580326,0.53


Model Saved: 0.3986636971046771
Model Saved: 0.5301315057679542
No improvments: 0.4585561497326204, patience: 0, lr: 1e-05
No improvments: 0.4809533585919857, patience: 1, lr: 1e-05
Model Saved: 0.5742642832932994
No improvments: 0.5421866521975041, patience: 0, lr: 1e-05
Model Saved: 0.5914473684210526
Model Saved: 0.6153626987292723
No improvments: 0.6068884540117417, patience: 0, lr: 1e-05
No improvments: 0.5803259514612413, patience: 1, lr: 1e-05
No improvments: 0.6012759170653907, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.5954045954045954, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.5977403935688125, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/1
Threshold: None


Fold: 3 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6818,0.66134,0.581773,0.674074,0.476006,0.45
2,0.6562,0.647871,0.592148,0.659259,0.407499,0.41
3,0.6462,0.641813,0.600467,0.655556,0.442136,0.44
4,0.64,0.641821,0.560685,0.618519,0.530324,0.36
5,0.6347,0.626904,0.628031,0.692593,0.615338,0.34
6,0.6232,0.617151,0.652925,0.651852,0.618346,0.33
7,0.6137,0.623252,0.639788,0.696296,0.621538,0.48
8,0.5936,0.606814,0.658543,0.674074,0.631262,0.38
9,0.5621,0.61045,0.639143,0.714815,0.619753,0.56
10,0.5245,0.626849,0.64166,0.662963,0.613016,0.47


Model Saved: 0.47600564573041637
No improvments: 0.4074992844194256, patience: 0, lr: 1e-05
No improvments: 0.4421363666659261, patience: 1, lr: 1e-05
Model Saved: 0.5303237574099408
Model Saved: 0.6153384026502343
Model Saved: 0.6183458646616542
Model Saved: 0.6215384615384615
Model Saved: 0.631262027438078
No improvments: 0.6197530864197531, patience: 0, lr: 1e-05
No improvments: 0.6130160180183017, patience: 1, lr: 1e-05
No improvments: 0.6153384026502343, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.600328947368421, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.597318810222036, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/2
Threshold: None


Fold: 4 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6805,0.661271,0.550155,0.665428,0.399554,0.45
2,0.6557,0.647552,0.608939,0.665428,0.399554,0.41
3,0.6453,0.640772,0.633768,0.665428,0.399554,0.38
4,0.6409,0.636342,0.66586,0.680297,0.563839,0.36
5,0.6369,0.63753,0.359094,0.665428,0.399554,0.34
6,0.6389,0.636958,0.641899,0.665428,0.399554,0.34
7,0.6368,0.623686,0.70031,0.698885,0.652804,0.4
8,0.6188,0.613521,0.696245,0.654275,0.633531,0.42
9,0.5938,0.597571,0.690658,0.702602,0.65601,0.33
10,0.5639,0.601982,0.672874,0.70632,0.644733,0.31


Model Saved: 0.39955357142857145
No improvments: 0.39955357142857145, patience: 0, lr: 1e-05
No improvments: 0.39955357142857145, patience: 1, lr: 1e-05
Model Saved: 0.5638386123680241
No improvments: 0.39955357142857145, patience: 0, lr: 1e-05
No improvments: 0.39955357142857145, patience: 1, lr: 1e-05
Model Saved: 0.6528036713035996
No improvments: 0.6335310920676774, patience: 0, lr: 1e-05
Model Saved: 0.6560102301790282
No improvments: 0.6447331026296872, patience: 0, lr: 1e-05
No improvments: 0.633344558297654, patience: 1, lr: 1e-05
No improvments: 0.6267449052944816, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.6326115922718187, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.6185126758767946, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/3
Threshold: None


Fold: 5 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6797,0.661377,0.519522,0.665428,0.399554,0.45
2,0.6549,0.64672,0.582061,0.665428,0.399554,0.41
3,0.6446,0.638734,0.645003,0.657993,0.52843,0.38
4,0.639,0.628408,0.648572,0.639405,0.613214,0.38
5,0.6261,0.631747,0.622098,0.63197,0.594268,0.33
6,0.6005,0.623669,0.610304,0.665428,0.607948,0.36
7,0.5641,0.639779,0.627902,0.650558,0.584877,0.37
8,0.5314,0.646196,0.598355,0.609665,0.571792,0.27
9,0.5451,0.657682,0.508442,0.64684,0.530282,0.27


Model Saved: 0.39955357142857145
No improvments: 0.39955357142857145, patience: 0, lr: 1e-05
Model Saved: 0.5284298780487805
Model Saved: 0.6132135604275062
No improvments: 0.5942684766214178, patience: 0, lr: 1e-05
No improvments: 0.6079479207151185, patience: 1, lr: 1e-05
No improvments: 0.5848765432098766, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.5717924227952882, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.530282143185369, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/4
Threshold: None


Fold: 6 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6803,0.661238,0.52036,0.665428,0.399554,0.45
2,0.6542,0.646072,0.538268,0.66171,0.408505,0.4
3,0.6441,0.639267,0.590317,0.594796,0.567135,0.37
4,0.6388,0.633472,0.59969,0.613383,0.576891,0.36
5,0.6304,0.62531,0.613811,0.628253,0.591185,0.36
6,0.6149,0.620329,0.629547,0.669145,0.599763,0.43
7,0.5969,0.609089,0.65779,0.67658,0.631594,0.3
8,0.5726,0.624057,0.636561,0.650558,0.625947,0.34
9,0.537,0.626695,0.594258,0.665428,0.587367,0.28
10,0.5138,0.627951,0.640937,0.66171,0.596702,0.24


Model Saved: 0.39955357142857145
Model Saved: 0.40850549716080703
Model Saved: 0.5671346531437766
Model Saved: 0.5768905021173624
Model Saved: 0.5911854103343466
Model Saved: 0.599762609291673
Model Saved: 0.6315938606847697
No improvments: 0.6259467455621301, patience: 0, lr: 1e-05
No improvments: 0.5873670575402236, patience: 1, lr: 1e-05
No improvments: 0.5967016491754122, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.5967016491754122, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.6084792713272924, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/5
Threshold: None


Fold: 7 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6796,0.660243,0.532464,0.657993,0.559895,0.44
2,0.6549,0.646256,0.577219,0.657993,0.574777,0.4
3,0.6445,0.639224,0.5946,0.669145,0.519845,0.38
4,0.6389,0.633972,0.630726,0.561338,0.558648,0.35
5,0.6333,0.63642,0.546927,0.684015,0.541425,0.33
6,0.6278,0.621985,0.666636,0.687732,0.608524,0.35
7,0.6157,0.620746,0.6527,0.684015,0.605511,0.32
8,0.6014,0.614866,0.668622,0.684015,0.608724,0.31
9,0.5828,0.613361,0.645251,0.69145,0.604727,0.31
10,0.5643,0.599814,0.696214,0.69145,0.637225,0.3


Model Saved: 0.5598947218665529
Model Saved: 0.5747766323024055
No improvments: 0.5198451695714086, patience: 0, lr: 1e-05
No improvments: 0.5586484983314794, patience: 1, lr: 1e-05
No improvments: 0.5414251619502216, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6085239085239085
No improvments: 0.605510601956488, patience: 0, lr: 1.0453658778440268e-06
Model Saved: 0.6087239249105875
No improvments: 0.6047269186509693, patience: 0, lr: 6.579634122156521e-06
Model Saved: 0.6372247948655455
No improvments: 0.6302694991039894, patience: 0, lr: 6.579634122155324e-06
No improvments: 0.6347321063394683, patience: 1, lr: 1.045365877844104e-06
Model Saved: 0.6410201492660264
No improvments: 0.6259304703476483, patience: 0, lr: 6.5796341221570615e-06
No improvments: 0.6323646466921979, patience: 1, lr: 1.0000000000001578e-05
No improvments: 0.629163516698537, patience: 2, lr: 6.579634122155887e-06
Model Saved: 0.643448055212761
Model Saved: 0.6451994360303817
Mode

Fold: 8 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6791,0.660581,0.603933,0.66171,0.39821,0.44
2,0.6546,0.64793,0.573126,0.501859,0.501024,0.4
3,0.6439,0.640153,0.593715,0.513011,0.507677,0.37
4,0.6356,0.635678,0.574021,0.591078,0.554582,0.35
5,0.6168,0.63446,0.604766,0.635688,0.597355,0.46
6,0.5898,0.631939,0.611063,0.609665,0.577708,0.3
7,0.5484,0.639546,0.603223,0.64684,0.604516,0.41
8,0.5063,0.639822,0.62162,0.654275,0.606187,0.49
9,0.4666,0.655475,0.625386,0.635688,0.614546,0.32
10,0.4366,0.66458,0.629893,0.67658,0.596228,0.25


Model Saved: 0.3982102908277405
Model Saved: 0.5010243632336656
Model Saved: 0.5076770470961343
Model Saved: 0.5545821290944123
Model Saved: 0.5973545943304008
No improvments: 0.5777080062794349, patience: 0, lr: 1e-05
Model Saved: 0.6045158394849652
Model Saved: 0.6061865407319953
Model Saved: 0.614545560884314
No improvments: 0.5962284984731112, patience: 0, lr: 1e-05
No improvments: 0.5723064830198117, patience: 1, lr: 1e-05
No improvments: 0.5811044372572172, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6306561854036845
No improvments: 0.6230697002527988, patience: 0, lr: 1.0453658778440268e-06
No improvments: 0.608407605466429, patience: 1, lr: 1.045365877844024e-06
No improvments: 0.6193701830443159, patience: 2, lr: 6.579634122156521e-06
Model Saved: 0.635567196531792
No improvments: 0.6283161385408991, patience: 0, lr: 6.579634122155324e-06
No improvments: 0.6190691087907129, patience: 1, lr: 1.045365877844104e-06
No improvments: 0.627026869244129

Fold: 9 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6796,0.660835,0.551025,0.665428,0.42944,0.44
2,0.6544,0.647703,0.557198,0.669145,0.421505,0.4
3,0.6443,0.642449,0.575627,0.665428,0.44726,0.37
4,0.6396,0.637586,0.462681,0.64684,0.499363,0.35
5,0.6344,0.632948,0.610322,0.66171,0.604903,0.41
6,0.6251,0.631047,0.635819,0.594796,0.572062,0.44
7,0.6049,0.630463,0.636961,0.657993,0.596518,0.51
8,0.5743,0.633049,0.571861,0.64684,0.569527,0.34
9,0.5355,0.656974,0.607174,0.63197,0.572954,0.57
10,0.5505,0.674998,0.634152,0.576208,0.566129,0.29


Model Saved: 0.42944004524886875
No improvments: 0.421505376344086, patience: 0, lr: 1e-05
Model Saved: 0.4472602739726027
Model Saved: 0.49936330688608094
Model Saved: 0.6049034007456785
No improvments: 0.5720624078695797, patience: 0, lr: 1e-05
No improvments: 0.5965175427155341, patience: 1, lr: 1e-05
No improvments: 0.5695274993683146, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.5729542502525617, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.5661290322580645, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/8
Threshold: None


Fold: 10 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6806,0.661862,0.514755,0.66171,0.39821,0.45
2,0.6551,0.648347,0.575225,0.66171,0.39821,0.41
3,0.6452,0.642207,0.570379,0.66171,0.39821,0.38
4,0.6404,0.637776,0.59779,0.672862,0.575162,0.36
5,0.6323,0.628117,0.621558,0.665428,0.584021,0.39
6,0.6061,0.622462,0.637054,0.64684,0.602325,0.4
7,0.5595,0.646235,0.622916,0.598513,0.571959,0.31
8,0.5103,0.637139,0.6465,0.64684,0.608682,0.33
9,0.4802,0.630796,0.659588,0.665428,0.626343,0.4
10,0.4194,0.640277,0.665267,0.669145,0.629494,0.23


Model Saved: 0.3982102908277405
No improvments: 0.3982102908277405, patience: 0, lr: 1e-05
No improvments: 0.3982102908277405, patience: 1, lr: 1e-05
Model Saved: 0.5751615218951902
Model Saved: 0.5840206185567011
Model Saved: 0.6023248937925024
No improvments: 0.571958981612447, patience: 0, lr: 1e-05
Model Saved: 0.6086823367276626
Model Saved: 0.6263427583652303
Model Saved: 0.6294937864648622
Model Saved: 0.6320668693009119
No improvments: 0.6314784884168398, patience: 0, lr: 1e-05
No improvments: 0.6209933075026417, patience: 1, lr: 1e-05
No improvments: 0.6132135604275062, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.6227320371436587, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.6275527864312911, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/9
Threshold: None


In [14]:
thresholds, scores

([0.63, 0.56, 0.38, 0.33, 0.38, 0.3, 0.75, 0.12, 0.41000000000000003, 0.23],
 [0.6770283417598366,
  0.6153626987292723,
  0.631262027438078,
  0.6560102301790282,
  0.6132135604275062,
  0.6315938606847697,
  0.664349376114082,
  0.635567196531792,
  0.6049034007456785,
  0.6320668693009119])

In [15]:
y_map = {0.0: 'Negative', 1.0: 'Positive'}
thr = np.array(thresholds).mean()
print(f"Best threshold: {thr}")
print(f"Best score: {np.array(scores).mean()}")
subm_df['avg'] = subm_df[cols].apply('mean', axis=1)
subm_df[params['label_col']] = apply_thr(subm_df['avg'], thr, y_map)

Best threshold: 0.409
Best score: 0.6361357561910955


In [17]:
subm_df.head(20)

Unnamed: 0,Ids,OCR,task_1,y_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,avg
0,image_ben_1422.png,বৌ-এর প্রেগনেন্সি রিপোর্ট পজিটিভ জানার পর বর চ...,Positive,0.756598,0.726574,0.694848,0.211931,0.258446,0.333247,0.866512,0.094603,0.397723,0.875627,0.521611
1,image_ben_3908.jpg,কিন্তু মিষ্টতা নেই,Negative,0.158137,0.167763,0.198197,0.20847,0.258289,0.221382,0.079579,0.103506,0.252656,0.083301,0.173128
2,image_ben_4634.jpg,মানুষ যা দেখে প্রথম সিঙারা আবিষ্কারের ধারণা পায়,Positive,0.759429,0.742555,0.712545,0.699032,0.635443,0.69339,0.882138,0.905293,0.647585,0.875158,0.755257
3,image_ben_3192.png,এক মেয়ে ফ্যানে ওড়না পাচিয়ে সুই*সাইড করতে যাচ্ছ...,Negative,0.159403,0.180209,0.198676,0.208291,0.258787,0.221133,0.082621,0.077005,0.54259,0.086377,0.201509
4,image_ben_4836.jpg,No text detected,Negative,0.158329,0.168225,0.198251,0.208401,0.258507,0.220299,0.077872,0.122565,0.252441,0.079659,0.174455
5,image_ben_3907.jpg,বাঘের চামড়া দিয়ে বিড়াল ঢেকে রাখার একটি চিত্র।,Positive,0.707538,0.5532,0.206425,0.689686,0.258257,0.687782,0.857298,0.081603,0.542554,0.875199,0.545954
6,image_ben_4636.jpg,"আমার উপর রেগে আছো রিয়া? না, রেগে নেই তাহলে মুখ...",Negative,0.158023,0.168878,0.19936,0.208551,0.258904,0.219469,0.077874,0.074663,0.252714,0.077862,0.16963
7,image_ben_5080.jpg,নিঃস্ব উপেন তার দুই বিঘা জমি,Negative,0.158166,0.168277,0.197724,0.208321,0.258345,0.219638,0.077015,0.098507,0.257988,0.077395,0.172138
8,image_ben_3704.jpg,"চলো চিন্টু মজা হবে ! Yes, chumki তাড়াতাড়ি এসো ...",Negative,0.15932,0.168174,0.672235,0.208177,0.258125,0.219557,0.077326,0.078953,0.606166,0.07775,0.252578
9,image_ben_5071.jpg,No text detected,Negative,0.158329,0.168225,0.198251,0.208401,0.258507,0.220299,0.077872,0.122565,0.252441,0.079659,0.174455


In [19]:
subm_df.loc[(subm_df['avg'] <= 0.5) & (subm_df['task_1'] == 'Positive'), 'task_1'] = 'Neutral'

In [20]:
subm_df.head(20)

Unnamed: 0,Ids,OCR,task_1,y_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,avg
0,image_ben_1422.png,বৌ-এর প্রেগনেন্সি রিপোর্ট পজিটিভ জানার পর বর চ...,Positive,0.756598,0.726574,0.694848,0.211931,0.258446,0.333247,0.866512,0.094603,0.397723,0.875627,0.521611
1,image_ben_3908.jpg,কিন্তু মিষ্টতা নেই,Negative,0.158137,0.167763,0.198197,0.20847,0.258289,0.221382,0.079579,0.103506,0.252656,0.083301,0.173128
2,image_ben_4634.jpg,মানুষ যা দেখে প্রথম সিঙারা আবিষ্কারের ধারণা পায়,Positive,0.759429,0.742555,0.712545,0.699032,0.635443,0.69339,0.882138,0.905293,0.647585,0.875158,0.755257
3,image_ben_3192.png,এক মেয়ে ফ্যানে ওড়না পাচিয়ে সুই*সাইড করতে যাচ্ছ...,Negative,0.159403,0.180209,0.198676,0.208291,0.258787,0.221133,0.082621,0.077005,0.54259,0.086377,0.201509
4,image_ben_4836.jpg,No text detected,Negative,0.158329,0.168225,0.198251,0.208401,0.258507,0.220299,0.077872,0.122565,0.252441,0.079659,0.174455
5,image_ben_3907.jpg,বাঘের চামড়া দিয়ে বিড়াল ঢেকে রাখার একটি চিত্র।,Positive,0.707538,0.5532,0.206425,0.689686,0.258257,0.687782,0.857298,0.081603,0.542554,0.875199,0.545954
6,image_ben_4636.jpg,"আমার উপর রেগে আছো রিয়া? না, রেগে নেই তাহলে মুখ...",Negative,0.158023,0.168878,0.19936,0.208551,0.258904,0.219469,0.077874,0.074663,0.252714,0.077862,0.16963
7,image_ben_5080.jpg,নিঃস্ব উপেন তার দুই বিঘা জমি,Negative,0.158166,0.168277,0.197724,0.208321,0.258345,0.219638,0.077015,0.098507,0.257988,0.077395,0.172138
8,image_ben_3704.jpg,"চলো চিন্টু মজা হবে ! Yes, chumki তাড়াতাড়ি এসো ...",Negative,0.15932,0.168174,0.672235,0.208177,0.258125,0.219557,0.077326,0.078953,0.606166,0.07775,0.252578
9,image_ben_5071.jpg,No text detected,Negative,0.158329,0.168225,0.198251,0.208401,0.258507,0.220299,0.077872,0.122565,0.252441,0.079659,0.174455


In [21]:
subm_df[['Ids', params['label_col']]].to_csv('submission_task1.csv', index=False)

# TASK 2

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import logging
import random
from functools import partial
import sklearn
from sklearn.model_selection import StratifiedKFold
import datasets
import transformers
import torch
import copy
from torch import nn
import gc
import torch.nn.functional as F
from tqdm.auto import tqdm, trange
logger = logging.getLogger(__name__)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def set_seeds(self, seed=2023): 
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

In [4]:
# Source: https://huggingface.co/l3cube-pune/assamese-bert
params = dict(learning_rate=1.0e-5, seed=2023, data_seed=2023, n_splits=10,
              model_id='models--l3cube-pune--bengali-bert', tensorboard_dir='tensorboard', label_col='task_2',
              patience=5, metric='f1', metric_mode='max', train_batch_size=32, eval_batch_size=32,
              train_max_length=512, eval_max_length=512, learning_rate_scheduling='cosine_annealing',
              gradient_accumulation_steps=1, warmup_proportion=50, num_train_epochs=100, max_seq=512,
              fp16=False, logging_steps=None, model_dir='/model/', password='Bangla')

y_map = {0.0: 'Non-Sarcastic', 1.0: 'Sarcastic'}

In [5]:
set_seeds(params['seed'])

# Data Preparation

In [6]:
import getpass
import zipfile
from pathlib import Path
def prepare_data(path, password, train='Bangla_train_data.csv', test='bengali_test_data_wo_label.csv'):
    zfs = [zipfile.ZipFile(path / f.replace('.csv', '.zip')) for f in [train, test]]
    return [pd.read_csv(zfs[i].open(f, 'r', password)) for i, f in enumerate([train, test])]

def split(train_df, y, n_splits=5, data_seed=2023, shuffle=True):
    skf = StratifiedKFold(n_splits=n_splits, random_state=data_seed, shuffle=shuffle)
    train_df['split'] = -1
    for i, (train_index, test_index) in enumerate(skf.split(train_df.index, y)):
        train_df.loc[train_df.index.isin(test_index), 'split'] = i
    return train_df        

path = Path('Bangla')
password =  bytes(getpass.getpass(f"Input file password") if 'password' not in params else params['password'], 'utf-8')
train_df, test_df = prepare_data(path, password=password)
train_df = split(train_df, train_df['Sarcasm'], n_splits=params['n_splits'], data_seed=params['data_seed'])
train_df['task_2'] = train_df['Sarcasm'].map({v: k for k, v in y_map.items()})
print(len(train_df), len(test_df))

2693 1821


In [7]:
def prepare_feature(df, y_col, tokenizer, max_length):
    texts = df['OCR'].astype(str).tolist()
    features = tokenizer(texts, padding=False, truncation=True, return_tensors=None, max_length=max_length).data
    if y_col in df.columns:
        features['labels'] = df[y_col].tolist()
    return features

In [8]:
train_df.head()

Unnamed: 0,Ids,Sentiment,Sarcasm,Vulgar,Abuse,Target,OCR,split,task_2
0,image_ben_3635.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,I don't need any fucking GIRL বাঁরা কি সাইলেন...,4,1.0
1,image_ben_1586.png,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"After a little problem in relationship Boys, G...",0,1.0
2,image_ben_4040.jpg,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"মা জানো, পাশের বাড়ির চুলবুলি আমার থেকে বেশি অ...",6,1.0
3,image_ben_5074.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,Gf কে ফিঙ্গারং করে দেওয়ার পর যখন সে চরমসুখ অনু...,0,1.0
4,image_ben_7351.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,টিপতে টিপতে যখন ব্যাথ্যা অনুভব হয়,2,1.0


# Prepare Transformer Trainer

In [9]:
class Model(nn.Module):
    def __init__(self, transformer):
        super().__init__()
        self.transformer = transformer 
        
    def m_loss(self, logits, labels):
        return F.binary_cross_entropy_with_logits(logits.squeeze(-1), labels)
        
    def forward(self, **kwargs):
        labels = kwargs.pop("labels", None)
        output = self.transformer(**kwargs)
        output.loss = self.m_loss(output.logits, labels) if labels != None else output.loss
        output.logits = F.sigmoid(output.logits)
        return output
    
    def load(self, model_dir):
        self.transformer.from_pretrained(model_dir)
        print(f"Model loaded: {model_dir}")
    
    def save(self, model_dir):
        self.transformer.save_pretrained(model_dir)
    
class PTTrainer(transformers.Trainer):
    def __init__(self, **kwargs): 
        super().__init__(**kwargs)
        self.compute_metrics = self._metrics if self.eval_dataset is not None else None
        self._patience = 0

    def _metrics(self, eval_preds):
        """
        Evaluation metrics
        """
        threshold = 0.5
        thresholds = np.arange(0.01, 1.0, 0.01) 
        scores, labels = eval_preds
        acc, f1, thr = -1, -1, 0
        auc = sklearn.metrics.roc_auc_score(labels, scores)
        for threshold in thresholds:
            predictions = (scores >= threshold).astype(int)
            new_f1 = sklearn.metrics.f1_score(labels, predictions, average='macro')
            if new_f1 > f1:
                acc = sklearn.metrics.accuracy_score(labels, predictions)
                f1 = new_f1
                thr = threshold
        return {'auc': auc, 'accuracy': acc, 'f1': f1, 'thr': thr}
        
    def compute_loss(self, model, inputs, return_outputs=False):
        """
        forward/backward step
        """
        output = model(**inputs)
        return (output.loss, {'logits': output.logits}) if return_outputs else output.loss

    def _save_checkpoint(self, model, trial, metrics=None):
        # Determine the new best metric / best model checkpoint
        if metrics is not None and self.args.metric_for_best_model is not None:
            metric_to_check = self.args.metric_for_best_model
            if not metric_to_check.startswith("eval_"):
                metric_to_check = f"eval_{metric_to_check}"
            metric_value = metrics[metric_to_check]

            operator = np.greater if self.args.greater_is_better else np.less
            if (
                self.state.best_metric is None
                # or self.state.best_model_checkpoint is None
                or operator(metric_value, self.state.best_metric)
            ):
                self.state.best_metric = metric_value
                # self.state.best_model_checkpoint = self.args.output_dir
                model.save(self.args.output_dir)
                print(f'Model Saved: {metric_value}')
                self._patience = 0
            else:
                print(f"No improvments: {metric_value}, patience: {self._patience}, lr: {self.optimizer.param_groups[0]['lr']}")
                self._patience += 1
            # LR annealing
            if params['learning_rate_scheduling'].lower() == 'cosine_annealing':
                if (self._patience - 1) == int(round(params['patience'] / 2)):
                    if not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.CosineAnnealingLR):
                        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=10, eta_min=max(self.args.learning_rate * 0.01, 1.0e-7), verbose=False)
                        print(f'Starting CosineAnnealingLR schedular.')
                        self._patience -= 1
        else:
            model.save(self.args.output_dir)
            print('Model Saved')


class PTDefaultFlowCallback(transformers.DefaultFlowCallback):
    def on_epoch_end(self, args, state, control, **kwargs):
        control = super().on_epoch_end(args, state, control, **kwargs)
        # Evaluate
        if args.evaluation_strategy != transformers.IntervalStrategy.EPOCH \
            and state.epoch == args.num_train_epochs:
            control.should_evaluate = state.best_metric is not None
            control.should_save = True
        return control
    
def get_best_thr(history):
    max_i = -1
    for i, record in enumerate(history):
        if f"eval_{params['metric']}" in record:
            if max_i == -1 or record[f"eval_{params['metric']}"] > history[max_i][f"eval_{params['metric']}"]:
                max_i = i
    return history[max_i]['eval_thr'], history[max_i]['eval_f1']

def apply_thr(y, threshold, mapper=None):
    if mapper is None:
        return (y > threshold).astype(int)
    else:
        return [mapper[x] for x in (y > threshold).astype(int)]

def inference(pred_features, threshold=None):
    print(f"Threshold: {threshold}")
    result = trainer.predict(datasets.Dataset.from_dict(dict(pred_features))).predictions
    if threshold is None:
        test_df[params['label_col']] = result[:, 0]
    else:
        test_df[params['label_col']] = apply_thr(result[:, 0], threshold, y_map)
    return test_df[[col for col in test_df.columns if col != 'text']]

In [10]:
transformer = transformers.AutoModelForSequenceClassification.from_pretrained(params['model_id'], num_labels=1)
_tokenizer = transformers.AutoTokenizer.from_pretrained(params['model_id'], model_max_length=params['max_seq'])
callbacks = [PTDefaultFlowCallback(), 
             transformers.EarlyStoppingCallback(early_stopping_patience=params['patience'])]

if isinstance(params['logging_steps'], int):
    save_strategy = transformers.IntervalStrategy.STEPS
else:
    save_strategy = transformers.IntervalStrategy.EPOCH 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at models--l3cube-pune--bengali-bert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
def get_trainer(valid_split=0):
    _model = Model(copy.deepcopy(transformer))
    optimizer = torch.optim.AdamW(_model.parameters(), lr=params['learning_rate'])
    model_dir = f"{params['model_dir']}{valid_split}"
    t_df, v_df = train_df[train_df['split'] != valid_split], train_df[train_df['split'] == valid_split]
    print(f"Train: {len(t_df)}, Valid: {len(v_df)}")
    train_features = prepare_feature(t_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    valid_features = prepare_feature(v_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    trainer = PTTrainer(model=_model,
        train_dataset=datasets.Dataset.from_dict(dict(train_features)),
        eval_dataset=datasets.Dataset.from_dict(dict(valid_features)) if valid_features is not None else None,
        args=transformers.TrainingArguments(
            label_names=['labels'],
            seed=params['seed'],
            data_seed=params['data_seed'],
            remove_unused_columns=False,
            report_to="tensorboard" if params['tensorboard_dir'] is not None else "none",
            logging_dir=params['tensorboard_dir'],
            per_device_train_batch_size=params['train_batch_size'], 
            per_device_eval_batch_size=params['eval_batch_size'],
            gradient_accumulation_steps=params['gradient_accumulation_steps'],
            warmup_steps=params['warmup_proportion'], 
            # max_steps=20,
            num_train_epochs=params['num_train_epochs'],
            learning_rate=params['learning_rate'], 
            fp16=params['fp16'],
            metric_for_best_model=params['metric'],
            # gradient_checkpointing=fp16,
            # evaluate_during_training=True,
            logging_steps=params['logging_steps'], 
            output_dir=model_dir,
            evaluation_strategy=save_strategy,
            save_strategy=save_strategy,
            logging_strategy=save_strategy,
            save_total_limit=1,
            greater_is_better=params['metric_mode'], 
            load_best_model_at_end=True,
            eval_steps=params['logging_steps'],
            save_steps=params['logging_steps'],
            overwrite_output_dir=True,
            max_grad_norm=1.0,
        ),
        optimizers=[optimizer, transformers.get_constant_schedule(optimizer)],
        callbacks=callbacks,
        data_collator=transformers.DataCollatorWithPadding(_tokenizer),
    )
    return _model, trainer, model_dir

# Train / Inference

In [13]:
pred_features = prepare_feature(test_df, params['label_col'], _tokenizer, max_length=params['eval_max_length'])
subm_df = None
thresholds = []
scores = []
cols = []
for fold in range(params['n_splits']):
    print(f"Fold: {fold + 1} out of {params['n_splits']}")
    col = f'y_{fold}'
    model, trainer, model_dir = get_trainer(fold)
    trainer.train()
    trainer.model.load(model_dir)
    subm = inference(pred_features)
    t, f1 = get_best_thr(trainer.state.log_history)
    thresholds.append(t)
    scores.append(f1)
    if subm_df is None:
        subm_df = subm
    subm_df[col] = subm[params['label_col']]
    del model, trainer
    torch.cuda.empty_cache()
    gc.collect()
    cols.append(col)

Fold: 1 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6583,0.621271,0.565417,0.737037,0.450323,0.56
2,0.6031,0.586219,0.487019,0.725926,0.48827,0.63
3,0.5686,0.560498,0.532199,0.737037,0.450323,0.56
4,0.5445,0.546546,0.556789,0.744444,0.488454,0.7
5,0.5295,0.539703,0.523727,0.737037,0.494609,0.76
6,0.5175,0.539935,0.476547,0.733333,0.492481,0.76
7,0.5045,0.545379,0.561573,0.711111,0.521364,0.8
8,0.4887,0.518194,0.606401,0.72963,0.606579,0.59
9,0.4637,0.532188,0.652796,0.72963,0.611235,0.83
10,0.4364,0.542779,0.59887,0.740741,0.583186,0.82


Model Saved: 0.4503225806451613
Model Saved: 0.4882696445036369
No improvments: 0.4503225806451613, patience: 0, lr: 1e-05
Model Saved: 0.48845382904527856
Model Saved: 0.49460863146239226
No improvments: 0.49248120300751874, patience: 0, lr: 1e-05
Model Saved: 0.5213636363636364
Model Saved: 0.6065789736322083
Model Saved: 0.6112349356003077
No improvments: 0.5831863091037403, patience: 0, lr: 1e-05
Model Saved: 0.6290106951871658
No improvments: 0.6126568466993999, patience: 0, lr: 1e-05
No improvments: 0.536303787806507, patience: 1, lr: 1e-05
No improvments: 0.5634095634095634, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.6201096201096201, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.6039408866995074, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/0
Threshold: None


Fold: 2 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.658,0.620824,0.592781,0.77037,0.435146,0.01
2,0.603,0.583307,0.594176,0.77037,0.435146,0.01
3,0.5707,0.558221,0.604955,0.792593,0.571234,0.68
4,0.5495,0.542746,0.592199,0.777778,0.585083,0.72
5,0.5373,0.525831,0.587895,0.77037,0.579143,0.75
6,0.5281,0.522696,0.571534,0.792593,0.571234,0.59
7,0.5217,0.518757,0.549628,0.792593,0.571234,0.58
8,0.5183,0.514197,0.590881,0.781481,0.6168,0.69
9,0.5098,0.50808,0.602667,0.788889,0.635955,0.71
10,0.5043,0.51122,0.615462,0.792593,0.605263,0.78


Model Saved: 0.4351464435146444
No improvments: 0.4351464435146444, patience: 0, lr: 1e-05
Model Saved: 0.5712341197822142
Model Saved: 0.5850834955434894
No improvments: 0.5791432019308126, patience: 0, lr: 1e-05
No improvments: 0.5712341197822142, patience: 1, lr: 1e-05
No improvments: 0.5712341197822142, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6168001731976618
Model Saved: 0.6359550561797753
No improvments: 0.6052631578947368, patience: 0, lr: 1.045365877844024e-06
No improvments: 0.6151053013798111, patience: 1, lr: 6.579634122156521e-06
No improvments: 0.6200504658385093, patience: 2, lr: 9.999999999999958e-06
No improvments: 0.6118393234672305, patience: 3, lr: 6.579634122155324e-06
Model Saved: 0.6384707729245545
Model Saved: 0.6434888761553601
Model Saved: 0.6460887949260042
No improvments: 0.616364825548614, patience: 0, lr: 1.0000000000001578e-05
No improvments: 0.6127445958405902, patience: 1, lr: 6.579634122155887e-06
No improvments: 0.61

Fold: 3 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6592,0.622238,0.618603,0.77037,0.435146,0.01
2,0.6035,0.583569,0.699287,0.762963,0.539887,0.63
3,0.5703,0.557493,0.591928,0.725926,0.545909,0.68
4,0.5492,0.544548,0.560135,0.77037,0.435146,0.01
5,0.5392,0.539349,0.638609,0.766667,0.551554,0.75
6,0.5342,0.53009,0.561763,0.759259,0.577831,0.61
7,0.5295,0.526839,0.640354,0.766667,0.560181,0.74
8,0.5262,0.525963,0.651559,0.748148,0.582727,0.79
9,0.5258,0.525436,0.698627,0.737037,0.574104,0.78
10,0.5201,0.525022,0.729025,0.762963,0.587826,0.78


Model Saved: 0.4351464435146444
Model Saved: 0.5398871019277879
Model Saved: 0.5459090909090909
No improvments: 0.4351464435146444, patience: 0, lr: 1e-05
Model Saved: 0.5515541377764889
Model Saved: 0.5778306992855596
No improvments: 0.5601809954751131, patience: 0, lr: 1e-05
Model Saved: 0.5827272727272728
No improvments: 0.5741041078847393, patience: 0, lr: 1e-05
Model Saved: 0.5878255891613395
No improvments: 0.5791432019308126, patience: 0, lr: 1e-05
Model Saved: 0.6757973733583489
No improvments: 0.6285437199756423, patience: 0, lr: 1e-05
No improvments: 0.6620493991989319, patience: 1, lr: 1e-05
No improvments: 0.6509372979961215, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6881496881496881
No improvments: 0.6816037735849056, patience: 0, lr: 1.0453658778440268e-06
No improvments: 0.6507843791722296, patience: 1, lr: 1.045365877844024e-06
No improvments: 0.6816037735849056, patience: 2, lr: 6.579634122156521e-06
No improvments: 0.6664195700518902,

Fold: 4 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6593,0.621477,0.592331,0.773234,0.436059,0.01
2,0.605,0.583099,0.56849,0.773234,0.436059,0.01
3,0.5715,0.556314,0.579327,0.747212,0.56891,0.68
4,0.5506,0.54113,0.602853,0.773234,0.436059,0.01
5,0.5393,0.532666,0.5705,0.739777,0.588793,0.69
6,0.5335,0.521666,0.573652,0.765799,0.583376,0.68
7,0.5269,0.519977,0.591622,0.776952,0.576778,0.57
8,0.5197,0.518794,0.583819,0.702602,0.594268,0.78
9,0.5095,0.51074,0.631384,0.758364,0.609753,0.8
10,0.4967,0.512203,0.636625,0.769517,0.613219,0.62


Model Saved: 0.4360587002096436
No improvments: 0.4360587002096436, patience: 0, lr: 1e-05
Model Saved: 0.5689102564102564
No improvments: 0.4360587002096436, patience: 0, lr: 1e-05
Model Saved: 0.5887928022361985
No improvments: 0.5833763551884357, patience: 0, lr: 1e-05
No improvments: 0.5767778477029578, patience: 1, lr: 1e-05
Model Saved: 0.5942684766214178
Model Saved: 0.6097533757393148
Model Saved: 0.6132189239332096
Model Saved: 0.6192237442922375
Model Saved: 0.6325216297677646
No improvments: 0.6192237442922375, patience: 0, lr: 1e-05
Model Saved: 0.6428047644529005
No improvments: 0.6272517321016167, patience: 0, lr: 1e-05
No improvments: 0.6315068493150685, patience: 1, lr: 1e-05
No improvments: 0.573015873015873, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6450250725785167
No improvments: 0.6405591530475897, patience: 0, lr: 1.0453658778440268e-06
Model Saved: 0.6483085471482268
No improvments: 0.6424140375753279, patience: 0, lr: 6.57963412

Fold: 5 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6601,0.620922,0.568451,0.773234,0.436059,0.01
2,0.6035,0.581066,0.571682,0.773234,0.436059,0.01
3,0.5696,0.553702,0.574874,0.776952,0.529939,0.68
4,0.5484,0.534743,0.560805,0.769517,0.578838,0.72
5,0.5348,0.526308,0.560254,0.747212,0.56891,0.75
6,0.5222,0.521194,0.574559,0.780669,0.571251,0.76
7,0.5063,0.513219,0.587169,0.758364,0.603847,0.78
8,0.4901,0.522022,0.56648,0.724907,0.593357,0.78
9,0.4593,0.524691,0.577278,0.758364,0.603847,0.78
10,0.444,0.523466,0.578696,0.784387,0.55565,0.47


Model Saved: 0.4360587002096436
No improvments: 0.4360587002096436, patience: 0, lr: 1e-05
Model Saved: 0.5299394221808015
Model Saved: 0.5788383838383838
No improvments: 0.5689102564102564, patience: 0, lr: 1e-05
No improvments: 0.5712510468163277, patience: 1, lr: 1e-05
Model Saved: 0.6038471124000272
No improvments: 0.5933567576401373, patience: 0, lr: 1e-05
No improvments: 0.6038471124000272, patience: 1, lr: 1e-05
No improvments: 0.5556504898610162, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.5857381988617342, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.5940643863179075, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/4
Threshold: None


Fold: 6 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6596,0.621383,0.541732,0.773234,0.436059,0.01
2,0.6054,0.583304,0.560569,0.773234,0.436059,0.01
3,0.5721,0.556687,0.567544,0.773234,0.436059,0.01
4,0.551,0.542428,0.554067,0.743494,0.517959,0.72
5,0.5378,0.528526,0.577002,0.765799,0.559875,0.74
6,0.5448,0.532913,0.55785,0.773234,0.573848,0.77
7,0.5302,0.532829,0.559269,0.728625,0.540945,0.66
8,0.5258,0.526886,0.578815,0.736059,0.579919,0.63
9,0.5131,0.516272,0.62098,0.762082,0.618575,0.77
10,0.4918,0.523314,0.653058,0.72119,0.604761,0.56


Model Saved: 0.4360587002096436
No improvments: 0.4360587002096436, patience: 0, lr: 1e-05
No improvments: 0.4360587002096436, patience: 1, lr: 1e-05
Model Saved: 0.5179587066614726
Model Saved: 0.5598753408648227
Model Saved: 0.5738475522659394
No improvments: 0.5409449002968885, patience: 0, lr: 1e-05
Model Saved: 0.5799186187176949
Model Saved: 0.6185749734136831
No improvments: 0.6047605054363796, patience: 0, lr: 1e-05
No improvments: 0.6114698385726423, patience: 1, lr: 1e-05
No improvments: 0.5886850152905199, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.6166163648558015, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.5915613438549219, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/5
Threshold: None


Fold: 7 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6584,0.620905,0.624133,0.773234,0.436059,0.01
2,0.604,0.581707,0.571642,0.773234,0.436059,0.01
3,0.5698,0.555803,0.503074,0.743494,0.477111,0.68
4,0.549,0.543363,0.513871,0.657993,0.517921,0.72
5,0.535,0.539165,0.505951,0.713755,0.523412,0.69
6,0.5238,0.536132,0.512453,0.698885,0.540287,0.77
7,0.5094,0.533336,0.487153,0.750929,0.540663,0.78
8,0.4932,0.531179,0.480454,0.747212,0.554202,0.67
9,0.4702,0.541123,0.586026,0.743494,0.558964,0.81
10,0.4367,0.559322,0.544609,0.713755,0.584585,0.84


Model Saved: 0.4360587002096436
No improvments: 0.4360587002096436, patience: 0, lr: 1e-05
Model Saved: 0.4771107417528242
Model Saved: 0.5179211469534051
Model Saved: 0.5234117944824095
Model Saved: 0.5402873599594911
Model Saved: 0.5406631495782043
Model Saved: 0.5542015987521934
Model Saved: 0.5589640014256861
Model Saved: 0.5845851467078478
No improvments: 0.576587515740241, patience: 0, lr: 1e-05
Model Saved: 0.6073799979442902
No improvments: 0.5977457873005245, patience: 0, lr: 1e-05
No improvments: 0.5922518330513254, patience: 1, lr: 1e-05
No improvments: 0.6030666581125297, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6114698385726423
No improvments: 0.6114698385726423, patience: 0, lr: 1.0453658778440268e-06
Model Saved: 0.6197601871709885
Model Saved: 0.6237906393087725
No improvments: 0.5973149578234526, patience: 0, lr: 9.999999999999958e-06
No improvments: 0.6003985507246377, patience: 1, lr: 6.579634122155324e-06
No improvments: 0.62296812

Fold: 8 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6595,0.620962,0.674338,0.773234,0.436059,0.01
2,0.6045,0.582489,0.677885,0.747212,0.538081,0.63
3,0.5714,0.555552,0.699401,0.762082,0.530131,0.67
4,0.5483,0.538311,0.681155,0.750929,0.564504,0.71
5,0.5324,0.528206,0.662279,0.684015,0.566631,0.76
6,0.5201,0.524984,0.703657,0.72119,0.595375,0.77
7,0.5122,0.514363,0.665392,0.773234,0.649088,0.79
8,0.492,0.521837,0.635561,0.754647,0.588265,0.81
9,0.4858,0.510468,0.705746,0.776952,0.631507,0.82
10,0.4602,0.516718,0.664762,0.776952,0.606648,0.8


Model Saved: 0.4360587002096436
Model Saved: 0.538080808080808
No improvments: 0.5301310043668123, patience: 0, lr: 1e-05
Model Saved: 0.5645040473601546
Model Saved: 0.566630655218817
Model Saved: 0.5953751428972544
Model Saved: 0.649087915142961
No improvments: 0.588265306122449, patience: 0, lr: 1e-05
No improvments: 0.6315068493150685, patience: 1, lr: 1e-05
No improvments: 0.6066484694872294, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6812796208530806
No improvments: 0.6652714932126698, patience: 0, lr: 1.0453658778440268e-06
No improvments: 0.65346730879759, patience: 1, lr: 1.045365877844024e-06
No improvments: 0.6654228855721394, patience: 2, lr: 6.579634122156521e-06
No improvments: 0.6786140979689367, patience: 3, lr: 9.999999999999958e-06
Model Saved: 0.6817263410816026
No improvments: 0.6683561643835616, patience: 0, lr: 1.045365877844104e-06
No improvments: 0.6315068493150685, patience: 1, lr: 1.0453658778441719e-06
No improvments: 0.675326

Fold: 9 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6599,0.621065,0.617158,0.773234,0.436059,0.01
2,0.6034,0.58145,0.599779,0.765799,0.522579,0.63
3,0.5697,0.555004,0.583701,0.773234,0.436059,0.01
4,0.5483,0.537192,0.516157,0.773234,0.547475,0.66
5,0.5356,0.528615,0.617119,0.750929,0.578675,0.71
6,0.5278,0.528326,0.53456,0.769517,0.553969,0.61
7,0.5188,0.534853,0.610695,0.747212,0.52943,0.57
8,0.5151,0.532931,0.628704,0.754647,0.54327,0.79
9,0.4984,0.536253,0.5705,0.750929,0.540663,0.75
10,0.4902,0.539093,0.583662,0.754647,0.525193,0.81


Model Saved: 0.4360587002096436
Model Saved: 0.5225793729047525
No improvments: 0.4360587002096436, patience: 0, lr: 1e-05
Model Saved: 0.547475248889992
Model Saved: 0.5786754564368704
No improvments: 0.5539687633718442, patience: 0, lr: 1e-05
No improvments: 0.5294299238526445, patience: 1, lr: 1e-05
No improvments: 0.5432702202099198, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.5406631495782043, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.5251925545571245, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/8
Threshold: None


Fold: 10 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6592,0.621044,0.640211,0.754647,0.515448,0.57
2,0.6044,0.581998,0.645846,0.754647,0.525193,0.63
3,0.5707,0.555202,0.657393,0.773234,0.436059,0.01
4,0.5507,0.540043,0.614518,0.747212,0.538081,0.7
5,0.5373,0.533872,0.657432,0.754647,0.515448,0.58
6,0.5293,0.531947,0.538422,0.754647,0.515448,0.59
7,0.5287,0.528236,0.656447,0.710037,0.595816,0.78
8,0.5266,0.532769,0.526403,0.750929,0.513116,0.6
9,0.5268,0.531552,0.628941,0.754647,0.525193,0.78
10,0.5202,0.517353,0.627916,0.732342,0.609201,0.66


Model Saved: 0.5154475982532751
Model Saved: 0.5251925545571245
No improvments: 0.4360587002096436, patience: 0, lr: 1e-05
Model Saved: 0.538080808080808
No improvments: 0.5154475982532751, patience: 0, lr: 1e-05
No improvments: 0.5154475982532751, patience: 1, lr: 1e-05
Model Saved: 0.5958159963014331
No improvments: 0.5131155955371856, patience: 0, lr: 1e-05
No improvments: 0.5251925545571245, patience: 1, lr: 1e-05
Model Saved: 0.6092009685230024
Model Saved: 0.6166163648558015
No improvments: 0.5794684731631058, patience: 0, lr: 1e-05
No improvments: 0.5799062988027069, patience: 1, lr: 1e-05
No improvments: 0.5933567576401373, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.5977457873005245, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.5589640014256861, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/9
Threshold: None


In [14]:
thresholds, scores

([0.85,
  0.81,
  0.63,
  0.9400000000000001,
  0.78,
  0.77,
  0.93,
  0.88,
  0.7100000000000001,
  0.8],
 [0.6290106951871658,
  0.6460887949260042,
  0.6881496881496881,
  0.6526230831315577,
  0.6038471124000272,
  0.6185749734136831,
  0.6237906393087725,
  0.6817263410816026,
  0.5786754564368704,
  0.6166163648558015])

In [15]:
thr = np.array(thresholds).mean()
print(f"Best threshold: {thr}")
print(f"Best score: {np.array(scores).mean()}")
subm_df['avg'] = subm_df[cols].apply('mean', axis=1)
subm_df[params['label_col']] = apply_thr(subm_df['avg'], thr, y_map)

Best threshold: 0.8100000000000002
Best score: 0.6339103148891172


In [16]:
subm_df

Unnamed: 0,Ids,OCR,task_2,y_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,avg
0,image_ben_1422.png,বৌ-এর প্রেগনেন্সি রিপোর্ট পজিটিভ জানার পর বর চ...,Sarcastic,0.920285,0.884400,0.890145,0.968584,0.863905,0.866582,0.947374,0.917455,0.813932,0.878986,0.895165
1,image_ben_3908.jpg,কিন্তু মিষ্টতা নেই,Non-Sarcastic,0.920171,0.884323,0.889996,0.968575,0.863596,0.280780,0.947541,0.180733,0.813750,0.878866,0.762833
2,image_ben_4634.jpg,মানুষ যা দেখে প্রথম সিঙারা আবিষ্কারের ধারণা পায়,Sarcastic,0.920496,0.884416,0.890133,0.966867,0.601943,0.866519,0.946761,0.918940,0.813926,0.878867,0.868887
3,image_ben_3192.png,এক মেয়ে ফ্যানে ওড়না পাচিয়ে সুই*সাইড করতে যাচ্ছ...,Sarcastic,0.912886,0.884368,0.890058,0.870699,0.863367,0.866702,0.943914,0.917021,0.813937,0.879009,0.884196
4,image_ben_4836.jpg,No text detected,Non-Sarcastic,0.200692,0.597275,0.516847,0.587422,0.508416,0.545424,0.550833,0.540903,0.418527,0.623912,0.509025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1816,image_ben_7561.jpg,বিদায় লেজেন্ড ফুটবল খেলা দেখার মজা টাই হয়ত হার...,Sarcastic,0.920412,0.884454,0.890100,0.968359,0.863496,0.867102,0.947377,0.918897,0.813995,0.878642,0.895284
1817,image_ben_3223.png,শীতে ঘটিদের পানীয় শীতে বাঙালদের পানীয়,Sarcastic,0.920468,0.884379,0.890132,0.968765,0.864004,0.866863,0.947568,0.918957,0.813825,0.878208,0.895317
1818,image_ben_3254.png,ঘটি বাঙীল ভাষায় খিস্তি দিয়ে একে বাংলা করে দে...,Sarcastic,0.920408,0.884412,0.890129,0.968877,0.863928,0.866756,0.947464,0.918890,0.813946,0.878511,0.895332
1819,image_ben_74.png,FAU-G FEARLESS AND UNITED GUARDS,Non-Sarcastic,0.919542,0.879495,0.248819,0.072251,0.863730,0.859202,0.109765,0.180740,0.813701,0.257382,0.520463


In [17]:
subm_df[['Ids', params['label_col']]].to_csv('submission_task2.csv', index=False)

# TASK 3

In [57]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import logging
import random
from functools import partial
import sklearn
from sklearn.model_selection import StratifiedKFold
import datasets
import transformers
import torch
import copy
from torch import nn
import gc
import torch.nn.functional as F
from tqdm.auto import tqdm, trange
logger = logging.getLogger(__name__)

In [58]:
def set_seeds(self, seed=2023): 
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

In [59]:
# Source: https://huggingface.co/l3cube-pune/assamese-bert
params = dict(learning_rate=1.0e-5, seed=2023, data_seed=2023, n_splits=10,
              model_id='models--l3cube-pune--bengali-bert', tensorboard_dir='tensorboard', label_col='task_3',
              patience=5, metric='f1', metric_mode='max', train_batch_size=32, eval_batch_size=32,
              train_max_length=512, eval_max_length=512, learning_rate_scheduling='cosine_annealing',
              gradient_accumulation_steps=1, warmup_proportion=50, num_train_epochs=100, max_seq=512,
              fp16=False, logging_steps=None, model_dir='/model/', password='Bangla')

y_map = {0.0: 'Non Vulgar', 1.0: 'Vulgar'}

In [60]:
set_seeds(params['seed'])

# Data Preparation

In [61]:
import getpass
import zipfile
from pathlib import Path
def prepare_data(path, password, train='Bangla_train_data.csv', test='bengali_test_data_wo_label.csv'):
    zfs = [zipfile.ZipFile(path / f.replace('.csv', '.zip')) for f in [train, test]]
    return [pd.read_csv(zfs[i].open(f, 'r', password)) for i, f in enumerate([train, test])]

def split(train_df, y, n_splits=5, data_seed=2023, shuffle=True):
    skf = StratifiedKFold(n_splits=n_splits, random_state=data_seed, shuffle=shuffle)
    train_df['split'] = -1
    for i, (train_index, test_index) in enumerate(skf.split(train_df.index, y)):
        train_df.loc[train_df.index.isin(test_index), 'split'] = i
    return train_df        

path = Path('Bangla')
password =  bytes(getpass.getpass(f"Input file password") if 'password' not in params else params['password'], 'utf-8')
train_df, test_df = prepare_data(path, password=password)
train_df = split(train_df, train_df['Vulgar'], n_splits=params['n_splits'], data_seed=params['data_seed'])
train_df['task_3'] = train_df['Vulgar'].map({v: k for k, v in y_map.items()})
print(len(train_df), len(test_df))

2693 1821


In [62]:
def prepare_feature(df, y_col, tokenizer, max_length):
    texts = df['OCR'].astype(str).tolist()
    features = tokenizer(texts, padding=False, truncation=True, return_tensors=None, max_length=max_length).data
    if y_col in df.columns:
        features['labels'] = df[y_col].tolist()
    return features

In [63]:
train_df.head()

Unnamed: 0,Ids,Sentiment,Sarcasm,Vulgar,Abuse,Target,OCR,split,task_3
0,image_ben_3635.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,I don't need any fucking GIRL বাঁরা কি সাইলেন...,1,0.0
1,image_ben_1586.png,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"After a little problem in relationship Boys, G...",6,0.0
2,image_ben_4040.jpg,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"মা জানো, পাশের বাড়ির চুলবুলি আমার থেকে বেশি অ...",1,0.0
3,image_ben_5074.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,Gf কে ফিঙ্গারং করে দেওয়ার পর যখন সে চরমসুখ অনু...,3,0.0
4,image_ben_7351.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,টিপতে টিপতে যখন ব্যাথ্যা অনুভব হয়,0,1.0


# Prepare Transformer Trainer

In [64]:
class Model(nn.Module):
    def __init__(self, transformer):
        super().__init__()
        self.transformer = transformer 
        
    def m_loss(self, logits, labels):
        return F.binary_cross_entropy_with_logits(logits.squeeze(-1), labels)
        
    def forward(self, **kwargs):
        labels = kwargs.pop("labels", None)
        output = self.transformer(**kwargs)
        output.loss = self.m_loss(output.logits, labels) if labels != None else output.loss
        output.logits = F.sigmoid(output.logits)
        return output
    
    def load(self, model_dir):
        self.transformer.from_pretrained(model_dir)
        print(f"Model loaded: {model_dir}")
    
    def save(self, model_dir):
        self.transformer.save_pretrained(model_dir)
    
class PTTrainer(transformers.Trainer):
    def __init__(self, **kwargs): 
        super().__init__(**kwargs)
        self.compute_metrics = self._metrics if self.eval_dataset is not None else None
        self._patience = 0

    def _metrics(self, eval_preds):
        """
        Evaluation metrics
        """
        threshold = 0.5
        thresholds = np.arange(0.01, 1.0, 0.01) 
        scores, labels = eval_preds
        acc, f1, thr = -1, -1, 0
        auc = sklearn.metrics.roc_auc_score(labels, scores)
        for threshold in thresholds:
            predictions = (scores >= threshold).astype(int)
            new_f1 = sklearn.metrics.f1_score(labels, predictions, average='macro')
            if new_f1 > f1:
                acc = sklearn.metrics.accuracy_score(labels, predictions)
                f1 = new_f1
                thr = threshold
        return {'auc': auc, 'accuracy': acc, 'f1': f1, 'thr': thr}
        
    def compute_loss(self, model, inputs, return_outputs=False):
        """
        forward/backward step
        """
        output = model(**inputs)
        return (output.loss, {'logits': output.logits}) if return_outputs else output.loss

    def _save_checkpoint(self, model, trial, metrics=None):
        # Determine the new best metric / best model checkpoint
        if metrics is not None and self.args.metric_for_best_model is not None:
            metric_to_check = self.args.metric_for_best_model
            if not metric_to_check.startswith("eval_"):
                metric_to_check = f"eval_{metric_to_check}"
            metric_value = metrics[metric_to_check]

            operator = np.greater if self.args.greater_is_better else np.less
            if (
                self.state.best_metric is None
                # or self.state.best_model_checkpoint is None
                or operator(metric_value, self.state.best_metric)
            ):
                self.state.best_metric = metric_value
                # self.state.best_model_checkpoint = self.args.output_dir
                model.save(self.args.output_dir)
                print(f'Model Saved: {metric_value}')
                self._patience = 0
            else:
                print(f"No improvments: {metric_value}, patience: {self._patience}, lr: {self.optimizer.param_groups[0]['lr']}")
                self._patience += 1
            # LR annealing
            if params['learning_rate_scheduling'].lower() == 'cosine_annealing':
                if (self._patience - 1) == int(round(params['patience'] / 2)):
                    if not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.CosineAnnealingLR):
                        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=10, eta_min=max(self.args.learning_rate * 0.01, 1.0e-7), verbose=False)
                        print(f'Starting CosineAnnealingLR schedular.')
                        self._patience -= 1
        else:
            model.save(self.args.output_dir)
            print('Model Saved')


class PTDefaultFlowCallback(transformers.DefaultFlowCallback):
    def on_epoch_end(self, args, state, control, **kwargs):
        control = super().on_epoch_end(args, state, control, **kwargs)
        # Evaluate
        if args.evaluation_strategy != transformers.IntervalStrategy.EPOCH \
            and state.epoch == args.num_train_epochs:
            control.should_evaluate = state.best_metric is not None
            control.should_save = True
        return control
    
def get_best_thr(history):
    max_i = -1
    for i, record in enumerate(history):
        if f"eval_{params['metric']}" in record:
            if max_i == -1 or record[f"eval_{params['metric']}"] > history[max_i][f"eval_{params['metric']}"]:
                max_i = i
    return history[max_i]['eval_thr'], history[max_i]['eval_f1']

def apply_thr(y, threshold, mapper=None):
    if mapper is None:
        return (y > threshold).astype(int)
    else:
        return [mapper[x] for x in (y > threshold).astype(int)]

def inference(pred_features, threshold=None):
    print(f"Threshold: {threshold}")
    result = trainer.predict(datasets.Dataset.from_dict(dict(pred_features))).predictions
    if threshold is None:
        test_df[params['label_col']] = result[:, 0]
    else:
        test_df[params['label_col']] = apply_thr(result[:, 0], threshold, y_map)
    return test_df[[col for col in test_df.columns if col != 'text']]

In [65]:
transformer = transformers.AutoModelForSequenceClassification.from_pretrained(params['model_id'], num_labels=1)
_tokenizer = transformers.AutoTokenizer.from_pretrained(params['model_id'], model_max_length=params['max_seq'])
callbacks = [PTDefaultFlowCallback(), 
             transformers.EarlyStoppingCallback(early_stopping_patience=params['patience'])]

if isinstance(params['logging_steps'], int):
    save_strategy = transformers.IntervalStrategy.STEPS
else:
    save_strategy = transformers.IntervalStrategy.EPOCH 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at models--l3cube-pune--bengali-bert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [66]:
def get_trainer(valid_split=0):
    _model = Model(copy.deepcopy(transformer))
    optimizer = torch.optim.AdamW(_model.parameters(), lr=params['learning_rate'])
    model_dir = f"{params['model_dir']}{valid_split}"
    t_df, v_df = train_df[train_df['split'] != valid_split], train_df[train_df['split'] == valid_split]
    print(f"Train: {len(t_df)}, Valid: {len(v_df)}")
    train_features = prepare_feature(t_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    valid_features = prepare_feature(v_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    trainer = PTTrainer(model=_model,
        train_dataset=datasets.Dataset.from_dict(dict(train_features)),
        eval_dataset=datasets.Dataset.from_dict(dict(valid_features)) if valid_features is not None else None,
        args=transformers.TrainingArguments(
            label_names=['labels'],
            seed=params['seed'],
            data_seed=params['data_seed'],
            remove_unused_columns=False,
            report_to="tensorboard" if params['tensorboard_dir'] is not None else "none",
            logging_dir=params['tensorboard_dir'],
            per_device_train_batch_size=params['train_batch_size'], 
            per_device_eval_batch_size=params['eval_batch_size'],
            gradient_accumulation_steps=params['gradient_accumulation_steps'],
            warmup_steps=params['warmup_proportion'], 
            # max_steps=20,
            num_train_epochs=params['num_train_epochs'],
            learning_rate=params['learning_rate'], 
            fp16=params['fp16'],
            metric_for_best_model=params['metric'],
            # gradient_checkpointing=fp16,
            # evaluate_during_training=True,
            logging_steps=params['logging_steps'], 
            output_dir=model_dir,
            evaluation_strategy=save_strategy,
            save_strategy=save_strategy,
            logging_strategy=save_strategy,
            save_total_limit=1,
            greater_is_better=params['metric_mode'], 
            load_best_model_at_end=True,
            eval_steps=params['logging_steps'],
            save_steps=params['logging_steps'],
            overwrite_output_dir=True,
            max_grad_norm=1.0,
        ),
        optimizers=[optimizer, transformers.get_constant_schedule(optimizer)],
        callbacks=callbacks,
        data_collator=transformers.DataCollatorWithPadding(_tokenizer),
    )
    return _model, trainer, model_dir

# Train / Inference

In [67]:
pred_features = prepare_feature(test_df, params['label_col'], _tokenizer, max_length=params['eval_max_length'])
subm_df = None
thresholds = []
scores = []
cols = []
for fold in range(params['n_splits']):
    print(f"Fold: {fold + 1} out of {params['n_splits']}")
    col = f'y_{fold}'
    model, trainer, model_dir = get_trainer(fold)
    trainer.train()
    trainer.model.load(model_dir)
    subm = inference(pred_features)
    t, f1 = get_best_thr(trainer.state.log_history)
    thresholds.append(t)
    scores.append(f1)
    if subm_df is None:
        subm_df = subm
    subm_df[col] = subm[params['label_col']]
    del model, trainer
    torch.cuda.empty_cache()
    gc.collect()
    cols.append(col)

Fold: 1 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6547,0.609863,0.477006,0.825926,0.452333,0.44
2,0.5849,0.555144,0.455539,0.825926,0.452333,0.38
3,0.535,0.510675,0.512117,0.825926,0.452333,0.32
4,0.4969,0.482621,0.493369,0.825926,0.452333,0.26
5,0.4792,0.474312,0.485784,0.825926,0.452333,0.24
6,0.4717,0.469238,0.470613,0.825926,0.452333,0.23


Model Saved: 0.45233265720081134
No improvments: 0.45233265720081134, patience: 0, lr: 1e-05
No improvments: 0.45233265720081134, patience: 1, lr: 1e-05
No improvments: 0.45233265720081134, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.45233265720081134, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.45233265720081134, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/0
Threshold: None


Fold: 2 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6556,0.609786,0.539882,0.822222,0.489281,0.43
2,0.5853,0.554501,0.6029,0.785185,0.524012,0.37
3,0.5336,0.509794,0.563591,0.825926,0.452333,0.31
4,0.4956,0.481404,0.544127,0.825926,0.452333,0.26
5,0.4746,0.468264,0.544318,0.825926,0.452333,0.22
6,0.4667,0.465593,0.54346,0.825926,0.452333,0.21
7,0.4649,0.463726,0.523185,0.825926,0.452333,0.2


Model Saved: 0.48928121059268603
Model Saved: 0.5240121580547112
No improvments: 0.45233265720081134, patience: 0, lr: 1e-05
No improvments: 0.45233265720081134, patience: 1, lr: 1e-05
No improvments: 0.45233265720081134, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.45233265720081134, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.45233265720081134, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/1
Threshold: None


Fold: 3 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6568,0.610925,0.420046,0.825926,0.452333,0.44
2,0.5857,0.554908,0.473905,0.825926,0.452333,0.38
3,0.5348,0.510559,0.47214,0.825926,0.452333,0.32
4,0.4966,0.48166,0.475289,0.825926,0.452333,0.26
5,0.4782,0.47342,0.455014,0.825926,0.452333,0.24
6,0.4711,0.468323,0.525093,0.825926,0.452333,0.22


Model Saved: 0.45233265720081134
No improvments: 0.45233265720081134, patience: 0, lr: 1e-05
No improvments: 0.45233265720081134, patience: 1, lr: 1e-05
No improvments: 0.45233265720081134, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.45233265720081134, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.45233265720081134, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/2
Threshold: None


Fold: 4 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6547,0.608944,0.462615,0.806691,0.497557,0.43
2,0.5858,0.553799,0.45038,0.828996,0.453252,0.38
3,0.536,0.508628,0.46164,0.828996,0.453252,0.32
4,0.4982,0.479113,0.457594,0.828996,0.453252,0.26
5,0.4793,0.470743,0.453841,0.828996,0.453252,0.24
6,0.4727,0.464655,0.498343,0.828996,0.453252,0.22


Model Saved: 0.49755747126436783
No improvments: 0.4532520325203252, patience: 0, lr: 1e-05
No improvments: 0.4532520325203252, patience: 1, lr: 1e-05
No improvments: 0.4532520325203252, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4532520325203252, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4532520325203252, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/3
Threshold: None


Fold: 5 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.656,0.609813,0.492445,0.802974,0.510018,0.43
2,0.5857,0.553012,0.483281,0.67658,0.506235,0.37
3,0.5351,0.508892,0.502291,0.828996,0.453252,0.32
4,0.4971,0.478571,0.508676,0.828996,0.453252,0.26
5,0.4792,0.470303,0.501803,0.828996,0.453252,0.24
6,0.472,0.464669,0.503314,0.828996,0.453252,0.22


Model Saved: 0.5100182149362477
No improvments: 0.5062345718083423, patience: 0, lr: 1e-05
No improvments: 0.4532520325203252, patience: 1, lr: 1e-05
No improvments: 0.4532520325203252, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4532520325203252, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4532520325203252, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/4
Threshold: None


Fold: 6 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6568,0.60957,0.54265,0.828996,0.453252,0.44
2,0.5863,0.554636,0.545233,0.828996,0.453252,0.38
3,0.5354,0.509142,0.499659,0.828996,0.453252,0.32
4,0.4973,0.478869,0.492201,0.828996,0.453252,0.26
5,0.4788,0.470469,0.512917,0.828996,0.453252,0.24
6,0.4724,0.4647,0.48255,0.828996,0.453252,0.23


Model Saved: 0.4532520325203252
No improvments: 0.4532520325203252, patience: 0, lr: 1e-05
No improvments: 0.4532520325203252, patience: 1, lr: 1e-05
No improvments: 0.4532520325203252, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4532520325203252, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4532520325203252, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/5
Threshold: None


Fold: 7 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6555,0.610473,0.476519,0.825279,0.452138,0.44
2,0.5841,0.554825,0.426059,0.825279,0.452138,0.38
3,0.5339,0.510517,0.501198,0.825279,0.452138,0.31
4,0.4974,0.482798,0.529902,0.825279,0.452138,0.26
5,0.4785,0.475189,0.540205,0.825279,0.452138,0.24
6,0.472,0.469432,0.519599,0.825279,0.452138,0.22


Model Saved: 0.45213849287169044
No improvments: 0.45213849287169044, patience: 0, lr: 1e-05
No improvments: 0.45213849287169044, patience: 1, lr: 1e-05
No improvments: 0.45213849287169044, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.45213849287169044, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.45213849287169044, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/6
Threshold: None


Fold: 8 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6551,0.610643,0.496454,0.817844,0.4693,0.43
2,0.5855,0.555201,0.493962,0.828996,0.473895,0.38
3,0.5351,0.510225,0.522858,0.828996,0.473895,0.32
4,0.4976,0.482943,0.526692,0.825279,0.452138,0.26
5,0.4759,0.469241,0.521181,0.825279,0.472351,0.42
6,0.4676,0.46669,0.499617,0.825279,0.452138,0.21
7,0.4648,0.464652,0.514903,0.825279,0.452138,0.2


Model Saved: 0.46929983492370253
Model Saved: 0.4738945578231292
No improvments: 0.4738945578231292, patience: 0, lr: 1e-05
No improvments: 0.45213849287169044, patience: 1, lr: 1e-05
No improvments: 0.4723509035516047, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.45213849287169044, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.45213849287169044, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/7
Threshold: None


Fold: 9 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6559,0.610724,0.595888,0.825279,0.452138,0.44
2,0.586,0.555806,0.533353,0.825279,0.452138,0.38
3,0.5344,0.511602,0.517539,0.825279,0.452138,0.32
4,0.4964,0.482733,0.486918,0.825279,0.452138,0.26
5,0.4789,0.474619,0.494297,0.825279,0.452138,0.24
6,0.4717,0.469285,0.508578,0.825279,0.452138,0.22


Model Saved: 0.45213849287169044
No improvments: 0.45213849287169044, patience: 0, lr: 1e-05
No improvments: 0.45213849287169044, patience: 1, lr: 1e-05
No improvments: 0.45213849287169044, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.45213849287169044, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.45213849287169044, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/8
Threshold: None


Fold: 10 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6552,0.610614,0.415181,0.825279,0.472351,0.43
2,0.5857,0.556068,0.464827,0.825279,0.452138,0.38
3,0.5351,0.511486,0.441441,0.825279,0.452138,0.32
4,0.4974,0.483309,0.438662,0.825279,0.452138,0.26
5,0.4792,0.475287,0.519791,0.825279,0.452138,0.24
6,0.4721,0.469609,0.464922,0.825279,0.452138,0.23


Model Saved: 0.4723509035516047
No improvments: 0.45213849287169044, patience: 0, lr: 1e-05
No improvments: 0.45213849287169044, patience: 1, lr: 1e-05
No improvments: 0.45213849287169044, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.45213849287169044, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.45213849287169044, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/9
Threshold: None


In [68]:
thresholds, scores

([0.44, 0.37, 0.44, 0.43, 0.43, 0.44, 0.44, 0.38, 0.44, 0.43],
 [0.45233265720081134,
  0.5240121580547112,
  0.45233265720081134,
  0.49755747126436783,
  0.5100182149362477,
  0.4532520325203252,
  0.45213849287169044,
  0.4738945578231292,
  0.45213849287169044,
  0.4723509035516047])

In [71]:
thr = np.array(thresholds).mean()
thr = 0.2146
print(f"Best threshold: {thr}")
print(f"Best score: {np.array(scores).mean()}")
subm_df['avg'] = subm_df[cols].apply('mean', axis=1)
subm_df[params['label_col']] = apply_thr(subm_df['avg'], thr, y_map)

Best threshold: 0.2146
Best score: 0.47400276382953893


In [72]:
subm_df.head(40)

Unnamed: 0,Ids,OCR,task_3,y_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,avg
0,image_ben_1422.png,বৌ-এর প্রেগনেন্সি রিপোর্ট পজিটিভ জানার পর বর চ...,Non Vulgar,0.221593,0.195037,0.218244,0.219149,0.21918,0.219178,0.219324,0.195029,0.218782,0.22001,0.214553
1,image_ben_3908.jpg,কিন্তু মিষ্টতা নেই,Vulgar,0.22178,0.194966,0.21837,0.219276,0.219295,0.219177,0.219385,0.195067,0.218859,0.220065,0.214624
2,image_ben_4634.jpg,মানুষ যা দেখে প্রথম সিঙারা আবিষ্কারের ধারণা পায়,Non Vulgar,0.221655,0.19494,0.218267,0.21913,0.219249,0.219151,0.219355,0.195031,0.218801,0.220033,0.214561
3,image_ben_3192.png,এক মেয়ে ফ্যানে ওড়না পাচিয়ে সুই*সাইড করতে যাচ্ছ...,Non Vulgar,0.221635,0.195004,0.218224,0.219124,0.219188,0.21928,0.219356,0.19505,0.218775,0.22003,0.214566
4,image_ben_4836.jpg,No text detected,Vulgar,0.221819,0.194996,0.218351,0.219234,0.219261,0.219666,0.2194,0.195322,0.218937,0.220051,0.214704
5,image_ben_3907.jpg,বাঘের চামড়া দিয়ে বিড়াল ঢেকে রাখার একটি চিত্র।,Non Vulgar,0.221684,0.194964,0.218251,0.219191,0.219195,0.21919,0.219363,0.195082,0.218821,0.220068,0.214581
6,image_ben_4636.jpg,"আমার উপর রেগে আছো রিয়া? না, রেগে নেই তাহলে মুখ...",Non Vulgar,0.221607,0.195057,0.218236,0.219135,0.219174,0.219148,0.219341,0.195037,0.218763,0.220014,0.214551
7,image_ben_5080.jpg,নিঃস্ব উপেন তার দুই বিঘা জমি,Non Vulgar,0.22169,0.194969,0.218282,0.219185,0.219215,0.219257,0.219352,0.195047,0.218842,0.220017,0.214586
8,image_ben_3704.jpg,"চলো চিন্টু মজা হবে ! Yes, chumki তাড়াতাড়ি এসো ...",Non Vulgar,0.221622,0.195075,0.218252,0.219151,0.219195,0.219144,0.219354,0.195027,0.218759,0.220021,0.21456
9,image_ben_5071.jpg,No text detected,Vulgar,0.221819,0.194996,0.218351,0.219234,0.219261,0.219666,0.2194,0.195322,0.218937,0.220051,0.214704


In [73]:
subm_df[['Ids', params['label_col']]].to_csv('submission_task3.csv', index=False)

# TASK 4

In [39]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import logging
import random
from functools import partial
import sklearn
from sklearn.model_selection import StratifiedKFold
import datasets
import transformers
import torch
import copy
from torch import nn
import gc
import torch.nn.functional as F
from tqdm.auto import tqdm, trange
logger = logging.getLogger(__name__)

In [40]:
def set_seeds(self, seed=2023): 
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

In [41]:
# Source: https://huggingface.co/l3cube-pune/assamese-bert
params = dict(learning_rate=1.0e-5, seed=2023, data_seed=2023, n_splits=10,
              model_id='models--l3cube-pune--bengali-bert', tensorboard_dir='tensorboard', label_col='task_4',
              patience=5, metric='f1', metric_mode='max', train_batch_size=32, eval_batch_size=32,
              train_max_length=512, eval_max_length=512, learning_rate_scheduling='cosine_annealing',
              gradient_accumulation_steps=1, warmup_proportion=50, num_train_epochs=100, max_seq=512,
              fp16=False, logging_steps=None, model_dir='/model/', password='Bangla')

y_map = {0.0: 'Non-abusive', 1.0: 'Abusive'}

In [42]:
set_seeds(params['seed'])

# Data Preparation

In [43]:
import getpass
import zipfile
from pathlib import Path
def prepare_data(path, password, train='Bangla_train_data.csv', test='bengali_test_data_wo_label.csv'):
    zfs = [zipfile.ZipFile(path / f.replace('.csv', '.zip')) for f in [train, test]]
    return [pd.read_csv(zfs[i].open(f, 'r', password)) for i, f in enumerate([train, test])]

def split(train_df, y, n_splits=5, data_seed=2023, shuffle=True):
    skf = StratifiedKFold(n_splits=n_splits, random_state=data_seed, shuffle=shuffle)
    train_df['split'] = -1
    for i, (train_index, test_index) in enumerate(skf.split(train_df.index, y)):
        train_df.loc[train_df.index.isin(test_index), 'split'] = i
    return train_df        

path = Path('Bangla')
password =  bytes(getpass.getpass(f"Input file password") if 'password' not in params else params['password'], 'utf-8')
train_df, test_df = prepare_data(path, password=password)
train_df = split(train_df, train_df['Abuse'], n_splits=params['n_splits'], data_seed=params['data_seed'])
train_df['task_4'] = train_df['Abuse'].map({v: k for k, v in y_map.items()})
print(len(train_df), len(test_df))

2693 1821


In [44]:
def prepare_feature(df, y_col, tokenizer, max_length):
    texts = df['OCR'].astype(str).tolist()
    features = tokenizer(texts, padding=False, truncation=True, return_tensors=None, max_length=max_length).data
    if y_col in df.columns:
        features['labels'] = df[y_col].tolist()
    return features

In [45]:
train_df.head()

Unnamed: 0,Ids,Sentiment,Sarcasm,Vulgar,Abuse,Target,OCR,split,task_4
0,image_ben_3635.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,I don't need any fucking GIRL বাঁরা কি সাইলেন...,2,1.0
1,image_ben_1586.png,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"After a little problem in relationship Boys, G...",8,1.0
2,image_ben_4040.jpg,Negative,Sarcastic,Non Vulgar,Abusive,Gender,"মা জানো, পাশের বাড়ির চুলবুলি আমার থেকে বেশি অ...",3,1.0
3,image_ben_5074.jpg,Neutral,Sarcastic,Non Vulgar,Abusive,Gender,Gf কে ফিঙ্গারং করে দেওয়ার পর যখন সে চরমসুখ অনু...,4,1.0
4,image_ben_7351.jpg,Negative,Sarcastic,Vulgar,Abusive,Gender,টিপতে টিপতে যখন ব্যাথ্যা অনুভব হয়,4,1.0


# Prepare Transformer Trainer

In [46]:
class Model(nn.Module):
    def __init__(self, transformer):
        super().__init__()
        self.transformer = transformer 
        
    def m_loss(self, logits, labels):
        return F.binary_cross_entropy_with_logits(logits.squeeze(-1), labels)
        
    def forward(self, **kwargs):
        labels = kwargs.pop("labels", None)
        output = self.transformer(**kwargs)
        output.loss = self.m_loss(output.logits, labels) if labels != None else output.loss
        output.logits = F.sigmoid(output.logits)
        return output
    
    def load(self, model_dir):
        self.transformer.from_pretrained(model_dir)
        print(f"Model loaded: {model_dir}")
    
    def save(self, model_dir):
        self.transformer.save_pretrained(model_dir)
    
class PTTrainer(transformers.Trainer):
    def __init__(self, **kwargs): 
        super().__init__(**kwargs)
        self.compute_metrics = self._metrics if self.eval_dataset is not None else None
        self._patience = 0

    def _metrics(self, eval_preds):
        """
        Evaluation metrics
        """
        threshold = 0.5
        thresholds = np.arange(0.01, 1.0, 0.01) 
        scores, labels = eval_preds
        acc, f1, thr = -1, -1, 0
        auc = sklearn.metrics.roc_auc_score(labels, scores)
        for threshold in thresholds:
            predictions = (scores >= threshold).astype(int)
            new_f1 = sklearn.metrics.f1_score(labels, predictions, average='macro')
            if new_f1 > f1:
                acc = sklearn.metrics.accuracy_score(labels, predictions)
                f1 = new_f1
                thr = threshold
        return {'auc': auc, 'accuracy': acc, 'f1': f1, 'thr': thr}
        
    def compute_loss(self, model, inputs, return_outputs=False):
        """
        forward/backward step
        """
        output = model(**inputs)
        return (output.loss, {'logits': output.logits}) if return_outputs else output.loss

    def _save_checkpoint(self, model, trial, metrics=None):
        # Determine the new best metric / best model checkpoint
        if metrics is not None and self.args.metric_for_best_model is not None:
            metric_to_check = self.args.metric_for_best_model
            if not metric_to_check.startswith("eval_"):
                metric_to_check = f"eval_{metric_to_check}"
            metric_value = metrics[metric_to_check]

            operator = np.greater if self.args.greater_is_better else np.less
            if (
                self.state.best_metric is None
                # or self.state.best_model_checkpoint is None
                or operator(metric_value, self.state.best_metric)
            ):
                self.state.best_metric = metric_value
                # self.state.best_model_checkpoint = self.args.output_dir
                model.save(self.args.output_dir)
                print(f'Model Saved: {metric_value}')
                self._patience = 0
            else:
                print(f"No improvments: {metric_value}, patience: {self._patience}, lr: {self.optimizer.param_groups[0]['lr']}")
                self._patience += 1
            # LR annealing
            if params['learning_rate_scheduling'].lower() == 'cosine_annealing':
                if (self._patience - 1) == int(round(params['patience'] / 2)):
                    if not isinstance(self.lr_scheduler, torch.optim.lr_scheduler.CosineAnnealingLR):
                        self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=10, eta_min=max(self.args.learning_rate * 0.01, 1.0e-7), verbose=False)
                        print(f'Starting CosineAnnealingLR schedular.')
                        self._patience -= 1
        else:
            model.save(self.args.output_dir)
            print('Model Saved')


class PTDefaultFlowCallback(transformers.DefaultFlowCallback):
    def on_epoch_end(self, args, state, control, **kwargs):
        control = super().on_epoch_end(args, state, control, **kwargs)
        # Evaluate
        if args.evaluation_strategy != transformers.IntervalStrategy.EPOCH \
            and state.epoch == args.num_train_epochs:
            control.should_evaluate = state.best_metric is not None
            control.should_save = True
        return control
    
def get_best_thr(history):
    max_i = -1
    for i, record in enumerate(history):
        if f"eval_{params['metric']}" in record:
            if max_i == -1 or record[f"eval_{params['metric']}"] > history[max_i][f"eval_{params['metric']}"]:
                max_i = i
    return history[max_i]['eval_thr'], history[max_i]['eval_f1']

def apply_thr(y, threshold, mapper=None):
    if mapper is None:
        return (y > threshold).astype(int)
    else:
        return [mapper[x] for x in (y > threshold).astype(int)]

def inference(pred_features, threshold=None):
    print(f"Threshold: {threshold}")
    result = trainer.predict(datasets.Dataset.from_dict(dict(pred_features))).predictions
    if threshold is None:
        test_df[params['label_col']] = result[:, 0]
    else:
        test_df[params['label_col']] = apply_thr(result[:, 0], threshold, y_map)
    return test_df[[col for col in test_df.columns if col != 'text']]

In [47]:
transformer = transformers.AutoModelForSequenceClassification.from_pretrained(params['model_id'], num_labels=1)
_tokenizer = transformers.AutoTokenizer.from_pretrained(params['model_id'], model_max_length=params['max_seq'])
callbacks = [PTDefaultFlowCallback(), 
             transformers.EarlyStoppingCallback(early_stopping_patience=params['patience'])]

if isinstance(params['logging_steps'], int):
    save_strategy = transformers.IntervalStrategy.STEPS
else:
    save_strategy = transformers.IntervalStrategy.EPOCH 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at models--l3cube-pune--bengali-bert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [48]:
def get_trainer(valid_split=0):
    _model = Model(copy.deepcopy(transformer))
    optimizer = torch.optim.AdamW(_model.parameters(), lr=params['learning_rate'])
    model_dir = f"{params['model_dir']}{valid_split}"
    t_df, v_df = train_df[train_df['split'] != valid_split], train_df[train_df['split'] == valid_split]
    print(f"Train: {len(t_df)}, Valid: {len(v_df)}")
    train_features = prepare_feature(t_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    valid_features = prepare_feature(v_df, params['label_col'], _tokenizer, max_length=params['train_max_length'])
    trainer = PTTrainer(model=_model,
        train_dataset=datasets.Dataset.from_dict(dict(train_features)),
        eval_dataset=datasets.Dataset.from_dict(dict(valid_features)) if valid_features is not None else None,
        args=transformers.TrainingArguments(
            label_names=['labels'],
            seed=params['seed'],
            data_seed=params['data_seed'],
            remove_unused_columns=False,
            report_to="tensorboard" if params['tensorboard_dir'] is not None else "none",
            logging_dir=params['tensorboard_dir'],
            per_device_train_batch_size=params['train_batch_size'], 
            per_device_eval_batch_size=params['eval_batch_size'],
            gradient_accumulation_steps=params['gradient_accumulation_steps'],
            warmup_steps=params['warmup_proportion'], 
            # max_steps=20,
            num_train_epochs=params['num_train_epochs'],
            learning_rate=params['learning_rate'], 
            fp16=params['fp16'],
            metric_for_best_model=params['metric'],
            # gradient_checkpointing=fp16,
            # evaluate_during_training=True,
            logging_steps=params['logging_steps'], 
            output_dir=model_dir,
            evaluation_strategy=save_strategy,
            save_strategy=save_strategy,
            logging_strategy=save_strategy,
            save_total_limit=1,
            greater_is_better=params['metric_mode'], 
            load_best_model_at_end=True,
            eval_steps=params['logging_steps'],
            save_steps=params['logging_steps'],
            overwrite_output_dir=True,
            max_grad_norm=1.0,
        ),
        optimizers=[optimizer, transformers.get_constant_schedule(optimizer)],
        callbacks=callbacks,
        data_collator=transformers.DataCollatorWithPadding(_tokenizer),
    )
    return _model, trainer, model_dir

# Train / Inference

In [49]:
pred_features = prepare_feature(test_df, params['label_col'], _tokenizer, max_length=params['eval_max_length'])
subm_df = None
thresholds = []
scores = []
cols = []
for fold in range(params['n_splits']):
    print(f"Fold: {fold + 1} out of {params['n_splits']}")
    col = f'y_{fold}'
    model, trainer, model_dir = get_trainer(fold)
    trainer.train()
    trainer.model.load(model_dir)
    subm = inference(pred_features)
    t, f1 = get_best_thr(trainer.state.log_history)
    thresholds.append(t)
    scores.append(f1)
    if subm_df is None:
        subm_df = subm
    subm_df[col] = subm[params['label_col']]
    del model, trainer
    torch.cuda.empty_cache()
    gc.collect()
    cols.append(col)

Fold: 1 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6708,0.642109,0.569981,0.725926,0.433417,0.44
2,0.6286,0.612781,0.541264,0.725926,0.420601,0.39
3,0.606,0.596858,0.550124,0.725926,0.420601,0.34
4,0.5938,0.589802,0.569843,0.725926,0.420601,0.31
5,0.5899,0.588186,0.555261,0.725926,0.420601,0.3
6,0.589,0.587786,0.532543,0.725926,0.420601,0.29


Model Saved: 0.4334165154264973
No improvments: 0.4206008583690987, patience: 0, lr: 1e-05
No improvments: 0.4206008583690987, patience: 1, lr: 1e-05
No improvments: 0.4206008583690987, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4206008583690987, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4206008583690987, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/0
Threshold: None


Fold: 2 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6707,0.64185,0.545746,0.725926,0.420601,0.44
2,0.6297,0.612918,0.52158,0.725926,0.420601,0.39
3,0.6101,0.597408,0.511066,0.725926,0.420601,0.35
4,0.5976,0.589676,0.556467,0.725926,0.420601,0.31
5,0.5914,0.58814,0.532095,0.725926,0.420601,0.3
6,0.589,0.587731,0.535438,0.725926,0.420601,0.29


Model Saved: 0.4206008583690987
No improvments: 0.4206008583690987, patience: 0, lr: 1e-05
No improvments: 0.4206008583690987, patience: 1, lr: 1e-05
No improvments: 0.4206008583690987, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4206008583690987, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4206008583690987, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/1
Threshold: None


Fold: 3 out of 10
Train: 2423, Valid: 270




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6707,0.642672,0.504137,0.718519,0.430569,0.44
2,0.6281,0.612544,0.538058,0.725926,0.420601,0.39
3,0.6057,0.59696,0.501069,0.725926,0.420601,0.34
4,0.5936,0.589757,0.564431,0.725926,0.420601,0.31
5,0.5898,0.588183,0.542402,0.725926,0.420601,0.3
6,0.589,0.58777,0.576048,0.725926,0.420601,0.29


Model Saved: 0.4305694305694306
No improvments: 0.4206008583690987, patience: 0, lr: 1e-05
No improvments: 0.4206008583690987, patience: 1, lr: 1e-05
No improvments: 0.4206008583690987, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4206008583690987, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4206008583690987, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/2
Threshold: None


Fold: 4 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.672,0.642152,0.450797,0.724907,0.420259,0.44
2,0.6281,0.6132,0.487907,0.724907,0.420259,0.39
3,0.6057,0.597424,0.432918,0.724907,0.420259,0.34
4,0.5934,0.590508,0.480042,0.724907,0.420259,0.31
5,0.5897,0.589318,0.505128,0.724907,0.420259,0.3
6,0.589,0.588619,0.532467,0.724907,0.420259,0.29


Model Saved: 0.4202586206896552
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
No improvments: 0.4202586206896552, patience: 1, lr: 1e-05
No improvments: 0.4202586206896552, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4202586206896552, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4202586206896552, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/3
Threshold: None


Fold: 5 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6704,0.641602,0.464068,0.724907,0.420259,0.44
2,0.6283,0.613785,0.506618,0.724907,0.420259,0.39
3,0.6061,0.59801,0.500728,0.724907,0.420259,0.35
4,0.594,0.590658,0.508281,0.724907,0.420259,0.31
5,0.5898,0.589283,0.570374,0.724907,0.420259,0.3
6,0.5888,0.58873,0.497748,0.724907,0.420259,0.29


Model Saved: 0.4202586206896552
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
No improvments: 0.4202586206896552, patience: 1, lr: 1e-05
No improvments: 0.4202586206896552, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4202586206896552, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4202586206896552, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/4
Threshold: None


Fold: 6 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.671,0.642061,0.511469,0.724907,0.420259,0.44
2,0.6281,0.613655,0.572557,0.724907,0.420259,0.39
3,0.606,0.597723,0.564484,0.72119,0.431642,0.34
4,0.594,0.590798,0.588635,0.728625,0.4345,0.31
5,0.5891,0.588658,0.537076,0.724907,0.420259,0.29
6,0.5874,0.581813,0.539016,0.750929,0.564504,0.28
7,0.5652,0.551553,0.631462,0.776952,0.674334,0.38
8,0.548,0.549437,0.686937,0.769517,0.675121,0.43
9,0.5223,0.538865,0.690194,0.754647,0.675263,0.4
10,0.4863,0.519372,0.665454,0.784387,0.677218,0.35


Model Saved: 0.4202586206896552
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
Model Saved: 0.43164211060089586
Model Saved: 0.43449964002879765
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
Model Saved: 0.5645040473601546
Model Saved: 0.6743341404358354
Model Saved: 0.6751207729468599
Model Saved: 0.6752633889376646
Model Saved: 0.6772178086726249
Model Saved: 0.7195845416121757
No improvments: 0.6558535897825097, patience: 0, lr: 1e-05
No improvments: 0.6883482290632241, patience: 1, lr: 1e-05
No improvments: 0.6821501210653753, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.6991906900930611, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.6556318854690684, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/5
Threshold: None


Fold: 7 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6711,0.641656,0.455267,0.724907,0.420259,0.44
2,0.6285,0.613648,0.449168,0.724907,0.420259,0.39
3,0.6061,0.597698,0.436001,0.724907,0.420259,0.34
4,0.594,0.590655,0.588635,0.724907,0.420259,0.31
5,0.5896,0.589338,0.44948,0.724907,0.420259,0.3
6,0.589,0.588648,0.452079,0.724907,0.420259,0.29


Model Saved: 0.4202586206896552
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
No improvments: 0.4202586206896552, patience: 1, lr: 1e-05
No improvments: 0.4202586206896552, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4202586206896552, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4202586206896552, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/6
Threshold: None


Fold: 8 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6712,0.641613,0.515835,0.724907,0.420259,0.44
2,0.6281,0.613686,0.486972,0.724907,0.420259,0.39
3,0.6059,0.597791,0.488011,0.732342,0.448394,0.34
4,0.5942,0.590691,0.567256,0.724907,0.420259,0.31
5,0.5893,0.588766,0.514414,0.724907,0.420259,0.3
6,0.5883,0.58826,0.55544,0.724907,0.420259,0.28
7,0.5871,0.586788,0.606237,0.665428,0.602541,0.28
8,0.5771,0.587468,0.55596,0.717472,0.474069,0.27
9,0.5639,0.584026,0.637457,0.728625,0.490014,0.27
10,0.552,0.574049,0.698857,0.717472,0.592424,0.26


Model Saved: 0.4202586206896552
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
Model Saved: 0.44839371155160623
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
No improvments: 0.4202586206896552, patience: 1, lr: 1e-05
No improvments: 0.4202586206896552, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
Model Saved: 0.6025413711583925
No improvments: 0.4740687384235439, patience: 0, lr: 1.0453658778440268e-06
No improvments: 0.49001428385923906, patience: 1, lr: 1.045365877844024e-06
No improvments: 0.5924242424242424, patience: 2, lr: 6.579634122156521e-06
Model Saved: 0.6774150566289141
No improvments: 0.6699491690170021, patience: 0, lr: 6.579634122155324e-06
No improvments: 0.5562311463046757, patience: 1, lr: 1.045365877844104e-06
No improvments: 0.6515544041450777, patience: 2, lr: 1.0453658778441719e-06
No improvments: 0.6209092332585208, patience: 3, lr: 6.5796341221570615e-06
No improvments: 0.6269954004329005, patience: 4, lr: 1.0000000000001

Fold: 9 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6713,0.641601,0.522488,0.724907,0.420259,0.44
2,0.6285,0.613436,0.550381,0.724907,0.420259,0.39
3,0.606,0.597625,0.510222,0.724907,0.420259,0.34
4,0.5941,0.590741,0.57377,0.724907,0.420259,0.31
5,0.5899,0.589319,0.55544,0.724907,0.420259,0.3
6,0.5887,0.58861,0.566667,0.724907,0.420259,0.29


Model Saved: 0.4202586206896552
No improvments: 0.4202586206896552, patience: 0, lr: 1e-05
No improvments: 0.4202586206896552, patience: 1, lr: 1e-05
No improvments: 0.4202586206896552, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.4202586206896552, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.4202586206896552, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/8
Threshold: None


Fold: 10 out of 10
Train: 2424, Valid: 269




Epoch,Training Loss,Validation Loss,Auc,Accuracy,F1,Thr
1,0.6705,0.640591,0.452824,0.728625,0.421505,0.44
2,0.628,0.612093,0.521002,0.728625,0.421505,0.39
3,0.6062,0.59538,0.502027,0.728625,0.421505,0.34
4,0.5945,0.587736,0.526454,0.728625,0.421505,0.31
5,0.5902,0.586191,0.51929,0.728625,0.421505,0.3
6,0.5893,0.585363,0.549273,0.728625,0.421505,0.29


Model Saved: 0.421505376344086
No improvments: 0.421505376344086, patience: 0, lr: 1e-05
No improvments: 0.421505376344086, patience: 1, lr: 1e-05
No improvments: 0.421505376344086, patience: 2, lr: 1e-05
Starting CosineAnnealingLR schedular.
No improvments: 0.421505376344086, patience: 2, lr: 6.579634122155956e-06
No improvments: 0.421505376344086, patience: 3, lr: 1.0453658778440268e-06
Model loaded: /model/9
Threshold: None


In [50]:
thresholds, scores

([0.44, 0.44, 0.44, 0.44, 0.44, 0.23, 0.44, 0.32, 0.44, 0.44],
 [0.4334165154264973,
  0.4206008583690987,
  0.4305694305694306,
  0.4202586206896552,
  0.4202586206896552,
  0.7195845416121757,
  0.4202586206896552,
  0.6774150566289141,
  0.4202586206896552,
  0.421505376344086])

In [54]:
thr = np.array(thresholds).mean()
thr = 0.38
print(f"Best threshold: {thr}")
print(f"Best score: {np.array(scores).mean()}")
subm_df['avg'] = subm_df[cols].apply('mean', axis=1)
subm_df[params['label_col']] = apply_thr(subm_df['avg'], thr, y_map)

Best threshold: 0.38
Best score: 0.4784126261708823


In [55]:
subm_df.head(40)

Unnamed: 0,Ids,OCR,task_4,y_0,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,avg
0,image_ben_1422.png,বৌ-এর প্রেগনেন্সি রিপোর্ট পজিটিভ জানার পর বর চ...,Abusive,0.288952,0.288376,0.28885,0.28683,0.288644,0.806509,0.287427,0.716673,0.287545,0.288914,0.382872
1,image_ben_3908.jpg,কিন্তু মিষ্টতা নেই,Non-abusive,0.289022,0.288287,0.288786,0.287114,0.289142,0.113413,0.287651,0.177219,0.287506,0.288677,0.259682
2,image_ben_4634.jpg,মানুষ যা দেখে প্রথম সিঙারা আবিষ্কারের ধারণা পায়,Non-abusive,0.288665,0.288058,0.288789,0.286801,0.288819,0.112949,0.28738,0.177064,0.287397,0.288756,0.259468
3,image_ben_3192.png,এক মেয়ে ফ্যানে ওড়না পাচিয়ে সুই*সাইড করতে যাচ্ছ...,Non-abusive,0.289117,0.288216,0.288736,0.286846,0.288667,0.806471,0.287471,0.189062,0.287493,0.289032,0.330111
4,image_ben_4836.jpg,No text detected,Non-abusive,0.288921,0.288657,0.288618,0.286954,0.28881,0.140698,0.287619,0.177425,0.287726,0.288943,0.262437
5,image_ben_3907.jpg,বাঘের চামড়া দিয়ে বিড়াল ঢেকে রাখার একটি চিত্র।,Non-abusive,0.288799,0.288116,0.288844,0.286988,0.288819,0.409049,0.287567,0.527286,0.287481,0.288838,0.324179
6,image_ben_4636.jpg,"আমার উপর রেগে আছো রিয়া? না, রেগে নেই তাহলে মুখ...",Non-abusive,0.28883,0.28796,0.288745,0.286842,0.288714,0.402686,0.287533,0.716284,0.287453,0.28886,0.342391
7,image_ben_5080.jpg,নিঃস্ব উপেন তার দুই বিঘা জমি,Non-abusive,0.288723,0.288281,0.288668,0.286887,0.288898,0.113022,0.287421,0.177105,0.287541,0.288862,0.259541
8,image_ben_3704.jpg,"চলো চিন্টু মজা হবে ! Yes, chumki তাড়াতাড়ি এসো ...",Abusive,0.288897,0.288073,0.288664,0.286803,0.288729,0.805769,0.287389,0.716594,0.287463,0.288905,0.382729
9,image_ben_5071.jpg,No text detected,Non-abusive,0.288921,0.288657,0.288618,0.286954,0.28881,0.140698,0.287619,0.177425,0.287726,0.288943,0.262437


In [56]:
subm_df[['Ids', params['label_col']]].to_csv('submission_task4.csv', index=False)