In [None]:
!nvidia-smi

Sun May  9 01:00:44 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    49W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import sys
print(sys.version_info)

sys.version_info(major=3, minor=7, micro=10, releaselevel='final', serial=0)


In [None]:
import time
START = time.time()

In [None]:
!pip install transformers==4.5



In [None]:
!cp drive/MyDrive/zindi_nlp/MalawiNews/*.csv .

In [None]:
! pip install sentencepiece



In [None]:
import torch
import os
import sys
from torch import nn
import transformers
from transformers import AutoTokenizer, AutoModel
from torch.nn import functional as F
from sklearn.metrics import accuracy_score
from tqdm.autonotebook import tqdm
from torch.cuda.amp import GradScaler, autocast
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import gc

In [None]:
import random
import numpy as np 
import torch
from tqdm.notebook import tqdm
from collections import Counter
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


SEED = 42

def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

      
    
seed_everything(SEED)

In [None]:
print(torch.__version__)

1.8.1+cu101


# Utils

In [None]:
def train_clf(model, dataloader, optimizer,  loss_fn, cfg, scheduler=None):
  scaler = GradScaler()
  accumulation = cfg.accumulation

  t = tqdm(dataloader, disable= not cfg.verbose)
  total_loss = 0
  model.train()
  
  for i, batch in enumerate(t):
    x, y = batch

    x = x.to(cfg.device, dtype=torch.long)
    y = y.to(cfg.device, dtype=torch.long)
    with autocast(cfg.use_apex):
      outputs = model(x)

    loss = loss_fn(outputs, y)

    total_loss += loss.item()


    if cfg.use_apex:
        loss = loss/accumulation
        scaler.scale(loss).backward()
    else:
        loss = loss/accumulation
        loss.backward()

    if (i+1)%accumulation == 0 or i-1 == len(t):
        if cfg.use_apex:
            scaler.step(optimizer)

            # Updates the scale for next iteration.
            scaler.update()
            optimizer.zero_grad()
        else:                
            optimizer.step()
            optimizer.zero_grad()
            

    if scheduler is not None:
      scheduler.step()  
    t.set_description("Loss : {0} ".format(total_loss/(i+1)) )
    t.refresh()       

  

In [None]:
def eval_clf(model, dataloader, loss_fn, criterion, cfg):
  total_loss = 0.
  t=tqdm(dataloader, disable= not cfg.verbose_val)
  y_true = []
  y_preds = []
  model.eval()
  device = cfg.device
  with torch.no_grad():
      for i, batch in enumerate(t):
          
          inputs,  labels = batch
          
          inputs = inputs.to(device, dtype=torch.long)
          labels = labels.to(device, dtype=torch.long)
          with autocast(cfg.use_apex):
            outputs = model(inputs)
          
          loss = loss_fn(outputs, labels ) 
          total_loss += loss.detach().cpu().numpy()
          
          t.set_description("Loss : {0}".format(total_loss/(i+1)))
          t.refresh()
      
          y_true.append(labels.detach().cpu().numpy())
          y_preds.append( outputs.cpu().detach().numpy())


  y_preds = np.concatenate(y_preds)
  y_true = np.concatenate(y_true)
  score = criterion(y_true, y_preds)
  return y_preds, y_true, score, total_loss/(i+1)

In [None]:
def inference_clf(model, dataloader, cfg):
  total_loss = 0.
  t=tqdm(dataloader, disable= not cfg.verbose_val)
  
  y_preds = []
  model.eval()
  device = cfg.device
  with torch.no_grad():
      for i, batch in enumerate(t):
          
          inputs = batch[0]
          
          inputs = inputs.to(device, dtype=torch.long)
           
          outputs = model(inputs)
          y_preds.append( torch.softmax(outputs, axis=-1).cpu().detach().numpy())
  y_preds = np.concatenate(y_preds)
  return y_preds 

In [None]:
def accuracy(y_true, y_preds, one_hot_label=False):
  p = np.argmax(y_preds, axis=1)
  if one_hot_label:
    y_true = np.argmax(y_true, axis=1)
  return accuracy_score(y_true, p)

# Dataset

In [None]:
def encode_texts(texts, tokenizer, max_lengths):
  encode_text = []
  for text in tqdm(texts):
    encode_text.append(tokenizer.encode(text, max_length=max_lengths, padding="max_length", truncation=True, return_attention_mask=False, return_token_type_ids=False))

  return np.stack(encode_text)

class TextDataset(torch.utils.data.Dataset):
    """
        Dataloader test which slices n seconds based on tmin and tmax
    
    """
    def __init__(self, samples, labels=None, tokenizer=None, max_length=130):
        self.labels = None
        self.samples=samples
        self.tokenizer = tokenizer
        if labels is not None:
          self.labels = labels
        self.max_length = max_length
    
    
    def __getitem__(self, index):
        l = []
        
        text = self.samples[index] if self.tokenizer is None else tokenizer.encode(self.samples[index],  max_length=self.max_length, padding="max_length", truncation=True, return_attention_mask=False, return_token_type_ids=False)
        l.append(torch.as_tensor(text).long())

        if self.labels is not None:
            l.append( torch.as_tensor(self.labels[index]))
        #l.append(torch.tensor(index))
        return tuple(l)
    
    def __len__(self):
        return len(self.samples)

class TextDatasetSequence(torch.utils.data.Dataset):
    """
        Dataloader test which slices n seconds based on tmin and tmax
    
    """
    def __init__(self, samples, labels=None, max_length=130, padding=0):
        self.labels = None
        self.samples=samples
        if labels is not None:
          self.labels = labels
        self.max_length = max_length
    
    
    def __getitem__(self, index):
        l = []
        
        text = self.samples[index] 
        length = len(text)

        if self.max_length - length > 0:
          text = np.array(text.tolist() + [self.padding] * (self.max_length - length))

        l.append(torch.as_tensor(text).long())
        l.append(torch.as_tensor(length))
        if self.labels is not None:
            l.append( torch.as_tensor(self.labels[index]))
        #l.append(torch.tensor(index))
        return tuple(l)
    
    def __len__(self):
        return len(self.samples)

# Model

In [None]:
class AutoClassifier(nn.Module):
  def __init__(self, backbone, num_class, padding_idx, hidden_size=768):
    super(AutoClassifier, self).__init__()
    self.padding_idx = padding_idx
    self.backbone = backbone
    self.num_class = num_class
    self.cls = nn.Sequential(nn.Dropout(0.25), nn.Linear(hidden_size, 512), nn.ReLU(), nn.Dropout(0.25), nn.Linear(512, self.num_class))

  def forward(self, input):
    attention_mask  = (~(input == self.padding_idx)).long()
    emb = self.backbone(input, attention_mask=attention_mask)
    emb = emb[0][:,0] # CLS
    return self.cls(emb)

class AutoClassifierM(nn.Module):
  def __init__(self, backbone, num_class, padding_idx, hidden_size=768):
    super(AutoClassifierM, self).__init__()
    self.padding_idx = padding_idx
    self.backbone = backbone
    self.num_class = num_class
    self.cls = nn.Sequential(nn.Dropout(0.25), nn.Linear(hidden_size, 512), nn.ReLU(), nn.Dropout(0.25), nn.Linear(512, self.num_class))

  def forward(self, input):
    attention_mask  = (~(input == self.padding_idx)).long()
    hidden_states = self.backbone(input, attention_mask=attention_mask).last_hidden_state
    eos_mask = input_ids.eq(self.config.eos_token_id)

    if len(torch.unique(eos_mask.sum(1))) > 1:
        raise ValueError("All examples must have the same number of <eos> tokens.")
    sentence_representation = hidden_states[eos_mask, :].view(hidden_states.size(0), -1, hidden_states.size(-1))[:, -1, :]
    return self.cls(sentence_representation)

In [None]:
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
from sklearn.model_selection import StratifiedKFold

# Aug


In [1]:
from nltk import sent_tokenize
from albumentations.core.transforms_interface import DualTransform, BasicTransform
import nltk
nltk.download('punkt')
class NLPTransform(BasicTransform):
    """ Transform for nlp task."""
    LANGS = {
        'en': 'english'
    }

    @property
    def targets(self):
        return {"data": self.apply}
    
    def update_params(self, params, **kwargs):
        if hasattr(self, "interpolation"):
            params["interpolation"] = self.interpolation
        if hasattr(self, "fill_value"):
            params["fill_value"] = self.fill_value
        return params

    def get_sentences(self, text, lang='en'):
        return sent_tokenize(text, self.LANGS.get(lang, 'english'))
class SwapWordsTransform(NLPTransform):
    """ Swap words next to each other """
    def __init__(self, swap_distance=1, swap_probability=0.1, always_apply=False, p=0.5):
        """  
        swap_distance - distance for swapping words
        swap_probability - probability of swapping for one word
        """
        super(SwapWordsTransform, self).__init__(always_apply, p)
        self.swap_distance = swap_distance
        self.swap_probability = swap_probability
        self.swap_range_list = list(range(1, swap_distance+1))

    def apply(self, data, **params):
        text, lang = data
        words = text.split()
        words_count = len(words)
        if words_count <= 1:
            return text, lang

        new_words = {}
        for i in range(words_count):
            if random.random() > self.swap_probability:
                new_words[i] = words[i]
                continue
    
            if i < self.swap_distance:
                new_words[i] = words[i]
                continue
    
            swap_idx = i - random.choice(self.swap_range_list)
            new_words[i] = new_words[swap_idx]
            new_words[swap_idx] = words[i]

        return ' '.join([v for k, v in sorted(new_words.items(), key=lambda x: x[0])]), lang
      
class CutOutWordsTransform(NLPTransform):
    """ Remove random words """
    def __init__(self, cutout_probability=0.05, always_apply=False, p=0.5):
        super(CutOutWordsTransform, self).__init__(always_apply, p)
        self.cutout_probability = cutout_probability

    def apply(self, data, **params):
        text, lang = data
        words = text.split()
        words_count = len(words)
        if words_count <= 1:
            return text, lang
        
        new_words = []
        for i in range(words_count):
            if random.random() < self.cutout_probability:
                continue
            new_words.append(words[i])

        if len(new_words) == 0:
            return words[random.randint(0, words_count-1)], lang

        return ' '.join(new_words), lang
class ShuffleSentencesTransform(NLPTransform):
    """ Do shuffle by sentence """
    def __init__(self, always_apply=False, p=0.5):
        super(ShuffleSentencesTransform, self).__init__(always_apply, p)

    def apply(self, data, **params):
        text, lang = data
        sentences = self.get_sentences(text, lang)
        random.shuffle(sentences)
        return ' '.join(sentences), lang


      
transform = SwapWordsTransform(p=1.0, swap_distance=1, swap_probability=0.2)
tr=CutOutWordsTransform(p=1.0, cutout_probability=0.2)
shufl=ShuffleSentencesTransform(p=0.8)

def add_augmentation(tmp_trn):
  lang = 'en'
  tmp = tmp_trn.copy().reset_index(drop=True)
  for aug in [shufl,tr,transform]:
    trdf=tmp_trn.copy().reset_index(drop=True)
    for i in range(len(trdf)):
            text = trdf['Text'][i]
            """text=transform(data=(text, lang))['data'][0]
            text=transform(data=(text, lang))['data'][0]"""
            trdf['Text'][i]=aug(data=(text, lang))['data'][0]
    witch=trdf[trdf['label']==class2id["WITCHCRAFT"]]
    flood=trdf[trdf['label']==class2id["FLOODING"]]
    art=trdf[trdf['label']==class2id["ARTS AND CRAFTS"]]
    music=trdf[trdf['label']==class2id["MUSIC"]]
    transport=trdf[trdf['label']==class2id["TRANSPORT"]]

    cultur=trdf[trdf['label']==class2id["CULTURE"]]
    local=trdf[trdf['label']==class2id["LOCALCHIEFS"]]
    opinion=trdf[trdf['label']==class2id["OPINION/ESSAY"]]

    wild=trdf[trdf['label']==class2id["WILDLIFE/ENVIRONMENT"]]
    relation=trdf[trdf['label']==class2id["RELATIONSHIPS"]]
    educ=trdf[trdf['label']==class2id["EDUCATION"]]
    sport=trdf[trdf['label']==class2id["SPORTS"]]
    tmp_trn=pd.concat([tmp_trn,witch,witch,flood,flood,art,music,transport],axis=0).sample(frac=1).reset_index(drop=True)
    print(tmp_trn.shape)
  return tmp_trn

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# Training

In [None]:
# 
class2id = {"POLITICS":0, "SOCIAL":1, "RELIGION":2, "LAW/ORDER":3, "SOCIAL ISSUES":4, "HEALTH":5, "ECONOMY":6, "FARMING":7, "SPORTS":8, "EDUCATION":9,
            "RELATIONSHIPS":10, "WILDLIFE/ENVIRONMENT":11, "OPINION/ESSAY":12, "LOCALCHIEFS":13, "CULTURE": 14, "WITCHCRAFT":15, "MUSIC":16, "TRANSPORT":17,
            "FLOODING":18, "ARTS AND CRAFTS":19}
id2class = {x:y for y,x in class2id.items()}  

In [None]:
print(class2id)
print(id2class)

{'POLITICS': 0, 'SOCIAL': 1, 'RELIGION': 2, 'LAW/ORDER': 3, 'SOCIAL ISSUES': 4, 'HEALTH': 5, 'ECONOMY': 6, 'FARMING': 7, 'SPORTS': 8, 'EDUCATION': 9, 'RELATIONSHIPS': 10, 'WILDLIFE/ENVIRONMENT': 11, 'OPINION/ESSAY': 12, 'LOCALCHIEFS': 13, 'CULTURE': 14, 'WITCHCRAFT': 15, 'MUSIC': 16, 'TRANSPORT': 17, 'FLOODING': 18, 'ARTS AND CRAFTS': 19}
{0: 'POLITICS', 1: 'SOCIAL', 2: 'RELIGION', 3: 'LAW/ORDER', 4: 'SOCIAL ISSUES', 5: 'HEALTH', 6: 'ECONOMY', 7: 'FARMING', 8: 'SPORTS', 9: 'EDUCATION', 10: 'RELATIONSHIPS', 11: 'WILDLIFE/ENVIRONMENT', 12: 'OPINION/ESSAY', 13: 'LOCALCHIEFS', 14: 'CULTURE', 15: 'WITCHCRAFT', 16: 'MUSIC', 17: 'TRANSPORT', 18: 'FLOODING', 19: 'ARTS AND CRAFTS'}


In [None]:
def encode_texts_left(texts, tokenizer, max_lengths):
  encode_text = []
  for text in tqdm(texts):
    encoded = tokenizer.encode(text, max_length=max_lengths, padding="max_length", truncation=False, return_attention_mask=False, return_token_type_ids=False)
    length = len(encoded)
    if length > max_lengths:
      to_remove = length - ( max_lengths - 1)
      encoded = [tokenizer.cls_token_id] + encoded[to_remove:]
      if len(encoded) != max_lengths:
        print(len(encoded))
        raise("error in tokenizer left")
    encode_text.append(encoded)

  return np.stack(encode_text)

In [None]:
import json
class Config():
  def __init__(self, name, kfold=5, batch_size=16, accumulation=1, loss_fn=nn.CrossEntropyLoss):
      self.kfold = kfold
      self.device = "cuda" if torch.cuda.is_available() else "cpu"
      self.use_apex = True
      self.batch_size = batch_size
      self.accumulation = accumulation
      self.name = name
      self.ckpt ="XLM-Roberta-large-200-40folds-aug" #  "xlm-roberta" #
      self.save_name = "./" + self.ckpt
      self.save_name_csv = "drive/MyDrive/zindi_nlp/MalawiNews/XLM-r-large/ckpt-80000/" + self.ckpt
      self.num_class = len(class2id)
      self.max_length= 200
      self.epochs= 8 #15
      self.early_stopping =5
      self.warmup = 0.01
      self.verbose = True
      self.verbose_val = False
      # loss function
      self.loss_fn = loss_fn
      self.lr = [2e-5]*self.kfold # 1e-5
      self.wd=3e-5
      self.stop_epoch=50

cfg = Config(name="drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000", batch_size =4, kfold=40, accumulation=8) #
if not os.path.exists(cfg.save_name_csv):
  os.makedirs(cfg.save_name_csv)
#cfg_str = json.dumps(cfg)
if not os.path.exists(cfg.save_name):
  os.makedirs(cfg.save_name)

In [None]:
train = pd.read_csv("Train.csv")
test = pd.read_csv("Test.csv")
sub = pd.read_csv("SampleSubmission.csv")

In [None]:
train["label"] = train.Label.apply(lambda x: class2id[x])

In [None]:
tokenizer = AutoTokenizer.from_pretrained(cfg.name, use_fast=False)


In [None]:
tokenizer

PreTrainedTokenizer(name_or_path='drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000', vocab_size=250002, model_max_len=512, is_fast=False, padding_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=False)})

In [None]:
# Encode test data
test_texts = encode_texts(test.Text, tokenizer, cfg.max_length)
test_dataset = TextDataset(test_texts)
test_dataloader = DataLoader(test_dataset, batch_size=cfg.batch_size, shuffle=False, num_workers=4)

HBox(children=(FloatProgress(value=0.0, max=620.0), HTML(value='')))




In [None]:
# Encode test data 
test_texts_left = encode_texts_left(test.Text, tokenizer, cfg.max_length)
test_dataset_left = TextDataset(test_texts_left)
test_dataloader_left = DataLoader(test_dataset_left, batch_size=cfg.batch_size, shuffle=False, num_workers=4)

HBox(children=(FloatProgress(value=0.0, max=620.0), HTML(value='')))




In [None]:
all_texts = encode_texts(train.Text, tokenizer, cfg.max_length)
all_labels = train.label.values

HBox(children=(FloatProgress(value=0.0, max=1436.0), HTML(value='')))




In [None]:
max_ = train.Label.value_counts().max()
weights_name = max_/train.Label.value_counts()
weights = torch.as_tensor([weights_name[id2class[x]] for x in range(len(id2class)) ]).half()

#loss_fn=nn.CrossEntropyLoss()
loss_fn=nn.CrossEntropyLoss(weight=weights).to(cfg.device)

In [None]:
BERTWEET_MODEL=True
ELECTRA_MODEL=False
XLMROBERTA_MODEL = False
mBart_MODEL = False
EVAL_ONLY=False

In [None]:
if BERTWEET_MODEL:
  skf = StratifiedKFold(n_splits=cfg.kfold, random_state=42)
  test_preds = []
  test_preds_left = []
  oof_preds = []
  oof_targets = []
  dataloaders = []
  fold2use = {i:[] for i in range(10)}
  results =[]
  for fold, (train_idx, val_idx) in enumerate(skf.split( np.zeros(len(train)), train.label.values)):
    print(f"#### FOLD : {fold} ####")
    counter = 0
    best_score = 0
    
    # add augmentation on train
    #print(train.iloc[train_idx].label.value_counts())
    curr_tr = add_augmentation(train.iloc[train_idx])
    #print(curr_tr.label.value_counts())
    train_text = encode_texts(curr_tr.Text, tokenizer, cfg.max_length)
    train_label = curr_tr .label.values
    # get train/val


    train_dataset = TextDataset(train_text, labels=train_label , max_length=cfg.max_length)
    val_dataset = TextDataset(all_texts[val_idx], labels=all_labels[val_idx], max_length=cfg.max_length)
    
    train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_dataset, batch_size=cfg.batch_size*2, shuffle=False, num_workers=1)
    
    # optimizer and model and scheduler
    total_steps = cfg.epochs * len(train_dataloader)
    num_warmup_steps = cfg.warmup * total_steps
    num_training_steps = total_steps
    backbone = AutoModel.from_pretrained(cfg.name)
    model = AutoClassifier(backbone, num_class=cfg.num_class, padding_idx=tokenizer.pad_token_id, hidden_size=1024).to(cfg.device)


    max_ = curr_tr.label.value_counts().max()
    weights_name = max_/curr_tr.label.value_counts()
    weights = torch.as_tensor([weights_name[x] for x in range(len(id2class)) ]).half()

    #loss_fn=nn.CrossEntropyLoss()
    #print(weights)
    loss_fn=nn.CrossEntropyLoss(weight=weights).to(cfg.device)

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": cfg.wd,
        },
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]

    optimizer = AdamW(optimizer_grouped_parameters, lr=cfg.lr[fold])#, weight_decay=cfg.wd)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps , num_training_steps=num_training_steps)
    if not EVAL_ONLY:
      for e in range(cfg.epochs):
        train_clf(model, train_dataloader, optimizer,  loss_fn, cfg, scheduler=scheduler)
        y_preds ,y_true, val_score, val_loss = eval_clf(model, val_dataloader, loss_fn, accuracy, cfg)

        if val_score > best_score:
          print(f"Improvement from {best_score} to {val_score}, saving model ...")
          best_score = val_score
          torch.save(model.state_dict(), cfg.save_name + "/" +  f"{cfg.ckpt}-fold{fold}.pth")
          counter = 0
        else:
          print("not an improvement :", val_score)
          counter +=1

        if counter > cfg.early_stopping:
          print("========= Early stopping ========")
          break
        if cfg.stop_epoch is not None:
          if e >= cfg.stop_epoch:
            print("========= Early stopping ========")
            break
    model.load_state_dict(torch.load(cfg.save_name + "/" + f"{cfg.ckpt}-fold{fold}.pth"))
    y_preds ,y_true, val_score, val_loss = eval_clf(model, val_dataloader, loss_fn, accuracy, cfg)
    oof_targets.append(y_true)
    oof_preds.append(y_preds)
    preds = inference_clf(model, test_dataloader, cfg)
    preds2 = inference_clf(model, test_dataloader_left, cfg)
    test_preds.append(preds)
    test_preds_left.append((preds+preds2)/2.0)
    dataloaders.append(val_dataloader)
    results.append(val_score)
    os.remove(cfg.save_name + "/" +  f"{cfg.ckpt}-fold{fold}.pth")
    del preds, preds2
    gc.collect()
  test_preds=np.stack(test_preds)
  test_preds_left=np.stack(test_preds_left)

  oof_preds = np.concatenate(oof_preds)
  oof_targets = np.concatenate(oof_targets)
  avg = accuracy(oof_targets, oof_preds)
  print("AVG ACC : ", avg)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


#### FOLD : 0 ####
(1479, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1683, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2229, 4)


HBox(children=(FloatProgress(value=0.0, max=2229.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5277777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5277777777777778 to 0.5833333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5833333333333334 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6944444444444444 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6944444444444444
#### FOLD : 1 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1479, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1683, 4)
(2229, 4)


HBox(children=(FloatProgress(value=0.0, max=2229.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5833333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5833333333333334 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6944444444444444 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.75 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7777777777777778
#### FOLD : 2 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1479, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1683, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2229, 4)


HBox(children=(FloatProgress(value=0.0, max=2229.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.4166666666666667, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.4166666666666667 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.7777777777777778 to 0.8055555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.8055555555555556 to 0.8333333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.8055555555555556


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.8333333333333334
#### FOLD : 3 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1479, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1683, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2229, 4)


HBox(children=(FloatProgress(value=0.0, max=2229.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6111111111111112
#### FOLD : 4 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1479, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1683, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2229, 4)


HBox(children=(FloatProgress(value=0.0, max=2229.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.75 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7777777777777778


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.7777777777777778 to 0.8055555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.8055555555555556
#### FOLD : 5 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1479, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1683, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2229, 4)


HBox(children=(FloatProgress(value=0.0, max=2229.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6666666666666666
#### FOLD : 6 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1479, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1683, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2229, 4)


HBox(children=(FloatProgress(value=0.0, max=2229.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.7222222222222222 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7222222222222222


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.75 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7222222222222222


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.75
#### FOLD : 7 ####
(1478, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1680, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2222, 4)


HBox(children=(FloatProgress(value=0.0, max=2222.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.5555555555555556
#### FOLD : 8 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1478, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1680, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2222, 4)


HBox(children=(FloatProgress(value=0.0, max=2222.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))




Improvement from 0 to 0.4722222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.4722222222222222 to 0.5, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.5 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.6944444444444444, saving model ...
#### FOLD : 9 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1478, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1680, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2222, 4)


HBox(children=(FloatProgress(value=0.0, max=2222.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))




Improvement from 0 to 0.5, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.5 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=556.0), HTML(value='')))


not an improvement : 0.6388888888888888
#### FOLD : 10 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.4166666666666667, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.4166666666666667 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888
#### FOLD : 11 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.5277777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5277777777777778 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6944444444444444
#### FOLD : 12 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.4722222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.4722222222222222 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.5833333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.5277777777777778


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5833333333333334 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6111111111111112
#### FOLD : 13 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.5, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.5555555555555556


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888
#### FOLD : 14 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.75 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.7777777777777778


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.7777777777777778


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.7777777777777778


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.75
#### FOLD : 15 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.5, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5 to 0.5833333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5833333333333334 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888
#### FOLD : 16 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.7222222222222222 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.7777777777777778 to 0.8611111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.8333333333333334


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.8611111111111112


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.8333333333333334


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.8333333333333334
#### FOLD : 17 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.5277777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5277777777777778 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.75 to 0.8055555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.8055555555555556 to 0.8333333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.8333333333333334


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.8055555555555556


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.7777777777777778
#### FOLD : 18 ####
(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.7222222222222222 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.75
#### FOLD : 19 ####
(1477, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1677, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2215, 4)


HBox(children=(FloatProgress(value=0.0, max=2215.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=554.0), HTML(value='')))


not an improvement : 0.6666666666666666
#### FOLD : 20 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1669, 4)
(2189, 4)


HBox(children=(FloatProgress(value=0.0, max=2189.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))




Improvement from 0 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))


Improvement from 0.6944444444444444 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))


not an improvement : 0.7222222222222222


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))


not an improvement : 0.7222222222222222


HBox(children=(FloatProgress(value=0.0, max=548.0), HTML(value='')))


not an improvement : 0.7222222222222222
#### FOLD : 21 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.7222222222222222 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.75 to 0.8055555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.8055555555555556
#### FOLD : 22 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.7222222222222222


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6944444444444444
#### FOLD : 23 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.5277777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5277777777777778 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.7222222222222222 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.7222222222222222


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6944444444444444
#### FOLD : 24 ####
(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.75 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.7777777777777778 to 0.8055555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.8055555555555556


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.7222222222222222


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.8055555555555556


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.8055555555555556
#### FOLD : 25 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.4444444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.4444444444444444 to 0.5277777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.5277777777777778


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5277777777777778 to 0.5555555555555556, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5555555555555556 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.5833333333333334
#### FOLD : 26 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.4722222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.4722222222222222 to 0.5833333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5833333333333334 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.6944444444444444 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.7222222222222222 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6944444444444444
#### FOLD : 27 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.5833333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.5277777777777778


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5833333333333334 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666
#### FOLD : 28 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1476, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1672, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2196, 4)


HBox(children=(FloatProgress(value=0.0, max=2196.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))




Improvement from 0 to 0.5, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


Improvement from 0.5 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=549.0), HTML(value='')))


not an improvement : 0.6666666666666666
#### FOLD : 29 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1667, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2177, 4)


HBox(children=(FloatProgress(value=0.0, max=2177.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))




Improvement from 0 to 0.4444444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.4444444444444444 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666
#### FOLD : 30 ####
(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1667, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2177, 4)


HBox(children=(FloatProgress(value=0.0, max=2177.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))




Improvement from 0 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.6944444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6944444444444444 to 0.75, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.75 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666
#### FOLD : 31 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1667, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2177, 4)


HBox(children=(FloatProgress(value=0.0, max=2177.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))




Improvement from 0 to 0.4722222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.4722222222222222 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6111111111111112


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.5833333333333334


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.5833333333333334
#### FOLD : 32 ####
(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1667, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2177, 4)


HBox(children=(FloatProgress(value=0.0, max=2177.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))




Improvement from 0 to 0.6111111111111112, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6111111111111112 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.7777777777777778, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.75


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.7777777777777778


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.7777777777777778


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.75
#### FOLD : 33 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1667, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2177, 4)


HBox(children=(FloatProgress(value=0.0, max=2177.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))




Improvement from 0 to 0.4444444444444444, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.4444444444444444 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444
#### FOLD : 34 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1667, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2177, 4)


HBox(children=(FloatProgress(value=0.0, max=2177.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))




Improvement from 0 to 0.5833333333333334, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.5833333333333334 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6666666666666666 to 0.7222222222222222, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6944444444444444
#### FOLD : 35 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1475, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1667, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2177, 4)


HBox(children=(FloatProgress(value=0.0, max=2177.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))




Improvement from 0 to 0.5, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.5 to 0.6388888888888888, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


Improvement from 0.6388888888888888 to 0.6666666666666666, saving model ...


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6388888888888888


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666


HBox(children=(FloatProgress(value=0.0, max=545.0), HTML(value='')))


not an improvement : 0.6666666666666666
#### FOLD : 36 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1480, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1684, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2230, 4)


HBox(children=(FloatProgress(value=0.0, max=2230.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5428571428571428, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5428571428571428 to 0.6571428571428571, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6571428571428571 to 0.7142857142857143, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7142857142857143


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6857142857142857


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7142857142857143


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6857142857142857


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6571428571428571
#### FOLD : 37 ####
(1480, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1684, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2230, 4)


HBox(children=(FloatProgress(value=0.0, max=2230.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5142857142857142, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5142857142857142 to 0.6285714285714286, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6285714285714286 to 0.6571428571428571, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6571428571428571 to 0.7142857142857143, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6571428571428571


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7142857142857143


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6857142857142857


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7142857142857143
#### FOLD : 38 ####


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1480, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1684, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2230, 4)


HBox(children=(FloatProgress(value=0.0, max=2230.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5714285714285714, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5714285714285714 to 0.6571428571428571, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6571428571428571 to 0.6857142857142857, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6857142857142857 to 0.7142857142857143, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6857142857142857


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6285714285714286


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.5714285714285714


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.6
#### FOLD : 39 ####
(1480, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(1684, 4)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(2230, 4)


HBox(children=(FloatProgress(value=0.0, max=2230.0), HTML(value='')))




Some weights of XLMRobertaModel were not initialized from the model checkpoint at drive/MyDrive/zindi_nlp/MalawiNews/pretrain/xlm-r-large/checkpoint-80000 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))




Improvement from 0 to 0.5428571428571428, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.5428571428571428 to 0.6571428571428571, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.6571428571428571 to 0.7428571428571429, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7142857142857143


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


Improvement from 0.7428571428571429 to 0.8285714285714286, saving model ...


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.7714285714285715


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.8


HBox(children=(FloatProgress(value=0.0, max=558.0), HTML(value='')))


not an improvement : 0.8
AVG ACC :  0.729108635097493


In [None]:
print("Training Time : ", print(time.time()-START))
START = time.time()

38505.08796429634
Training Time :  None


In [None]:
with open(cfg.save_name_csv + "/" + f"{cfg.ckpt}-results-{avg}.txt" , "w") as f:
  # f.write(cfg_str)
  # f.write("\n\n")
  for r in results:
    f.write(f"{round(r,4)} \n")
  f.write(str(avg))

In [None]:
name_prob = [f"label_{x}" for x in range(cfg.num_class)]


In [None]:
sub["ID"] = test["ID"]

In [None]:
if BERTWEET_MODEL:
  #sub["ID"] = sub.ID.apply(lambda x : x.zfill(7))
  sub["Label"] = np.argmax(test_preds.mean(0), axis=1) 
  sub["Label"] = sub.Label.apply(lambda x: id2class[x])
  sub.to_csv(cfg.save_name_csv + f"submission-{round(avg,4)}.csv", index=False)
  sub.Label.value_counts()
  sub[name_prob] = test_preds.mean(0) 
  sub.to_csv(cfg.save_name_csv + f"submission-{round(avg,4)}-proba.csv", index=False)

In [None]:
sub.Label.value_counts()

POLITICS                105
LAW/ORDER                77
SOCIAL                   64
RELIGION                 62
HEALTH                   61
SOCIAL ISSUES            54
ECONOMY                  44
FARMING                  31
SPORTS                   23
RELATIONSHIPS            21
WILDLIFE/ENVIRONMENT     21
EDUCATION                18
LOCALCHIEFS              14
MUSIC                     9
CULTURE                   8
TRANSPORT                 4
ARTS AND CRAFTS           3
FLOODING                  1
Name: Label, dtype: int64

In [None]:
sub = pd.read_csv("SampleSubmission.csv")
sub["ID"] = test["ID"]

In [None]:
if BERTWEET_MODEL:
  #sub["ID"] = sub.ID.apply(lambda x : x.zfill(7))
  sub["Label"] = np.argmax(test_preds_left.mean(0), axis=1) 
  sub["Label"] = sub.Label.apply(lambda x: id2class[x])
  sub.to_csv(cfg.save_name_csv + f"submission-{round(avg,4)}-left.csv", index=False)
  sub.Label.value_counts()
  sub[name_prob] = test_preds_left.mean(0) 
  sub.to_csv(cfg.save_name_csv + f"submission-{round(avg,4)}-proba-left.csv", index=False)

In [None]:
sub.Label.value_counts()

POLITICS                106
LAW/ORDER                73
RELIGION                 67
HEALTH                   60
SOCIAL                   57
SOCIAL ISSUES            49
ECONOMY                  41
FARMING                  34
SPORTS                   24
RELATIONSHIPS            21
EDUCATION                20
WILDLIFE/ENVIRONMENT     19
LOCALCHIEFS              13
WITCHCRAFT                9
MUSIC                     7
CULTURE                   6
TRANSPORT                 6
FLOODING                  5
ARTS AND CRAFTS           3
Name: Label, dtype: int64