In [1]:
# Install the packages this notebook relies on, and enable the ipywidgets notebook extension.
# NOTE(review): no versions are pinned, so a fresh run may resolve different releases.
# NOTE(review): easynmt and sacremoses are not used by the cells visible here — confirm they are still needed.
!pip install transformers
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension
!pip install -U easynmt
!pip install sacremoses

[0mEnabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m
Collecting easynmt
  Downloading EasyNMT-2.0.2.tar.gz (23 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting fasttext
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.8/68.8 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting pybind11>=2.2
  Using cached pybind11-2.11.1-py3-none-any.whl (227 kB)
Building wheels for collected packages: easynmt, fasttext
  Building wheel for easynmt (setup.py) ... [?25ldone
[?25h  Created wheel for easynmt: filename=EasyNMT-2.0.2-py3-none-any.whl size=19904 sha256=59c4552f64246ddd3399b3c84120d96e76dcf9eba88b24155d11a2f1866e98ba
  Stored in directory: /root/.cache/pip/wheels/51/19/60/37550e51634162d0317f08725130f360e64b6e9a83a149090c
  Building wheel for fasttext (setup.py) ... [?25ldone
[?25h  Created wheel for fasttext: filen

In [2]:
# Upgrade accelerate to the latest release (unpinned).
# NOTE(review): presumably required by the transformers Trainer used below — confirm.
#!pip install protobuf==3.20.*
!pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m


Collecting safetensors>=0.3.1
  Downloading safetensors-0.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m62.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, accelerate
Successfully installed accelerate-0.27.2 safetensors-0.4.2
[0m

In [4]:
import torch
import transformers
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
from transformers import Trainer, TrainingArguments
from transformers import RobertaTokenizer, RobertaModel, RobertaConfig

from sklearn.utils import resample


import pandas as pd
import numpy as np
import os
from pathlib import Path

from sklearn.metrics import classification_report

# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
elif torch.backends.mps.is_available():
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'
print("Device:", DEVICE)
# torch.device handle used by the cells below when moving models and tensors.
device = torch.device(DEVICE)


Device: cuda:0


In [5]:
# Initialization Cell
# Chooses content_path / data_path for the selected hosting environment.
WORKING_ENV = 'PAPERSPACE' # Can be LABS, COLAB, PAPERSPACE, SAGEMAKER
USERNAME = '' # If working on Lab Machines - Your college username
# Fail fast on a misspelt environment name.
assert WORKING_ENV in ['LABS', 'COLAB', 'PAPERSPACE', 'SAGEMAKER']

if WORKING_ENV == 'PAPERSPACE': # Using Paperspace
    !pip install ipywidgets
    content_path = '/notebooks/'
    data_path = './data/'
    
else:
  # Only the PAPERSPACE branch is implemented; every other accepted name ends up here.
  raise NotImplementedError()

# Keep content_path as a pathlib.Path rather than a plain string.
content_path = Path(content_path)

[0m

In [11]:
# running locally
# Re-points content_path and data_path at the current working directory; when this
# cell runs after the initialization cell it replaces the values set there.
import os
content_path = os.getcwd()
data_path = f'{content_path}/data/'
content_path = Path(content_path)

# Setting up data and utils

In [6]:
# helper function to save predictions to an output file
def labels2file(p, outf_path):
    """Write predictions to ``outf_path``, one comma-separated line per entry of ``p``.

    Args:
        p: iterable of rows; each row is an iterable whose items are
            converted with ``str`` and joined by commas.
        outf_path: destination file path; an existing file is overwritten.
    """
    with open(outf_path, 'w') as handle:
        for row in p:
            line = ','.join(map(str, row))
            handle.write(line + '\n')

# Defining dataset class

In [7]:
from torch.utils.data import Dataset

# Define the custom dataset class
class PCLDataset(Dataset):
    """Dataset view over a DataFrame that has ``text`` and ``label`` columns.

    Items are returned untokenized; ``collate_fn`` tokenizes a whole batch and
    attaches the labels.
    """

    def __init__(self, tokenizer, dataframe):
        """Store the tokenizer used by ``collate_fn`` and the backing DataFrame."""
        self.tokenizer = tokenizer
        self.data = dataframe

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'text': ..., 'label': ...}`` for the row at position ``idx``."""
        row = self.data.iloc[idx]
        return {'text': row['text'], 'label': row['label']}

    def collate_fn(self, batch):
        """Tokenize a list of items and add their labels under ``'labels'``.

        Texts are padded to the longest sequence in the batch and truncated
        to 128 tokens; labels become a ``torch.long`` tensor.
        """
        texts = []
        raw_labels = []
        for sample in batch:
            texts.append(sample['text'])
            raw_labels.append(sample['label'])

        label_tensor = torch.tensor(raw_labels, dtype=torch.long)
        encodings = self.tokenizer(
            texts,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128,
        )
        encodings['labels'] = label_tensor
        return encodings

In [None]:
from dont_patronize_me import DontPatronizeMe


def load_datasets(downsample=False):
    """Build the internal train and dev DataFrames for task 1.

    The paragraph ids of each split are read from
    ``internal_train_par_ids.csv`` and ``internal_dev_par_ids.csv`` in the
    current working directory and looked up in the task-1 frame loaded through
    ``DontPatronizeMe``.

    Args:
        downsample: when True, the training frame is reduced to every row with
            ``label == 1`` followed by the first ``2 * n_positive`` rows with
            ``label == 0`` (in split-file order).

    Returns:
        ``(train_df, dev_df)``; each has the columns ``par_id``, ``community``
        (taken from the source ``keyword`` column), ``text`` and ``label``.

    Raises:
        KeyError: if a split id does not occur in the task-1 data.
    """
    dpm = DontPatronizeMe('.', '.')
    dpm.load_task1()
    data = dpm.train_task1_df

    # Index the task data by par_id once. Keeping the first occurrence matches
    # "take the first matching row" and avoids scanning the whole frame
    # several times for every id.
    lookup = data.drop_duplicates(subset='par_id', keep='first').set_index('par_id')

    def _rows_for(ids_csv):
        """Return the split frame for the ids listed in ``ids_csv``, in file order."""
        # Ids are compared as strings, so cast whatever dtype the CSV produced.
        par_ids = pd.read_csv(ids_csv).par_id.astype(str).tolist()
        matched = lookup.loc[par_ids, ['keyword', 'text', 'label']]
        return pd.DataFrame({
            'par_id': par_ids,
            'community': matched['keyword'].tolist(),
            'text': matched['text'].tolist(),
            'label': matched['label'].tolist(),
        })

    train_df = _rows_for('internal_train_par_ids.csv')

    if downsample:
        # downsample negative instances: keep all positives and twice as many negatives
        positives = train_df[train_df.label == 1]
        negatives = train_df[train_df.label == 0]
        train_df = pd.concat([positives, negatives[:len(positives) * 2]])

    dev_df = _rows_for('internal_dev_par_ids.csv')

    return train_df, dev_df



In [None]:
# Tokenizer, the downsampled train split and the dev split used by the sweep cells.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
train_df, dev_df = load_datasets(downsample=True)
train_set = PCLDataset(tokenizer, train_df)
dev_set_PCL = PCLDataset(tokenizer, dev_df)
# dev_set is a DataLoader with default collation (batches of 32), not a Dataset;
# it is what gets passed to the trainer as eval_dataset further down.
dev_set = DataLoader(dev_set_PCL, batch_size=32)

# Custom RoBERTa model

In [8]:
from transformers import RobertaModel, RobertaPreTrainedModel

class RoBERTaForPCL(RobertaPreTrainedModel):
    """RoBERTa encoder followed by dropout and a single-output linear head.

    ``forward`` returns the raw head output: no sigmoid is applied and no loss
    is computed, and the ``labels`` argument is accepted but not used.
    """

    def __init__(self, config, dropout_rate=0.1, num_frozen_layers=0):
        """Build the encoder and head, optionally freezing the bottom of the encoder.

        Args:
            config: model configuration; ``config.hidden_size`` sizes the head input.
            dropout_rate: dropout probability applied before the head.
            num_frozen_layers: when positive, gradients are disabled for the
                embeddings and for the first ``num_frozen_layers`` encoder layers.
        """
        super().__init__(config)
        self.roberta = RobertaModel(config)
        self.classifier = torch.nn.Linear(config.hidden_size, 1)
        self.dropout = torch.nn.Dropout(dropout_rate)

        if num_frozen_layers > 0:
            # The embeddings are frozen together with the bottom encoder layers.
            frozen_modules = [self.roberta.embeddings]
            frozen_modules.extend(self.roberta.encoder.layer[:num_frozen_layers])
            for module in frozen_modules:
                for weight in module.parameters():
                    weight.requires_grad = False

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        """Run the encoder and return the head output for its pooled representation."""
        encoder_outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        # Element 1 of the encoder output is what the head consumes.
        pooled = self.dropout(encoder_outputs[1])
        return self.classifier(pooled)


# Trainer and evaluation function definition

In [9]:
from sklearn.metrics import classification_report
import torch
from tqdm.auto import tqdm

def evaluate(model, tokenizer, data_loader):
    """Score ``model`` on ``data_loader`` and return a classification report.

    Args:
        model: called as ``model(**tokenized_inputs)``; its output is passed
            through a sigmoid and thresholded at 0.5, one value per example.
        tokenizer: applied to each batch's ``'text'`` entry (padding on,
            truncation to 128 tokens, PyTorch tensors).
        data_loader: iterable of batches exposing ``'text'`` and ``'label'``;
            the labels must be a tensor.

    Returns:
        The dict produced by ``classification_report(..., output_dict=True)``
        with the class names ``"Not PCL"`` and ``"PCL"``.

    Side effects:
        Puts the model in eval mode for the pass and in train mode afterwards,
        regardless of the mode it was in on entry.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():  # No need to track gradients for evaluation
        for batch in tqdm(data_loader, desc="Evaluating"):

            encodings = tokenizer(batch['text'], return_tensors='pt', padding=True, truncation=True, max_length=128)
            inputs = {k: v.to(device) for k, v in encodings.items()}
            labels = batch['label']

            # Forward pass, get logits from the model
            outputs = model(**inputs)

            # Flatten with reshape(-1) rather than squeeze(): squeeze() collapses a
            # single-example batch to a 0-d tensor, whose tolist() is a bare
            # scalar that list.extend() cannot consume.
            probs = torch.sigmoid(outputs).reshape(-1)
            preds = (probs > 0.5).long()  # Convert to binary predictions

            # Move preds and labels to CPU, convert to lists for classification_report
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.reshape(-1).cpu().tolist())

    # Compute the classification report (returned as a dict; nothing is printed here)
    report = classification_report(all_labels, all_preds, target_names=["Not PCL", "PCL"], output_dict=True, zero_division=0)
    model.train()
    return report

In [10]:
import wandb

class PCLTrainer(Trainer):
    """Trainer that uses a BCE-with-logits loss and the notebook's ``evaluate`` helper."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Binary cross-entropy between the model's logits and the batch labels.

        Args:
            model: called as ``model(**inputs)`` once ``"labels"`` has been removed.
            inputs: batch mapping; its ``"labels"`` entry is popped (the mapping
                is modified in place).
            return_outputs: when True, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: accepted and ignored, so the override stays
                callable from Trainer versions that pass this keyword.

        Returns:
            The scalar loss, or ``(loss, outputs)`` when ``return_outputs`` is True.
        """
        # Extract labels from inputs and remove them from the dict
        labels = inputs.pop("labels")

        # Forward pass
        outputs = model(**inputs)

        # Compute custom loss: Binary Cross-Entropy with Logits
        loss_fct = nn.BCEWithLogitsLoss()

        # Flatten both sides so the logits line up with one float label per example.
        loss = loss_fct(outputs.view(-1), labels.float().view(-1))

        return (loss, outputs) if return_outputs else loss

    def evaluate(self, ignore_keys=None):
        """Evaluate on ``self.eval_dataset`` and report the metrics of the ``PCL`` class.

        The eval dataset is iterated directly by the module-level ``evaluate``
        helper. Accuracy, F1, precision and recall are printed and logged to
        wandb; ``ignore_keys`` is accepted but not used.

        Returns:
            ``{"eval_f1": <F1 of the PCL class>}``.
        """
        eval_results = evaluate(self.model, self.tokenizer, self.eval_dataset)
        f1_score = eval_results['PCL']['f1-score']
        precision = eval_results['PCL']['precision']
        recall = eval_results['PCL']['recall']
        accuracy = eval_results['accuracy']

        print(f"Accuracy: {accuracy}, F1 Score: {f1_score}, Precision: {precision}, Recall: {recall}")

        # Log the results with wandb
        wandb.log({"eval_f1": f1_score, "precision": precision, "recall": recall, "accuracy": accuracy})
        return {"eval_f1": f1_score}

In [3]:
# Authenticate with Weights & Biases; WANDB_NOTEBOOK_NAME names this notebook for wandb.
import wandb
import os
os.environ["WANDB_NOTEBOOK_NAME"] = "roberta_finetuning.ipynb"
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [19]:
# Bayesian hyperparameter sweep definition passed to wandb.sweep().
sweep_config = {
    'method': 'bayes',
    'metric': {
        # Must be a key that the runs actually log: PCLTrainer.evaluate logs
        # "eval_f1" via wandb.log, and nothing logs a plain "f1".
        'name': 'eval_f1',
        'goal': 'maximize'
    },
    # Every parameter is a discrete choice; the names are read back from
    # wandb.config inside tune_hyperparameters.
    'parameters': {
        'num_train_epochs': {
            'values': [3, 5, 10]
        },
        'learning_rate': {
            'values': [5e-5, 1e-5, 5e-4, 1e-4]
        },
        'per_device_train_batch_size': {
            'values': [16, 32, 64]
        },
        'frozen_layers': {
            'values': [0, 1, 4, 8, 10]
        },
        'dropout_rate': {
            'values': [0, 0.1, 0.3, 0.5]
        },
        'weight_decay': {
            'values': [0, 0.01, 0.001, 0.0001]
        },
        'scheduler': {
            'values': ['linear', 'cosine']
        }
    }
}


In [20]:
def tune_hyperparameters(config=None):
    """Train and evaluate one model inside a fresh wandb run.

    Hyperparameters are read from ``wandb.config`` once the run has started.
    The function relies on the notebook globals ``content_path``,
    ``train_set``, ``dev_set`` and ``device``.

    Args:
        config: optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        # From here on, work with the run's own configuration object.
        config = wandb.config
        torch.manual_seed(6)

        # Tokenizer and model weights share one download cache under content_path.
        hf_cache = os.path.join(content_path, 'cache')
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base', cache_dir=hf_cache)

        trainer_settings = dict(
            output_dir='./results',
            num_train_epochs=config.num_train_epochs,
            learning_rate=config.learning_rate,
            per_device_train_batch_size=config.per_device_train_batch_size,
            weight_decay=config.weight_decay,
            lr_scheduler_type=config.scheduler,
            overwrite_output_dir=True,
            evaluation_strategy="epoch",
            report_to="wandb",
            run_name="roberta-finetuning-test",
            remove_unused_columns=False,
            logging_strategy='epoch',
            load_best_model_at_end=True,
            metric_for_best_model="eval_f1",
            greater_is_better=True,
            save_strategy="epoch",
            save_total_limit=1,
        )
        training_args = TrainingArguments(**trainer_settings)

        model = RoBERTaForPCL.from_pretrained(
            'roberta-base',
            cache_dir=hf_cache,
            dropout_rate=config.dropout_rate,
            num_frozen_layers=config.frozen_layers,
        ).to(device)

        print(f"The model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters")

        trainer = PCLTrainer(
            model=model,
            args=training_args,
            train_dataset=train_set,
            eval_dataset=dev_set,
            data_collator=train_set.collate_fn,
            tokenizer=tokenizer,
        )
        trainer.train()
        final_f1 = trainer.evaluate()["eval_f1"]

        # Save the weights when the final F1 beats the 'best_f1' entry of this
        # run's summary (0 when that entry is absent).
        if final_f1 > wandb.run.summary.get('best_f1', 0):
            wandb.run.summary['best_f1'] = final_f1
            model.save_pretrained(os.path.join('./best_model', wandb.run.name))



In [13]:
# Register the sweep described by sweep_config under the NLP_CW project and keep its ID.
sweep_id = wandb.sweep(sweep=sweep_config, project="NLP_CW")

Create sweep with ID: o0z9do5c
Sweep URL: https://wandb.ai/alans-team/NLP_CW/sweeps/o0z9do5c


In [None]:
# Launch a sweep agent that calls tune_hyperparameters, with count=30.
# NOTE(review): the sweep ID is hard-coded instead of using the `sweep_id` variable
# returned by wandb.sweep — confirm this is intentional.
wandb.agent(sweep_id="o0z9do5c", function=tune_hyperparameters, count=30, project="NLP_CW")

## End of Stage 1 Evaluation

### Best model with upsampling

In [None]:
from dont_patronize_me import DontPatronizeMe


def load_datasets():
    """Build the internal train (upsampled) and dev DataFrames for task 1.

    The paragraph ids of each split are read from
    ``internal_train_par_ids.csv`` and ``internal_dev_par_ids.csv`` in the
    current working directory and looked up in the task-1 frame loaded through
    ``DontPatronizeMe``. In the training frame the ``label == 1`` rows are
    resampled with replacement until they match the number of ``label == 0``
    rows, then the combined frame is shuffled (both with ``random_state=42``).

    Returns:
        ``(train_df, dev_df)``; each has the columns ``par_id``, ``community``
        (taken from the source ``keyword`` column), ``text`` and ``label``.

    Raises:
        KeyError: if a split id does not occur in the task-1 data.
    """
    dpm = DontPatronizeMe('.', '.')
    dpm.load_task1()
    data = dpm.train_task1_df

    # Index the task data by par_id once. Keeping the first occurrence matches
    # "take the first matching row" and avoids scanning the whole frame
    # several times for every id.
    lookup = data.drop_duplicates(subset='par_id', keep='first').set_index('par_id')

    def _rows_for(ids_csv):
        """Return the split frame for the ids listed in ``ids_csv``, in file order."""
        # Ids are compared as strings, so cast whatever dtype the CSV produced.
        par_ids = pd.read_csv(ids_csv).par_id.astype(str).tolist()
        matched = lookup.loc[par_ids, ['keyword', 'text', 'label']]
        return pd.DataFrame({
            'par_id': par_ids,
            'community': matched['keyword'].tolist(),
            'text': matched['text'].tolist(),
            'label': matched['label'].tolist(),
        })

    train_df = _rows_for('internal_train_par_ids.csv')

    # Balance the classes by oversampling the minority (label == 1) rows.
    majority = train_df[train_df['label'] == 0]
    minority = train_df[train_df['label'] == 1]
    minority_oversampled = resample(minority,
                                    replace=True,
                                    n_samples=len(majority),
                                    random_state=42)
    train_df = pd.concat([majority, minority_oversampled])
    # Shuffle so the oversampled rows are not grouped at the end.
    train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)

    dev_df = _rows_for('internal_dev_par_ids.csv')

    return train_df, dev_df



In [None]:
# Rebuild the train/dev objects from the load_datasets variant defined in this section.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
train_df, dev_df = load_datasets()
train_set = PCLDataset(tokenizer, train_df)
dev_set_PCL = PCLDataset(tokenizer, dev_df)
# dev_set is a DataLoader with default collation (batches of 32), not a Dataset.
dev_set = DataLoader(dev_set_PCL, batch_size=32)

In [None]:
# Single training run with fixed hyperparameters on the current train_set / dev_set.
torch.manual_seed(6)

# Load the tokenizer (default cache location; the content_path cache is disabled below)
#cache_dir = os.path.join(content_path, 'cache')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Evaluate, log and checkpoint once per epoch; with load_best_model_at_end the
# checkpoint with the highest eval_f1 is selected when training finishes.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=10,
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    weight_decay=0,
    lr_scheduler_type='cosine',
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    report_to="wandb",
    run_name="roberta-finetuning-test",
    remove_unused_columns=False,
    logging_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
    greater_is_better=True,
    save_strategy="epoch",
    save_total_limit=1
)


# No dropout before the head; embeddings and the first 8 encoder layers are frozen.
model = RoBERTaForPCL.from_pretrained('roberta-base', dropout_rate=0, num_frozen_layers=8).to(device)

print(f"The model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters")
# Initialize Trainer
trainer = PCLTrainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=dev_set,
    data_collator=train_set.collate_fn,
    tokenizer=tokenizer
)
# Train the model
trainer.train()
# Evaluate the model once more after training has finished
results = trainer.evaluate()

Some weights of RoBERTaForPCL were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


The model has 28942849 trainable parameters


[34m[1mwandb[0m: Currently logged in as: [33malan-picucci[0m ([33malans-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/3560 [00:00<?, ?it/s]

{'loss': 0.3024, 'grad_norm': 10.743956565856934, 'learning_rate': 4.877641290737884e-05, 'epoch': 1.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9130850047755492, F1 Score: 0.5767441860465117, Precision: 0.5367965367965368, Recall: 0.6231155778894473
{'loss': 0.1026, 'grad_norm': 37.85075378417969, 'learning_rate': 4.522542485937369e-05, 'epoch': 2.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9106972301814709, F1 Score: 0.5701149425287356, Precision: 0.5254237288135594, Recall: 0.6231155778894473
{'loss': 0.0478, 'grad_norm': 0.017182234674692154, 'learning_rate': 3.969463130731183e-05, 'epoch': 3.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9164278892072588, F1 Score: 0.5430809399477807, Precision: 0.5652173913043478, Recall: 0.5226130653266332
{'loss': 0.0231, 'grad_norm': 2.8956139087677, 'learning_rate': 3.272542485937369e-05, 'epoch': 4.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9192932187201528, F1 Score: 0.536986301369863, Precision: 0.5903614457831325, Recall: 0.49246231155778897
{'loss': 0.0128, 'grad_norm': 0.009542226791381836, 'learning_rate': 2.5e-05, 'epoch': 5.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9149952244508118, F1 Score: 0.5364583333333334, Precision: 0.5567567567567567, Recall: 0.5175879396984925
{'loss': 0.006, 'grad_norm': 0.0067993635311722755, 'learning_rate': 1.7274575140626318e-05, 'epoch': 6.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9164278892072588, F1 Score: 0.55470737913486, Precision: 0.5618556701030928, Recall: 0.5477386934673367
{'loss': 0.0031, 'grad_norm': 0.0008445466519333422, 'learning_rate': 1.0305368692688174e-05, 'epoch': 7.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9207258834765998, F1 Score: 0.5229885057471264, Precision: 0.610738255033557, Recall: 0.457286432160804
{'loss': 0.0013, 'grad_norm': 0.0013111562002450228, 'learning_rate': 4.7745751406263165e-06, 'epoch': 8.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9192932187201528, F1 Score: 0.5493333333333333, Precision: 0.5852272727272727, Recall: 0.5175879396984925
{'loss': 0.0016, 'grad_norm': 0.0015656606992706656, 'learning_rate': 1.2235870926211619e-06, 'epoch': 9.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9192932187201528, F1 Score: 0.5517241379310345, Precision: 0.5842696629213483, Recall: 0.5226130653266332
{'loss': 0.0017, 'grad_norm': 0.0013653360074386, 'learning_rate': 0.0, 'epoch': 10.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9202483285577842, F1 Score: 0.5593667546174143, Precision: 0.5888888888888889, Recall: 0.5326633165829145
{'train_runtime': 1659.3487, 'train_samples_per_second': 68.533, 'train_steps_per_second': 2.145, 'train_loss': 0.05024582424692893, 'epoch': 10.0}


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9130850047755492, F1 Score: 0.5767441860465117, Precision: 0.5367965367965368, Recall: 0.6231155778894473


In [None]:
# Save the current model under ./best_model/stage_1_model_upsampling.
# The save is unconditional: no comparison with a previous best happens in this cell.
model_path = os.path.join('./best_model', 'stage_1_model_upsampling') 
model.save_pretrained(model_path)

Configuration saved in ./best_model/stage_1_model_mixed_language/config.json
Model weights saved in ./best_model/stage_1_model_mixed_language/pytorch_model.bin


### Best model trained on the dataset augmented with mixed back-translation

In [None]:
# Train on the pre-built augmented CSV; load_datasets() is called only for its dev split
# (the train_df it returns is not used in this cell).
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
train_df, dev_df = load_datasets()
# NOTE(review): assumes mixed_augmented_train_set.csv has 'text' and 'label' columns, as PCLDataset reads — confirm.
trdf1 = pd.read_csv('mixed_augmented_train_set.csv')
train_set = PCLDataset(tokenizer, trdf1)
dev_set_PCL = PCLDataset(tokenizer, dev_df)
# dev_set is a DataLoader with default collation (batches of 32), not a Dataset.
dev_set = DataLoader(dev_set_PCL, batch_size=32)

In [None]:
# Single training run with fixed hyperparameters on the current train_set / dev_set.
torch.manual_seed(6)

# Load the tokenizer (default cache location; the content_path cache is disabled below)
#cache_dir = os.path.join(content_path, 'cache')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Evaluate, log and checkpoint once per epoch; with load_best_model_at_end the
# checkpoint with the highest eval_f1 is selected when training finishes.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=10,
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    weight_decay=0,
    lr_scheduler_type='cosine',
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    report_to="wandb",
    run_name="roberta-finetuning-test",
    remove_unused_columns=False,
    logging_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
    greater_is_better=True,
    save_strategy="epoch",
    save_total_limit=1
)


# No dropout before the head; embeddings and the first 8 encoder layers are frozen.
model = RoBERTaForPCL.from_pretrained('roberta-base', dropout_rate=0, num_frozen_layers=8).to(device)

print(f"The model has {sum(p.numel() for p in model.parameters() if p.requires_grad)} trainable parameters")
# Initialize Trainer
trainer = PCLTrainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=dev_set,
    data_collator=train_set.collate_fn,
    tokenizer=tokenizer
)
# Train the model
trainer.train()
# Evaluate the model once more after training has finished
results = trainer.evaluate()

Downloading pytorch_model.bin:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RoBERTaForPCL: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RoBERTaForPCL from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RoBERTaForPCL from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RoBERTaForPCL were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias', 'roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions an

The model has 28942849 trainable parameters


[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-356
Configuration saved in ./results/checkpoint-356/config.json


Accuracy: 0.9130850047755492, F1 Score: 0.4615384615384615, Precision: 0.5611510791366906, Recall: 0.39195979899497485


Model weights saved in ./results/checkpoint-356/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-356/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-356/special_tokens_map.json


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-712
Configuration saved in ./results/checkpoint-712/config.json


Accuracy: 0.9173829990448902, F1 Score: 0.3107569721115538, Precision: 0.75, Recall: 0.19597989949748743


Model weights saved in ./results/checkpoint-712/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-712/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-712/special_tokens_map.json


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-1068
Configuration saved in ./results/checkpoint-1068/config.json


Accuracy: 0.9173829990448902, F1 Score: 0.4175084175084175, Precision: 0.6326530612244898, Recall: 0.31155778894472363


Model weights saved in ./results/checkpoint-1068/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-1068/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-1068/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-712] due to args.save_total_limit


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-1424
Configuration saved in ./results/checkpoint-1424/config.json


Accuracy: 0.9097421203438395, F1 Score: 0.5378973105134475, Precision: 0.5238095238095238, Recall: 0.5527638190954773


Model weights saved in ./results/checkpoint-1424/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-1424/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-1424/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-356] due to args.save_total_limit


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-1780
Configuration saved in ./results/checkpoint-1780/config.json


Accuracy: 0.9164278892072588, F1 Score: 0.56575682382134, Precision: 0.5588235294117647, Recall: 0.5728643216080402


Model weights saved in ./results/checkpoint-1780/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-1780/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-1780/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-1068] due to args.save_total_limit


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-2136
Configuration saved in ./results/checkpoint-2136/config.json


Accuracy: 0.9140401146131805, F1 Score: 0.5287958115183247, Precision: 0.5519125683060109, Recall: 0.507537688442211


Model weights saved in ./results/checkpoint-2136/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-2136/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-2136/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-1424] due to args.save_total_limit


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-2492
Configuration saved in ./results/checkpoint-2492/config.json


Accuracy: 0.9169054441260746, F1 Score: 0.5, Precision: 0.5838926174496645, Recall: 0.4371859296482412


Model weights saved in ./results/checkpoint-2492/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-2492/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-2492/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-2136] due to args.save_total_limit


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-2848
Configuration saved in ./results/checkpoint-2848/config.json


Accuracy: 0.9154727793696275, F1 Score: 0.522911051212938, Precision: 0.563953488372093, Recall: 0.48743718592964824


Model weights saved in ./results/checkpoint-2848/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-2848/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-2848/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-2492] due to args.save_total_limit


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-3204
Configuration saved in ./results/checkpoint-3204/config.json


Accuracy: 0.9164278892072588, F1 Score: 0.5179063360881543, Precision: 0.573170731707317, Recall: 0.4723618090452261


Model weights saved in ./results/checkpoint-3204/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-3204/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-3204/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-2848] due to args.save_total_limit


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Saving model checkpoint to ./results/checkpoint-3560
Configuration saved in ./results/checkpoint-3560/config.json


Accuracy: 0.9169054441260746, F1 Score: 0.521978021978022, Precision: 0.5757575757575758, Recall: 0.47738693467336685


Model weights saved in ./results/checkpoint-3560/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-3560/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-3560/special_tokens_map.json
Deleting older checkpoint [results/checkpoint-3204] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-1780 (score: 0.56575682382134).


Evaluating:   0%|          | 0/66 [00:00<?, ?it/s]

Accuracy: 0.9164278892072588, F1 Score: 0.56575682382134, Precision: 0.5588235294117647, Recall: 0.5728643216080402


In [None]:
# Save the current model under ./best_model/stage_1_model_mixed_language.
# The save is unconditional: no comparison with a previous best happens in this cell.
model_path = os.path.join('./best_model', 'stage_1_model_mixed_language') 
model.save_pretrained(model_path)

Configuration saved in ./best_model/stage_1_model_mixed_language/config.json
Model weights saved in ./best_model/stage_1_model_mixed_language/pytorch_model.bin
