In [32]:
# Imports
import pandas as pd
import numpy as np
import os
import warnings

# Preprocessing

In [2]:
# Load data

team_id = '20' #put your team id here
split = 'test_1' # replace by 'test_2' for FINAL submission

df = pd.read_csv('dataset/tweets_train.csv')
df_test = pd.read_csv(f'dataset/tweets_{split}.csv')

In [3]:
df['words_str'] = df['words'].apply(lambda words: ' '.join(eval(words)))
df_test['words_str'] = df_test['words'].apply(lambda words: ' '.join(eval(words)))

In [4]:
from sklearn import preprocessing
from transformers import AutoTokenizer, AutoModel
from transformers import TrainerCallback, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch
import numpy as np
import torch.nn.functional as F
import gc
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [6]:
X = df['words_str']
y_text = df['sentiment']
# y_text = df.sentiment.values
le = preprocessing.LabelEncoder()
le.fit(y_text)
print(f'Original classes {le.classes_}')
print(f'Corresponding numeric classes {le.transform(le.classes_)}')
y =le.transform(y_text)
print(f"X: {X.shape}")
print(f"y: {y.shape} {np.unique(y)}")

Original classes ['negative' 'neutral' 'positive']
Corresponding numeric classes [0 1 2]
X: (8000,)
y: (8000,) [0 1 2]


In [7]:
# Splitting
# train_texts, val_texts, train_labels, val_labels = train_test_split(df['words_str'], y, test_size=0.2, shuffle=True, random_state=None)
train_texts, val_texts, train_labels, val_labels = train_test_split(df['words_str'], y, test_size=0.2, shuffle=True, random_state=42)

In [8]:
# Tokenize the input
# tokenizer_twitter = AutoTokenizer.from_pretrained('cardiffnlp/twitter-roberta-base')
tokenizer_twitter = AutoTokenizer.from_pretrained('models/twitter-roberta-base/')

In [9]:
tokenizer = tokenizer_twitter
train_encodings = tokenizer(train_texts.tolist(), truncation=True, padding=True)
val_encodings = tokenizer(val_texts.tolist(), truncation=True, padding=True)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [10]:
# histogram of class distribution
hist, bins = np.histogram(train_labels, bins=3)
print(f'Original class distribution: {hist}')
# class weights
class_count = [hist[0], hist[1], hist[2]]
alpha = torch.tensor([1.0 / c for c in class_count]).cuda()
alpha = alpha / alpha.sum()

Original class distribution: [ 395 4200 1805]


In [11]:
class ClassificationDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels.astype('int') # Change to integer type

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long) # Change to long type for classification
        return item

    def __len__(self):
        return len(self.labels)


train_dataset = ClassificationDataset(train_encodings, train_labels)
val_dataset = ClassificationDataset(val_encodings, val_labels)


# Function to compute f1_macro
def f1_macro(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return {'f1_macro': f1_score(labels, predictions, average='macro')}

class FocalLoss(nn.modules.loss._WeightedLoss):
    def __init__(self, weight=None, gamma=2,reduction='mean'):
        super(FocalLoss, self).__init__(weight,reduction=reduction)
        self.gamma = gamma
        self.weight = weight #parameter to balance class weights

    def forward(self, input, target):

        ce_loss = nn.functional.cross_entropy(input, target,reduction=self.reduction,weight=self.weight)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma * ce_loss).mean()
        return focal_loss

loss_fn = nn.CrossEntropyLoss()
# loss_fn = FocalLoss(weight = alpha, gamma=2)

class RobertaClassificationTwitter_2(nn.Module):
    def __init__(self, dropout=0.1):
        super(RobertaClassificationTwitter_2, self).__init__()
        # self.roberta = AutoModel.from_pretrained('cardiffnlp/twitter-roberta-base-sentiment-latest')
        self.roberta = AutoModel.from_pretrained('models/twitter-roberta-base-sentiment-latest/')
        self.dropout = nn.Dropout(dropout)
        hidden_size = self.roberta.config.hidden_size

        # Adding an additional hidden layer
        self.hidden_layer = nn.Linear(hidden_size, hidden_size//2)
        
        # Adding L2 regularization (weight decay) to the hidden layer
        self.regularization = nn.LayerNorm(hidden_size//2)
        
        # Final classification layer with 3 classes
        self.classifier = nn.Linear(hidden_size//2, 3)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.roberta(input_ids, attention_mask=attention_mask)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        
        # Passing through the hidden layer with ReLU activation
        hidden_output = self.hidden_layer(pooled_output)
        hidden_output = F.relu(hidden_output)
        
        # Applying Layer Normalization (regularization)
        hidden_output = self.regularization(hidden_output)
        
        logits = self.classifier(hidden_output)
        
        loss = None
        if labels is not None:
            # loss = nn.CrossEntropyLoss()(logits, labels)
            loss = loss_fn(logits, labels)
            # loss = loss_fn(logits, labels)
        
        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}

    
    
    
class EarlyStoppingCallback(TrainerCallback):
    def __init__(self, patience=4):
        self.patience = patience
        self.best_score = None
        self.early_stop_counter = 0

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        f1 = metrics['f1_macro']  # Make sure this key matches what's returned by your compute_metrics function
        if self.best_score is None or f1 > self.best_score:
            self.best_score = f1
            self.early_stop_counter = 0
        else:
            self.early_stop_counter += 1
            if self.early_stop_counter >= self.patience:
                control.should_training_stop = True
        return control
    
class ThresholdEarlyStoppingCallback(TrainerCallback):
    def on_evaluate(self, args, state, control, metrics, **kwargs):
        f1 = metrics['f1_macro'] # Make sure this key matches what's returned by your compute_metrics function
        if f1 > 0.8:
            control.should_training_stop = True
        return control

class SaveBestF1Callback(TrainerCallback):
    def __init__(self, trainer, metric_name="f1_macro", save_path="./best_model"):
        self.trainer = trainer
        self.metric_name = metric_name
        self.best_metric = None
        self.save_path = save_path

    def on_log(self, args, state, control, logs, **kwargs):
        metric_value = logs.get(self.metric_name)
        if metric_value is not None and (self.best_metric is None or metric_value > self.best_metric):
            self.best_metric = metric_value
            print(f"New best {self.metric_name}: {self.best_metric}. Saving model to {self.save_path}")
            self.trainer.save_model(self.save_path)




In [12]:
dropout = 0.1
model_twitter = RobertaClassificationTwitter_2(dropout=dropout)

Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [31]:
# model.cpu()
# del model
# del train_dataset
# del val_dataset
# del study

gc.collect()
torch.cuda.empty_cache()

from GPUtil import showUtilization as gpu_usage
gpu_usage()     

| ID | GPU | MEM |
------------------
|  0 |  0% | 35% |
|  1 |  0% | 12% |


In [None]:
model = model_twitter.to(device)

# Define training arguments and trainer
training_args = TrainingArguments(
    output_dir='./output',
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    learning_rate=1e-4,
    num_train_epochs=3,
    logging_dir='./logs',
    evaluation_strategy='epoch', # Evaluate every 'logging_steps'
    # logging_steps=100, # Set to evaluate every 100 steps
    logging_strategy='epoch',
    weight_decay=0.0001,
    save_strategy='epoch',
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model='f1_macro',
    greater_is_better=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=f1_macro,
)

# save_best_f1_callback = SaveBestF1Callback(save_path='./best_f1_model', trainer=trainer)
# trainer.add_callback(save_best_f1_callback)

# Train the model
trainer.train()
eval_results = trainer.evaluate()
print(eval_results)

In [33]:
# hyperparameter tuning with optuna
warnings.filterwarnings('ignore')
epochs = 3
def objective(trial):
    # hyperparameters to tune
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    dropout = trial.suggest_float("dropout", 0.1, 0.5)

    model = RobertaClassificationTwitter_2(dropout=dropout).to(device)
    
    # Training arguments with hyperparameters
    training_args = TrainingArguments(
        output_dir='./output',
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        learning_rate=learning_rate,
        num_train_epochs=epochs,
        logging_dir='./logs',
        evaluation_strategy='epoch',
        # logging_steps=100, # Set to evaluate every 100 steps
        logging_strategy='epoch',
        weight_decay=weight_decay,
        save_strategy='epoch',
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model='f1_macro',
        greater_is_better=True,
    )

    # Model & Trainer
    model = RobertaClassificationTwitter_2(dropout=dropout).to(device)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=f1_macro,
    )

    # Train and evaluate the model
    trainer.train()
    eval_results = trainer.evaluate()
    
    # Return the evaluation metric
    return eval_results["eval_f1_macro"]

# Create a study object and specify the direction is 'maximize'
study = optuna.create_study(direction="maximize")

# Optimize the study, the objective function is passed in as the first argument
study.optimize(objective, n_trials=3)  # You can set n_trials to the desired number of trials

# Results
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2023-08-11 21:28:08,968] A new study created in memory with name: no-name-a803a5e6-f214-49b7-b618-cecc18ee8b5b
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias',

Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5707,0.490125,0.643906
2,0.3514,0.414041,0.751864
3,0.1999,0.51131,0.740793


[I 2023-08-11 21:29:45,027] Trial 0 finished with value: 0.7518642154386832 and parameters: {'learning_rate': 7.902613157566742e-05, 'weight_decay': 1.0402125300132789e-06, 'batch_size': 32, 'dropout': 0.46257866818554005}. Best is trial 0 with value: 0.7518642154386832.
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model 

Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5797,0.444418,0.683143
2,0.399,0.421756,0.732717
3,0.2867,0.441987,0.745107


[I 2023-08-11 21:30:57,353] Trial 1 finished with value: 0.7451067095847166 and parameters: {'learning_rate': 5.653390677957417e-05, 'weight_decay': 1.0815188813137109e-06, 'batch_size': 64, 'dropout': 0.37355716871050193}. Best is trial 0 with value: 0.7518642154386832.
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model 

Epoch,Training Loss,Validation Loss,F1 Macro
1,0.835,0.789307,0.270136
2,0.8113,0.772332,0.270136
3,0.8071,0.774316,0.270136


[I 2023-08-11 21:33:24,526] Trial 2 finished with value: 0.2701363073110285 and parameters: {'learning_rate': 0.0006035320965814507, 'weight_decay': 0.00034506423289245875, 'batch_size': 16, 'dropout': 0.2279246895962048}. Best is trial 0 with value: 0.7518642154386832.


Number of finished trials:  3
Best trial:
  Value:  0.7518642154386832
  Params: 
    learning_rate: 7.902613157566742e-05
    weight_decay: 1.0402125300132789e-06
    batch_size: 32
    dropout: 0.46257866818554005


In [23]:
epochs = 3

def model_init(trial):
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    return RobertaClassificationTwitter_2(dropout=dropout)

def objective(trial):
    # hyperparameters to be tuned by optuna
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])

    training_args = TrainingArguments(
        output_dir='./output',
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        learning_rate=learning_rate,
        num_train_epochs=epochs,
        logging_dir='./logs',
        evaluation_strategy='epoch',
        # logging_steps=100, # Set to evaluate every 100 steps
        logging_strategy='epoch',
        weight_decay=weight_decay,
        save_strategy='epoch',
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model='f1_macro',
        greater_is_better=True,
    )

    trainer = Trainer(
        model_init= lambda: model_init(trial),
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=f1_macro,
    )

    return trainer.hyperparameter_search(backend="optuna")

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=3)

[I 2023-08-11 21:05:39,270] A new study created in memory with name: no-name-4eeba2d7-b803-463e-b014-2531a90975ff
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[I 2023-08-11 21:05:40,890] A new study created in memory with name: no-name-55cd9b52-f84f-40e3-9893-54697fd4a2cd
Some weights of the model checkpoint at models/twitter-roberta-base-se

Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6291,0.494611,0.704977
2,0.4936,0.449256,0.706199
3,0.4342,0.503632,0.710595
4,0.4027,0.478792,0.716974
5,0.3873,0.457531,0.731539


[I 2023-08-11 21:07:34,787] Trial 0 finished with value: 0.7315385031129132 and parameters: {'learning_rate': 1.19699234861362e-05, 'num_train_epochs': 5, 'seed': 22, 'per_device_train_batch_size': 64}. Best is trial 0 with value: 0.7315385031129132.
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.6463,0.532852,0.624949
2,0.5238,0.518213,0.671026
3,0.4978,0.492467,0.690759
4,0.478,0.49525,0.703333


[I 2023-08-11 21:09:04,278] Trial 1 finished with value: 0.7033328534045699 and parameters: {'learning_rate': 6.266584023180392e-06, 'num_train_epochs': 4, 'seed': 25, 'per_device_train_batch_size': 64}. Best is trial 1 with value: 0.7033328534045699.
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5623,0.494656,0.667263
2,0.4206,0.443953,0.719286
3,0.3412,0.474311,0.718855
4,0.289,0.493638,0.717275


[I 2023-08-11 21:12:02,789] Trial 2 finished with value: 0.7172753368983571 and parameters: {'learning_rate': 1.3857482058374456e-05, 'num_train_epochs': 4, 'seed': 34, 'per_device_train_batch_size': 16}. Best is trial 1 with value: 0.7033328534045699.
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5694,0.477454,0.690726
2,0.3597,0.415277,0.742783
3,0.2089,0.509431,0.754743
4,0.1125,0.579623,0.755798


[I 2023-08-11 21:14:03,749] Trial 3 finished with value: 0.7557975683588235 and parameters: {'learning_rate': 6.565512371578846e-05, 'num_train_epochs': 4, 'seed': 3, 'per_device_train_batch_size': 32}. Best is trial 1 with value: 0.7033328534045699.
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch,Training Loss,Validation Loss,F1 Macro
1,0.5655,0.41406,0.749918
2,0.3966,0.482214,0.767347
3,0.2684,0.794538,0.756267
4,0.1735,0.85892,0.752619


[I 2023-08-11 21:23:02,617] Trial 4 finished with value: 0.7526188480795014 and parameters: {'learning_rate': 2.1919529374445238e-05, 'num_train_epochs': 4, 'seed': 11, 'per_device_train_batch_size': 4}. Best is trial 1 with value: 0.7033328534045699.
Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch,Training Loss,Validation Loss


[W 2023-08-11 21:23:11,550] Trial 5 failed with parameters: {'learning_rate': 4.149295132057096e-06, 'num_train_epochs': 4, 'seed': 5, 'per_device_train_batch_size': 32} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/berrang/miniconda3/envs/torch/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/home/berrang/miniconda3/envs/torch/lib/python3.10/site-packages/transformers/integrations.py", line 198, in _objective
    trainer.train(resume_from_checkpoint=checkpoint, trial=trial)
  File "/home/berrang/miniconda3/envs/torch/lib/python3.10/site-packages/transformers/trainer.py", line 1662, in train
    return inner_training_loop(
  File "/home/berrang/miniconda3/envs/torch/lib/python3.10/site-packages/transformers/trainer.py", line 1929, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/home/berrang/miniconda3/envs/torch/lib/pyt

KeyboardInterrupt: 

In [100]:
# Save the model
trainer.save_model('pretrained_models/roberta-base-twitter-clf')

# model = BertRegression.from_pretrained("./path/to/save/directory")

In [141]:
# load model
# create tokenizer using roberta-base
# run dataset, model and callback classes
model_path = 'pretrained_models/roberta-base-twitter-clf'
loaded_model = RobertaClassificationTwitter_2()
loaded_model.load_state_dict(torch.load(os.path.join(model_path, "pytorch_model.bin")))
loaded_model.to(device)

Some weights of the model checkpoint at models/twitter-roberta-base-sentiment-latest/ were not used when initializing RobertaModel: ['classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RobertaClassificationTwitter_2(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (

In [142]:
# Define training arguments and trainer
training_args = TrainingArguments(
    output_dir='./output',
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    learning_rate=1e-4,
    num_train_epochs=2,
    logging_dir='./logs',
    evaluation_strategy='steps', # Evaluate every 'logging_steps'
    logging_steps=100, # Set to evaluate every 100 steps
    weight_decay=0.0001,
    load_best_model_at_end=True
)

# Tokenize the test sentences
# sentences = list(df_test.words_str.values)
# test_encodings = tokenizer(sentences, truncation=True, padding=True)

# Convert to a PyTorch Dataset (using the renamed class)
# test_dataset = ClassificationTestDataset(test_encodings)

# Create a new Trainer instance with the loaded model
new_trainer = Trainer(
    model=loaded_model,
    args=training_args, # Assuming these are the same training args you used before
)

# Get predictions with the loaded model
# predictions = new_trainer.predict(test_dataset)
predictions = new_trainer.predict(val_dataset)
y_hat_prob_tensor = torch.tensor(predictions.predictions, dtype=torch.float32)

# Convert the probabilities to class labels
y_hat_labels = torch.argmax(y_hat_prob_tensor, dim=1).cpu().numpy()

# Save the results with the specified format
# directory = 'results'
# np.save(os.path.join(directory, f'{team_id}__{split}__class_pred.npy'), y_hat_labels)




In [144]:
print(classification_report(val_labels, y_hat_labels, target_names=le.classes_))

              precision    recall  f1-score   support

    negative       0.61      0.61      0.61        87
     neutral       0.91      0.88      0.89      1090
    positive       0.75      0.81      0.78       423

    accuracy                           0.85      1600
   macro avg       0.76      0.77      0.76      1600
weighted avg       0.85      0.85      0.85      1600



# Test

In [None]:
# Define a dataset without labels for testing
class ClassificationTestDataset(Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])


In [None]:
# Tokenize the test sentences
sentences = list(df_test.words_str.values)
test_encodings = tokenizer(sentences, truncation=True, padding=True)

# Convert to a PyTorch Dataset (using the renamed class)
test_dataset = ClassificationTestDataset(test_encodings)

# Get predictions with the neural network
predictions = trainer.predict(test_dataset)
y_hat_prob_tensor = torch.tensor(predictions.predictions, dtype=torch.float32)

# Convert the probabilities to class labels
y_hat_labels = torch.argmax(y_hat_prob_tensor, dim=1).cpu().numpy()

# Save the results with the specified format
directory = 'results'
np.save(os.path.join(directory, f'{team_id}__{split}__class_pred.npy'), y_hat_labels)


In [None]:
# Load 20__test_1__reg_pred.npy

d = np.load('results/20__test_1__clf_pred.npy', allow_pickle=True)
d.shape