<a href="https://colab.research.google.com/github/gizdatalab/CPU/blob/main/classifiers/TAPP/CPU_TAPP_ClimateBert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
! pip install datasets transformers sentencepiece huggingface_hub
! apt install git-lfs
! pip install sentence-transformers
# ! pip install optuna
! pip install evaluate
! pip install accelerate -U
! pip install codecarbon

In [None]:
from codecarbon import EmissionsTracker

# Start the emissions tracker before any heavy work so that data loading and
# training are covered; the matching tracker.stop() call comes after training.
tracker = EmissionsTracker()
tracker.start()

[codecarbon INFO @ 09:17:10] [setup] RAM Tracking...
[codecarbon INFO @ 09:17:10] [setup] GPU Tracking...
[codecarbon INFO @ 09:17:10] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 09:17:10] [setup] CPU Tracking...
[codecarbon INFO @ 09:17:11] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 09:17:11] >>> Tracker's metadata:
[codecarbon INFO @ 09:17:11]   Platform system: Linux-6.1.58+-x86_64-with-glibc2.35
[codecarbon INFO @ 09:17:11]   Python version: 3.10.12
[codecarbon INFO @ 09:17:11]   CodeCarbon version: 2.3.4
[codecarbon INFO @ 09:17:11]   Available RAM : 12.675 GB
[codecarbon INFO @ 09:17:11]   CPU count: 2
[codecarbon INFO @ 09:17:11]   CPU model: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 09:17:11]   GPU count: 1
[codecarbon INFO @ 09:17:11]   GPU model: 1 x Tesla T4


In [None]:
from huggingface_hub import notebook_login
# Interactive Hugging Face login; the fine-tuned model is pushed to the Hub
# later in this notebook (push_to_hub).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

[codecarbon INFO @ 09:17:26] Energy consumed for all CPUs : 0.000178 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 09:17:27] 0.000239 kWh of electricity used since the beginning.


In [None]:
from datasets import load_dataset
import datasets
import numpy as np
import pandas as pd
import evaluate
import torch
import os
import sklearn.metrics as skm
from transformers import (AutoTokenizer, AutoConfig, AutoModelForSequenceClassification,
                          PreTrainedModel, BertModel, BertForSequenceClassification,
                          RobertaForSequenceClassification,
                          TrainingArguments, Trainer, TrainerCallback)
from torch import cuda
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

[codecarbon INFO @ 09:17:41] Energy consumed for RAM : 0.000040 kWh. RAM Power : 4.753046035766602 W
[codecarbon INFO @ 09:17:41] Energy consumed for all GPUs : 0.000083 kWh. Total GPU Power : 9.994129734956493 W
[codecarbon INFO @ 09:17:41] Energy consumed for all CPUs : 0.000354 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 09:17:41] 0.000477 kWh of electricity used since the beginning.


# Manual Inputs


In [None]:
###### Define Path to data ############################
# Hugging Face dataset repository and the JSON file used for each split
repo_id = "GIZ/policy_classification"
train_file="policy_classification_train.json"
test_file="policy_classification_test.json"


### Define labels ##################
# Label columns of the dataset. This list is sorted in place later on, so the
# label ids follow alphabetical order rather than the order written here.
label_names= ['TargetLabel','ActionLabel','PlansLabel','PolicyLabel']


#######  Define Model params ######################

# define the model checkpoint (base model that gets fine-tuned)
model_checkpoint = "climatebert/distilroberta-base-climate-f"

# define the problem type (passed to the tokenizer/config and checked in get_scores)
problem_type="multi_label_classification"


######## Training Arguments ##################
# output directory handed to TrainingArguments
output_dir = "TAPP-multilabel-climatebert"

# logging and evaluation
# NOTE(review): the log-merging cell after training pairs consecutive
# log-history entries, which appears to assume logging_steps == eval_steps.
evaluation_strategy = "steps"
logging_strategy="steps"
logging_steps = 500
eval_steps = 500

# Important params
learning_rate=3.06e-5
# used for both the per-device train and eval batch size
batch_size = 16
num_train_epochs= 5
weight_decay=0.05
warmup_steps = 200
gradient_accumulation_steps = 1

# Automated

## Dataset

In [None]:
# Load Dataset from Hugging Face
data_files = {"train": train_file, "test": test_file}
dataset = load_dataset(repo_id, data_files=data_files)

# The train and test splits come from the two separate files listed in
# data_files; each split is converted to a pandas DataFrame.

train_df = dataset['train'].to_pandas()
test_df =dataset['test'].to_pandas()

# Sort the labels (in place) and build the id2label / label2id mappings
label_names.sort()

num_labels = len(label_names)
id2label = {idx:label for idx, label in enumerate(label_names)}
label2id = {label:idx for idx, label in enumerate(label_names)}
print(id2label)

[codecarbon INFO @ 09:17:57] Energy consumed for RAM : 0.000059 kWh. RAM Power : 4.753046035766602 W
[codecarbon INFO @ 09:17:57] Energy consumed for all GPUs : 0.000125 kWh. Total GPU Power : 9.896110026298683 W
[codecarbon INFO @ 09:17:57] Energy consumed for all CPUs : 0.000532 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 09:17:57] 0.000716 kWh of electricity used since the beginning.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/175 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.01M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

{0: 'ActionLabel', 1: 'PlansLabel', 2: 'PolicyLabel', 3: 'TargetLabel'}


In [None]:
def prepare_multilabel_df(df, cols):
    """
    Build a multi-label frame from the raw dataset frame.

    Rows with a missing value in any of ``cols`` are dropped, the label
    columns are combined into a multi-hot list column ``labels`` and the
    text is collected in a ``text`` column.

    The dataset is multilingual ('en', 'fr', 'es'): for rows whose
    ``Language`` is not ``'en'`` the text is taken from the first element of
    ``TranslatedContext`` instead of ``Context``, so that only English text
    is used downstream.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Context``, ``Language``, ``TranslatedContext`` and
        every column named in ``cols``. The frame is NOT modified.
    cols : list of str
        Label columns; their order defines the order inside ``labels``.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``['text', 'labels'] + cols``.
    """
    cols = list(cols)

    # dropna() without inplace returns a new frame, so the caller's frame is
    # left untouched and the cell calling this function can be re-run safely.
    df = df.dropna(subset=cols).reset_index(drop=True)

    # Plain list comprehensions (instead of row-wise DataFrame.apply) also
    # behave correctly when no rows are left after the dropna.
    df['labels'] = pd.Series(
        [[int(value) for value in row]
         for row in df[cols].itertuples(index=False, name=None)],
        index=df.index, dtype=object)

    df['text'] = pd.Series(
        [context if language == 'en' else translated[0]
         for context, language, translated
         in zip(df['Context'], df['Language'], df['TranslatedContext'])],
        index=df.index, dtype=object)

    return df[['text', 'labels'] + cols]

# Build the multilabel frames for both splits
train_df = prepare_multilabel_df(train_df, label_names)
test_df = prepare_multilabel_df(test_df, label_names)

# Report the size of each split and the number of positive examples per label
for header, split_df in (('Classes Representation in Training Dataset:', train_df),
                         ('Classes Representation in Test Dataset:', test_df)):
    print('\n', header, len(split_df))
    for i, label in enumerate(label_names):
        print(i, ".", label, ":", sum(row[i] for row in split_df['labels']))


 Classes Representation in Training Dataset: 10031
0 . ActionLabel : 5416
1 . PlansLabel : 2140
2 . PolicyLabel : 1396
3 . TargetLabel : 2911

 Classes Representation in Test Dataset: 932
0 . ActionLabel : 513
1 . PlansLabel : 198
2 . PolicyLabel : 122
3 . TargetLabel : 256


In [None]:
# Per-label weights that can feed a weighted loss when the classes are imbalanced
positive_weights = {}
negative_weights = {}

n_examples = len(train_df)
for i, label in enumerate(label_names):
    pos = sum(train_df[label])
    neg = n_examples - pos
    print(i, ".", label, ":", pos)
    # clamp: positive weight is at least 1.1, negative weight is at most 1.0
    positive_weights[label] = max(neg/pos, 1.1)
    negative_weights[label] = min(pos/neg, 1.0)
print(positive_weights)
print(negative_weights)

# High recall is the priority (we do not want to miss anything), so only the
# positive weights computed above are used.
pos_weights = list(positive_weights.values())

# The weights must live on the same device as the model, otherwise the loss
# computation raises an error.
posweights = torch.FloatTensor(pos_weights).to(device)

0 . ActionLabel : 5416
1 . PlansLabel : 2140
2 . PolicyLabel : 1396
3 . TargetLabel : 2911
{'ActionLabel': 1.1, 'PlansLabel': 3.6873831775700934, 'PolicyLabel': 6.185530085959885, 'TargetLabel': 2.4458948814840262}
{'ActionLabel': 1.0, 'PlansLabel': 0.2711950323152959, 'PolicyLabel': 0.161667631731326, 'TargetLabel': 0.4088483146067416}


Note: `pos_weight > 1` increases recall, while `pos_weight < 1` increases precision.

## Model

In [None]:
# problem_type is not needed by the tokenizer, but it is kept for conformity
# https://huggingface.co/docs/transformers/main_classes/configuration?highlight=multi_label_classification#transformers.PretrainedConfig
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint,problem_type=problem_type)

# Build the config with the number of labels, the label mappings and the
# problem type, load the checkpoint with that config and move the model to `device`
config = AutoConfig.from_pretrained(model_checkpoint, num_labels=num_labels, id2label=id2label, label2id=label2id, problem_type=problem_type)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint,config=config).to(device)

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.15M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.98k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

[codecarbon INFO @ 09:18:12] Energy consumed for RAM : 0.000079 kWh. RAM Power : 4.753046035766602 W
[codecarbon INFO @ 09:18:12] Energy consumed for all GPUs : 0.000180 kWh. Total GPU Power : 13.330243557770126 W
[codecarbon INFO @ 09:18:12] Energy consumed for all CPUs : 0.000709 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 09:18:12] 0.000968 kWh of electricity used since the beginning.


config.json:   0%|          | 0.00/752 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

[codecarbon INFO @ 09:18:27] Energy consumed for RAM : 0.000099 kWh. RAM Power : 4.753046035766602 W
[codecarbon INFO @ 09:18:27] Energy consumed for all GPUs : 0.000295 kWh. Total GPU Power : 27.634853740886815 W
[codecarbon INFO @ 09:18:27] Energy consumed for all CPUs : 0.000886 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 09:18:27] 0.001280 kWh of electricity used since the beginning.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at climatebert/distilroberta-base-climate-f and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Wrap the pandas frames as `datasets.Dataset` objects and shuffle them
train_ds = datasets.Dataset.from_pandas(train_df[['text','labels']]).shuffle(seed=7)
test_ds = datasets.Dataset.from_pandas(test_df[['text','labels']]).shuffle(seed=7)


def tokenize_and_encode(examples):
    """Tokenize the "text" field, padding/truncating every example to 384 tokens."""
    return tokenizer(examples["text"], max_length=384, truncation=True,
                     padding="max_length")


def _cast_labels_to_float(tokenized):
    """Switch `tokenized` to torch format and return a dataset whose "labels" are float tensors."""
    tokenized.set_format("torch")
    with_float = tokenized.map(
        lambda x : {"float_labels": x["labels"].to(torch.float)},
        remove_columns=["labels"])
    return with_float.rename_column("float_labels", "labels")


# every column except "labels" is removed once the text has been tokenized
cols = train_ds.column_names
cols.remove("labels")
print('Training data:',train_ds.num_rows)
print('Validation data:',test_ds.num_rows)

train_tokenized = train_ds.map(tokenize_and_encode, batched=True, remove_columns=cols)
val_tokenized = test_ds.map(tokenize_and_encode, batched=True, remove_columns=cols)

# The labels are cast to float to avoid an error due to a type mismatch
# https://discuss.pytorch.org/t/multi-label-binary-classification-result-type-float-cant-be-cast-to-the-desired-output-type-long/117915/3
train_tokenized = _cast_labels_to_float(train_tokenized)
val_tokenized = _cast_labels_to_float(val_tokenized)

Training data: 10031
Validation data: 932


Map:   0%|          | 0/10031 [00:00<?, ? examples/s]

Map:   0%|          | 0/932 [00:00<?, ? examples/s]

Map:   0%|          | 0/10031 [00:00<?, ? examples/s]

Map:   0%|          | 0/932 [00:00<?, ? examples/s]

## Metrics

In [None]:
# Load the f1 / recall / precision metrics from `evaluate` with the
# "multilabel" configuration; they are used by get_scores below.
multilabel_f1_metric = evaluate.load("f1", "multilabel")
multilabel_recall_metric = evaluate.load("recall","multilabel")
multilabel_precision_metric = evaluate.load("precision","multilabel")

def get_scores(y_pred, y_test, thresh=0.5, sigmoid=True):
    """
    Compute evaluation metrics for predictions against the truth values.

    The branch taken depends on the module-level ``problem_type``.

    Multi-label (``problem_type == "multi_label_classification"``):
        Returns precision, recall and f1, each aggregated with the
        'micro', 'samples' and 'weighted' strategies, under the keys
        ``"<metric>-<average>"`` (e.g. ``"f1-micro"``).

    Otherwise:
        Predictions are reduced with ``argmax(dim=1)`` and macro / weighted
        precision and recall, macro F1 and accuracy are returned.

    Parameters
    ----------
    y_pred : numpy.ndarray
        Model outputs. Raw function values (logits) when ``sigmoid`` is True,
        already-computed scores otherwise.
    y_test : numpy.ndarray
        Truth values.
    thresh : float, default 0.5
        Multi-label only: a label is predicted when its score is strictly
        greater than ``thresh``.
    sigmoid : bool, default True
        Multi-label only: apply a sigmoid to ``y_pred`` before thresholding.

    Returns
    -------
    dict
        Metric name -> value.
    """
    if problem_type == "multi_label_classification":
        y_pred = torch.from_numpy(y_pred)
        y_test = torch.from_numpy(y_test)
        # we get raw function values, therefore we apply sigmoid to get to
        # the probabilities
        if sigmoid:
            y_pred = y_pred.sigmoid()
        # Binarise unconditionally. Previously the threshold was applied only
        # inside the `if sigmoid:` branch, so `sigmoid=False` ignored `thresh`
        # and handed continuous scores to the metrics.
        y_pred = (y_pred > thresh)

        metrics = (("precision", multilabel_precision_metric),
                   ("recall", multilabel_recall_metric),
                   ("f1", multilabel_f1_metric))
        averages = ("micro", "samples", "weighted")
        return {
            f"{name}-{average}": metric.compute(
                predictions=y_pred, references=y_test, average=average)[name]
            for name, metric in metrics
            for average in averages
        }

    y_pred = torch.from_numpy(y_pred).argmax(dim=1)
    y_test = torch.from_numpy(y_test)

    report = skm.classification_report(y_test, y_pred, output_dict=True)
    df_report = pd.DataFrame(report).transpose()
    macro = df_report.loc['macro avg']
    weighted = df_report.loc['weighted avg']
    return {"Precision_macro": macro['precision'],
            "Precision_weighted": weighted['precision'],
            "Recall_macro": macro['recall'],
            "Recall_weighted": weighted['recall'],
            "F1-Score": macro['f1-score'],
            "Accuracy": skm.accuracy_score(y_test, y_pred)}


def compute_metrics(eval_pred):
    """Unpack the (predictions, labels) pair and score it with `get_scores` using its defaults."""
    model_outputs, references = eval_pred
    return get_scores(model_outputs, references)

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

## Training arguments

In [None]:
# Training configuration; every value except save_strategy and
# lr_scheduler_type comes from the "Manual Inputs" cell.
args = TrainingArguments(
    output_dir,
    logging_strategy=logging_strategy,
    evaluation_strategy = evaluation_strategy,
    # checkpoints are saved per epoch, while logging/evaluation follow the
    # step-based settings above
    save_strategy = "epoch",
    logging_steps = logging_steps,
    eval_steps= eval_steps,
    learning_rate=learning_rate,
    # the same batch size is used for training and evaluation
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
    lr_scheduler_type = "cosine",
    gradient_accumulation_steps = gradient_accumulation_steps,
    warmup_steps = warmup_steps,
)

## Custom Trainer

In [None]:
# For class weights we need a custom multi-label Trainer.
# In a multi-label problem we use Binary Cross Entropy loss with a
# sigmoid layer on top rather than softmax.
class MultilabelTrainer(Trainer):
    """Trainer whose loss is BCE-with-logits, weighted per label by the module-level `posweights`."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Compute the positively-weighted binary cross-entropy loss for a batch.

        Parameters
        ----------
        model : the model being trained; called as ``model(**inputs)``.
        inputs : dict
            Batch inputs. The "labels" entry is popped (removed) before the
            forward pass and used as the BCE target.
        return_outputs : bool, default False
            When True, return ``(loss, outputs)`` instead of just ``loss``.
        num_items_in_batch : optional
            Unused. Accepted because newer transformers versions pass this
            keyword to ``compute_loss``; without it the override raises a
            TypeError there. The default keeps older versions working.

        Returns
        -------
        The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is True.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        # keep the weights on the same device as the logits (no-op when they
        # already are)
        loss_fct = torch.nn.BCEWithLogitsLoss(
            pos_weight=posweights.to(logits.device), reduction='mean')
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

# Training

In [None]:
# Instantiate the custom trainer. Note that the evaluation set used during
# training (val_tokenized) is built from the test split.
multi_trainer =  MultilabelTrainer(
    model,
    args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer)

[codecarbon INFO @ 09:18:40] [setup] RAM Tracking...
[codecarbon INFO @ 09:18:40] [setup] GPU Tracking...
[codecarbon INFO @ 09:18:40] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 09:18:40] [setup] CPU Tracking...
[codecarbon INFO @ 09:18:41] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 09:18:41] >>> Tracker's metadata:
[codecarbon INFO @ 09:18:41]   Platform system: Linux-6.1.58+-x86_64-with-glibc2.35
[codecarbon INFO @ 09:18:41]   Python version: 3.10.12
[codecarbon INFO @ 09:18:41]   CodeCarbon version: 2.3.4
[codecarbon INFO @ 09:18:41]   Available RAM : 12.675 GB
[codecarbon INFO @ 09:18:41]   CPU count: 2
[codecarbon INFO @ 09:18:41]   CPU model: Intel(R) Xeon(R) CPU @ 2.00GHz
[codecarbon INFO @ 09:18:41]   GPU count: 1
[codecarbon INFO @ 09:18:41]   GPU model: 1 x Tesla T4


In [None]:
# Run the fine-tuning with the arguments and datasets configured above.
multi_trainer.train()

In [None]:
# Stop the emissions tracker started at the top of the notebook. The value
# displayed is the return value of stop() (presumably the estimated emissions
# - confirm the unit against the codecarbon documentation).
tracker.stop()

[codecarbon INFO @ 09:48:58] Energy consumed for RAM : 0.002513 kWh. RAM Power : 4.753046035766602 W
[codecarbon INFO @ 09:48:58] Energy consumed for all GPUs : 0.035081 kWh. Total GPU Power : 47.4442038132743 W
[codecarbon INFO @ 09:48:58] Energy consumed for all CPUs : 0.022496 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 09:48:58] 0.060090 kWh of electricity used since the beginning.


0.023357257687363648

In [None]:
# Upload the trained model to the Hugging Face Hub (requires the login done
# at the top of the notebook).
multi_trainer.push_to_hub(commit_message='End of training')

training_args.bin:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

events.out.tfevents.1709630321.242a585e5d70.457.0:   0%|          | 0.00/11.3k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ppsingh/TAPP-multilabel-climatebert/commit/e18a82c2d666c9f29c87b4a8d8621c8980f43b56', commit_message='End of training', commit_description='', oid='e18a82c2d666c9f29c87b4a8d8621c8980f43b56', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Merge consecutive pairs of log-history entries into one row each.
# NOTE(review): this assumes the history alternates between a training log and
# an evaluation log (logging_steps == eval_steps); a trailing unpaired entry is
# left in `log_results`.
log_results = multi_trainer.state.log_history.copy()
logs = []
while len(log_results) > 1:
    a, b = log_results[:2]
    del log_results[:2]
    logs.append({**a, **b})

results_df = pd.DataFrame(logs)
results_df

Unnamed: 0,loss,grad_norm,learning_rate,epoch,step,eval_loss,eval_precision-micro,eval_precision-samples,eval_precision-weighted,eval_recall-micro,eval_recall-samples,eval_recall-weighted,eval_f1-micro,eval_f1-samples,eval_f1-weighted,eval_runtime,eval_samples_per_second,eval_steps_per_second
0,0.7627,9.833265,2.981792e-05,0.8,500,0.647086,0.623209,0.672657,0.638404,0.798898,0.774142,0.798898,0.700201,0.692888,0.706235,10.974,84.928,5.376
1,0.5542,5.285941,2.532499e-05,1.59,1000,0.611386,0.639309,0.675429,0.667092,0.815427,0.783262,0.815427,0.716707,0.699872,0.727904,10.9824,84.863,5.372
2,0.4219,5.488404,1.802847e-05,2.39,1500,0.614454,0.719603,0.723623,0.731102,0.798898,0.764485,0.798898,0.75718,0.723135,0.761316,10.9805,84.877,5.373
3,0.3268,6.921066,9.968894e-06,3.19,2000,0.636325,0.727197,0.738287,0.735803,0.805326,0.773784,0.805326,0.76427,0.737436,0.767162,10.9129,85.403,5.406
4,0.2477,2.048939,3.400225e-06,3.99,2500,0.650925,0.731544,0.735068,0.743917,0.800735,0.768866,0.800735,0.764577,0.731918,0.768633,10.9416,85.179,5.392
5,0.1989,2.290876,1.594618e-07,4.78,3000,0.652699,0.736754,0.742489,0.746852,0.804408,0.77441,0.804408,0.769096,0.738427,0.772116,11.1963,83.241,5.27


In [None]:
# import json
# with open("TAPP_climatebert_logs.json", "w") as fp:
#     json.dump(log_results , fp)

# Predict and Evaluate

## Trainer

In [None]:
# Predict on the evaluation set and turn the raw outputs into boolean labels
predictions = multi_trainer.predict(val_tokenized)
pred, labels, _ = predictions

thresh = 0.5
# sigmoid maps the raw outputs to probabilities, which are then thresholded
y_prob = torch.from_numpy(pred).sigmoid()
y_pred = (y_prob > thresh).bool()
y_true = torch.from_numpy(labels).bool()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [None]:
# One confusion matrix per label
cm = skm.multilabel_confusion_matrix(y_true, y_pred)
for label, label_cm in zip(label_names, cm):
    print(label)
    print(label_cm,'\n')

# Classification report as a DataFrame; the numeric class ids in the report
# index are replaced by the label names, the aggregate rows keep their names.
report = skm.classification_report(y_true, y_pred, output_dict=True)
mapping = {str(idx): name for idx, name in enumerate(label_names)}
df_report = (pd.DataFrame(report)
             .transpose()
             .reset_index()
             .rename(columns={'index': 'label'}))
df_report['label'] = df_report['label'].map(mapping).fillna(df_report['label'])
df_report

ActionLabel
[[333  86]
 [ 99 414]] 

PlansLabel
[[624 110]
 [ 58 140]] 

PolicyLabel
[[774  36]
 [ 26  96]] 

TargetLabel
[[597  79]
 [ 29 227]] 



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,label,precision,recall,f1-score,support
0,ActionLabel,0.828,0.807018,0.817374,513.0
1,PlansLabel,0.56,0.707071,0.625,198.0
2,PolicyLabel,0.727273,0.786885,0.755906,122.0
3,TargetLabel,0.74183,0.886719,0.807829,256.0
4,micro avg,0.738215,0.805326,0.770312,1089.0
5,macro avg,0.714276,0.796923,0.751527,1089.0
6,weighted avg,0.747732,0.805326,0.773267,1089.0
7,samples avg,0.743205,0.773695,0.739071,1089.0


## Pipeline

In [None]:
from transformers import pipeline
# NOTE: this rebinds `model_checkpoint` (the base checkpoint in the "Manual
# Inputs" cell) to the fine-tuned model pushed to the Hub, so re-running the
# earlier model-loading cell afterwards would load this model instead.
model_checkpoint = "ppsingh/TAPP-multilabel-climatebert"
# The next cell reads a score for every label from each prediction, which
# relies on top_k=None making the pipeline return all labels.
pipe = pipeline("text-classification", model=model_checkpoint, top_k=None, device=device)

config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.2k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.15M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.98k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Score the test texts with the published pipeline.
# The texts are truncated to 384 tokens, the same limit used when tokenizing
# the training/evaluation data, so this evaluation uses the same inputs as the
# Trainer-based one and over-length texts are not passed through untruncated.
predictions = pipe(list(test_df['text']), truncation=True, max_length=384)

# one {label: score} mapping per text
pred = [{x['label']: x['score'] for x in prediction} for prediction in predictions]
df_pred = pd.DataFrame(pred)

# a label is predicted when its score exceeds 0.50; the scores are read in
# `label_names` order so that they line up with test_df['labels']
df_pred['labels'] = df_pred.apply(lambda x: np.array([x[label]
                            for label in label_names]) > 0.50,axis=1)
y_true = np.array(list(test_df['labels']))
y_pred = np.array(list(df_pred['labels']))

In [None]:
# One confusion matrix per label for the pipeline predictions
cm = skm.multilabel_confusion_matrix(y_true, y_pred)
for label, label_cm in zip(label_names, cm):
    print(label)
    print(label_cm,'\n')

ActionLabel
[[327  92]
 [ 95 418]] 

PlansLabel
[[617 117]
 [ 55 143]] 

PolicyLabel
[[768  42]
 [ 26  96]] 

TargetLabel
[[601  75]
 [ 33 223]] 



In [None]:
# Classification report for the pipeline predictions; numeric class ids in the
# report index are replaced by the label names, aggregate rows keep their names.
report = skm.classification_report(y_true, y_pred, output_dict=True)
mapping = {str(idx): name for idx, name in enumerate(label_names)}
df_report = (pd.DataFrame(report)
             .transpose()
             .reset_index()
             .rename(columns={'index': 'label'}))
df_report['label'] = df_report['label'].map(mapping).fillna(df_report['label'])
df_report

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,label,precision,recall,f1-score,support
0,ActionLabel,0.819608,0.814815,0.817204,513.0
1,PlansLabel,0.55,0.722222,0.624454,198.0
2,PolicyLabel,0.695652,0.786885,0.738462,122.0
3,TargetLabel,0.748322,0.871094,0.805054,256.0
4,micro avg,0.729685,0.808081,0.766885,1089.0
5,macro avg,0.703396,0.798754,0.746294,1089.0
6,weighted avg,0.739944,0.808081,0.770481,1089.0
7,samples avg,0.732564,0.774678,0.733778,1089.0
