# Multi-label Legal Text Classification for CIA

## Models and Experiments: Adaptive Pre-Training with Sentence BERT Models

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import csv
import gzip
import random

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import transformers
from transformers import AutoTokenizer, AutoModel, BertTokenizerFast, BertTokenizer
from transformers import BertForSequenceClassification, AdamW
from sentence_transformers import models, losses, datasets
from sentence_transformers import LoggingHandler, SentenceTransformer, util, InputExample
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from sentence_transformers.cross_encoder.evaluation import CESoftmaxAccuracyEvaluator
from sentence_transformers.datasets import DenoisingAutoEncoderDataset
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset, RandomSampler, SequentialSampler
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm
import math
import logging
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
os.chdir("../..")
os.getcwd()

'/Users/janinedevera/Documents/School/MDS 2021-2023/Thesis/multilabel-legal-text-classification-CIA'

### Prepare data

In [4]:
# training data (by sentence)
text = pd.read_csv("data/01 legal_texts_pipeline_sentence.csv")

In [5]:
# for sentences
text['sentence_clean'] = text['sentence_clean'].astype(str)
text_list = text['sentence_clean'].values.tolist()

In [6]:
text_list

['v procedur and timefram for submit comment',
 'mci/imda would like to seek view and comment from the industri and member of the public on the abov issu and question',
 'respond should organis their submiss a follow a',
 'cover page includ their personal/compani particular and contact inform b',
 'tabl of content c. summari of major point d. statement of interest e. comment and f. conclus',
 'support materi may be place a an annex to the submiss',
 'all submiss should be written clearli and concis and should provid a reason explan for ani propos revis',
 'where feasibl respond should identifi the specif provis of the psa which they are comment on and explain the basi for their propos',
 'all submiss should reach mci/imda within week no later than decemb p.m .. respond are to adher to thi timelin and late submiss will not be consid',
 'submiss are to be in soft copi onli in microsoft word or pdf format',
 'plea submit your soft copi with the email subject “ public consult on the review

### I. TSDAE Pre-Training with Unlabeled Data

In [57]:
print(torch.backends.mps.is_available())
print(torch.backends.mps.is_built())

device = torch.device("mps")

True
True


In [54]:
# model names
bert_base = 'bert-base-uncased'
distil_roberta = 'distilroberta-base'
distil_bert = 'distilbert-base-uncased'
legal_bert = 'nlpaueb/legal-bert-base-uncased'

In [72]:
# initialize model 
model_name = bert_base
word_embedding_model = models.Transformer(model_name)
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), 'mean')
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [73]:
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
)

In [57]:
# prepare sample train data
random.seed(999)
text_list_sample = random.sample(text_list, 2000)

train_data_sample = DenoisingAutoEncoderDataset(text_list_sample)
loader_sample = DataLoader(train_data_sample, batch_size=8, shuffle=True, drop_last=True)

In [74]:
text_list_sample

['if ani petit ha been present for the wind up of the compani detail of it shall be given in the affidavit so far a within the immedi knowledg of the depon',
 'articl thi ministeri regul shall come into forc from the date of it promulg',
 'd imda will provid an opportun for public comment befor issu a preliminari decis and a final decis grant or deni the request',
 'where a compani ha power to re-issu debentur which have been redeem particular with respect to the debentur which can be so re-issu shall be includ in everi balanc sheet of the compani',
 'in the come period the economi will also be requir to grow in a more proport throughout the nation ’ s territori by encourag econom activ outsid java in the context of creat greater equiti in develop and reduc interregion discrep',
 'in the field of religion there is a diver awar among the peopl in practic religi teach',
 'the postal oper licens shall be valid insofar a the postal oper still oper their busi activ and fulfil their oblig',


In [None]:
next(iter(loader_sample))

In [75]:
# prepare train data full
train_data = DenoisingAutoEncoderDataset(text_list)
loader = DataLoader(train_data, batch_size=8, shuffle=True, drop_last=True)

In [76]:
loss = losses.DenoisingAutoEncoderLoss(model, tie_encoder_decoder=True)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertLMHeadModel: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertLMHeadModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.2.crossattention.self.value.bias', 'bert.encoder.layer.9.crossattention.self.query.weight', 'bert.encoder.layer.4.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.9.crossattention.self.key.bias', 'bert.encod

In [77]:
# hyperparameters 
num_epochs = 1
learning_rate = 0.001

In [78]:
model_pretrain_path = 'models/bert-tsdae-'+datetime.now().strftime("%Y-%m-%d_%H-%M")

In [None]:
model.fit(
    train_objectives=[(loader_sample, loss)],
    epochs=num_epochs,
    weight_decay=0,
    scheduler='constantlr',
    optimizer_params={'lr': learning_rate},
    show_progress_bar=True,
    output_path=model_pretrain_path
)

### II. Fine-Tuning: STS

In [19]:
sts_dataset_path = 'data/stsb/stsbenchmark.tsv.gz'

if not os.path.exists(sts_dataset_path):
    util.http_get('https://sbert.net/datasets/stsbenchmark.tsv.gz', sts_dataset_path)

In [80]:
# load trained model 
trained_tsdae = SentenceTransformer('models/tsdae-2023-03-22_12-39/')

In [22]:
# model parameters
train_batch_size = 8
num_epochs = 5

model_save_path = 'models/bert-tsdae-sts-'+datetime.now().strftime("%Y-%m-%d_%H-%M")

In [23]:
# dataset to dataloader
logging.info("Read STSbenchmark train dataset")

train_samples = []
dev_samples = []
test_samples = []
with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        score = float(row['score']) / 5.0  # Normalize score to range 0 ... 1
        inp_example = InputExample(texts=[row['sentence1'], row['sentence2']], label=score)

        if row['split'] == 'dev':
            dev_samples.append(inp_example)
        elif row['split'] == 'test':
            test_samples.append(inp_example)
        else:
            train_samples.append(inp_example)

In [24]:
train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=train_batch_size)

In [25]:
# train loss
train_loss = losses.CosineSimilarityLoss(model=model)

In [26]:
# evaluator 
logging.info("Read STSbenchmark dev dataset")
evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name='sts-dev')

In [27]:
# configure training
warmup_steps = math.ceil(len(train_dataloader) * num_epochs  * 0.1) #10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))

In [None]:
# train model
trained_tsdae.to(device)
trained_tsdae.fit(train_objectives=[(train_dataloader, train_loss)],
          evaluator=evaluator,
          epochs=num_epochs,
          evaluation_steps=1000,
          warmup_steps=warmup_steps,
          output_path=model_save_path)

In [None]:
# evaluation 
model = SentenceTransformer(model_save_path)
test_evaluator = EmbeddingSimilarityEvaluator.from_input_examples(test_samples, name='sts-test')
test_evaluator(model, output_path=model_save_path)

### III. Fine-Tuning: NLI

Prepare data

In [101]:
oecd_defs = pd.read_csv("data/02 oecd_definitions_stopwords_grouped.csv", index_col=0).rename(columns={'text_clean': 'defs_text'}) #oecd definitions
oecd_defs.at[3, 'Main'] = 'Others'

train_df_augmented = pd.read_csv("data/01 train_data_augmented.csv", index_col=0).rename(columns={'text_clean': 'legal_text'}) #labeled test data
test_df = pd.read_csv("data/01 test_data.csv", index_col=0).rename(columns={'text_clean': 'legal_text'})

In [39]:
train_data_nli = train_df_augmented.loc[train_df_augmented.index.repeat(4)].reset_index(drop=True)
oecd_defs_nli = pd.concat([oecd_defs] * len(train_df_augmented), ignore_index=True)
train_data_nli = pd.concat([train_data_nli, oecd_defs_nli], axis=1) #df with text-definition pairs
train_data_nli['label'] = np.where(train_data_nli['Category_New'] == train_data_nli['Main'], 'entailment', 'neutral') #create labels = 1 for entailment, 0 for neutral

In [102]:
# add none category to oecd_defs
none_description = 'This paragraph does not talk about limiting the number or range of suppliers, limiting the ability of competitors to compete, reducing incentive of suppliers to compete, or limiting the choices available to consumers.'

none = {'Main': 'None', 'defs_text': none_description, 'length': len(none_description)}
oecd_defs = oecd_defs.append(none, ignore_index=True)
oecd_defs

  oecd_defs = oecd_defs.append(none, ignore_index=True)


Unnamed: 0,Main,defs_text,length
0,A,limit the number of supplier lead to the risk ...,833
1,B,regul can affect a supplier 's abil to compet ...,781
2,C,regul can affect supplier behaviour by not onl...,459
3,Others,regul sometim limit choic avail to consum for ...,257
4,,This paragraph does not talk about limiting th...,217


In [108]:
#multilabel NLI
train_nli_multi = pd.merge(train_df_augmented, oecd_defs, left_on='Category_New', right_on='Main')
test_nli_multi = pd.merge(test_df, oecd_defs, left_on='Category_New', right_on='Main')

In [122]:
train_nli_multi['Category_New'].value_counts()

B         894
Others    743
C         662
A         624
None      522
Name: Category_New, dtype: int64

In [41]:
neutral_subset = train_data_nli.loc[train_data_nli['label'] == 'neutral'].sample(3000, random_state=999)
entailment_subset = train_data_nli.loc[train_data_nli['label'] == 'entailment']
train_data_balanced = pd.concat([neutral_subset, entailment_subset], ignore_index=True)
train_data_balanced['label'].value_counts()

neutral       3000
entailment    2923
Name: label, dtype: int64

In [46]:
test_data_nli = test_df.loc[test_df.index.repeat(4)].reset_index(drop=True)
oecd_defs_nli = pd.concat([oecd_defs] * len(test_df), ignore_index=True)
test_data_nli = pd.concat([test_data_nli, oecd_defs_nli], axis=1)
test_data_nli['label'] = np.where(test_data_nli['Category_New'] == test_data_nli['Main'], 'entailment', 'neutral') #create labels = 1 for entailment, 0 for neutral
test_data_nli['label'].value_counts()

neutral       2349
entailment     483
Name: label, dtype: int64

In [205]:
# split to train and test
#train_df, val_df = train_test_split(test_data_nli, test_size=0.2, stratify = test_data_nli['label'] , random_state=42)

In [111]:
train_df, val_df = train_nli_multi, test_nli_multi

In [112]:
# create class of NLI dataset
class MNLIDataBert():

  def __init__(self, train_df, val_df):
    self.label_dict = {'None':0 ,'A': 1, 'B': 2, 'C': 3, 'Others': 4}

    self.train_df = train_df
    self.val_df = val_df

    self.base_path = '/content/'
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) # Using a pre-trained BERT tokenizer to encode sentences
    self.train_data = None
    self.val_data = None
    self.init_data()

  def init_data(self):
    self.train_data = self.load_data(self.train_df)
    self.val_data = self.load_data(self.val_df)

  def load_data(self, df):
    MAX_LEN = 512
    token_ids = []
    mask_ids = []
    seg_ids = []
    y = []

    premise_list = df['legal_text'].to_list()
    hypothesis_list = df['defs_text'].to_list()
    label_list = df['Category_New'].to_list()

    # combine premise and hypothesis sequences
    for (premise, hypothesis, label) in zip(premise_list, hypothesis_list, label_list):

      premise_id = self.tokenizer.encode(premise, add_special_tokens = False, truncation='longest_first', max_length = 254)
      hypothesis_id = self.tokenizer.encode(hypothesis, add_special_tokens = False, truncation='longest_first', max_length = 255)

      pair_token_ids = [self.tokenizer.cls_token_id] + premise_id + [self.tokenizer.sep_token_id] + hypothesis_id + [self.tokenizer.sep_token_id]
      premise_len = len(premise_id)
      hypothesis_len = len(hypothesis_id)

      segment_ids = torch.tensor([0] * (premise_len + 2) + [1] * (hypothesis_len + 1))  # sentence 0 and sentence 1
      attention_mask_ids = torch.tensor([1] * (premise_len + hypothesis_len + 3))  # mask padded values

      token_ids.append(torch.tensor(pair_token_ids))
      seg_ids.append(segment_ids)
      mask_ids.append(attention_mask_ids)
      y.append(self.label_dict[label])
    
    token_ids = pad_sequence(token_ids, batch_first=True)
    mask_ids = pad_sequence(mask_ids, batch_first=True)
    seg_ids = pad_sequence(seg_ids, batch_first=True)
    y = torch.tensor(y)
    dataset = TensorDataset(token_ids, mask_ids, seg_ids, y)
    print(len(dataset))
    return dataset

  def get_data_loaders(self, batch_size=32, shuffle=True):
    train_loader = DataLoader(
      self.train_data,
      shuffle=shuffle,
      batch_size=batch_size
    )

    val_loader = DataLoader(
      self.val_data,
      shuffle=shuffle,
      batch_size=batch_size
    )

    return train_loader, val_loader


In [113]:
mnli_dataset = MNLIDataBert(train_df, val_df)

3445
708


In [114]:
train_loader, val_loader = mnli_dataset.get_data_loaders(batch_size=16)

In [126]:
train_loader, _ = mnli_dataset.get_data_loaders(batch_size=1, shuffle=False)
for batch in train_loader:
    print([b.shape for b in batch])
    break

[torch.Size([1, 512]), torch.Size([1, 512]), torch.Size([1, 512]), torch.Size([1])]


In [127]:
# Define model and optimizer
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=5, max_length=512)

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}
]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [128]:
def multi_acc(y_pred, y_test):
  acc = (torch.log_softmax(y_pred, dim=1).argmax(dim=1) == y_test).sum().float() / float(y_test.size(0))
  return acc

import time

EPOCHS = 10

def train(model, train_loader, val_loader, optimizer, save_dir):  
  total_step = len(train_loader)
  best_val_acc = 0.0

  for epoch in range(EPOCHS):
    start = time.time()
    model.train()
    total_train_loss = 0
    total_train_acc  = 0
    for batch_idx, (pair_token_ids, mask_ids, seg_ids, y) in enumerate(train_loader):
      optimizer.zero_grad()
      pair_token_ids = pair_token_ids.to(device)
      mask_ids = mask_ids.to(device)
      seg_ids = seg_ids.to(device)
      labels = y.to(device)

      loss, prediction = model(pair_token_ids, 
                             token_type_ids=seg_ids, 
                             attention_mask=mask_ids, 
                             labels=labels).values()

      acc = multi_acc(prediction, labels)

      loss.backward()
      optimizer.step()
      
      total_train_loss += loss.item()
      total_train_acc  += acc.item()

    train_acc  = total_train_acc/len(train_loader)
    train_loss = total_train_loss/len(train_loader)
    model.eval()
    total_val_acc  = 0
    total_val_loss = 0
    with torch.no_grad():
      for batch_idx, (pair_token_ids, mask_ids, seg_ids, y) in enumerate(val_loader):
        optimizer.zero_grad()
        pair_token_ids = pair_token_ids.to(device)
        mask_ids = mask_ids.to(device)
        seg_ids = seg_ids.to(device)
        labels = y.to(device)
        
        loss, prediction = model(pair_token_ids, 
                             token_type_ids=seg_ids, 
                             attention_mask=mask_ids, 
                             labels=labels).values()
        
        acc = multi_acc(prediction, labels)

        total_val_loss += loss.item()
        total_val_acc  += acc.item()

    val_acc  = total_val_acc/len(val_loader)
    val_loss = total_val_loss/len(val_loader)
    end = time.time()
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)

    print(f'Epoch {epoch+1}: train_loss: {train_loss:.4f} train_acc: {train_acc:.4f} | val_loss: {val_loss:.4f} val_acc: {val_acc:.4f}')
    print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

    if val_acc > best_val_acc:
      best_val_acc = val_acc
      save_path = os.path.join('models/', f"model_epoch_{epoch+1}_val_acc_{val_acc:.4f}.pt")
      #save_path = os.path.join(save_dir, f"model_epoch_{epoch+1}_val_acc_{val_acc:.4f}.pt")
      torch.save(model.state_dict(), save_path)
      print(f"Saved model checkpoint to {save_path}")

In [129]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 109,486,085 trainable parameters


In [130]:
# Freeze all layers except the last one
for param in model.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True

# Define the optimizer for the last layer
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=1e-5)

# Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train(model, train_loader, val_loader, optimizer, 'models/')

Epoch 1: train_loss: 0.7143 train_acc: 0.7974 | val_loss: 3.0743 val_acc: 0.3028
00:32:28.85
Saved model checkpoint to models/model_epoch_1_val_acc_0.3028.pt
Epoch 2: train_loss: 0.7212 train_acc: 0.7811 | val_loss: 3.0867 val_acc: 0.3028
00:31:08.38
Epoch 3: train_loss: 0.7229 train_acc: 0.7817 | val_loss: 3.0513 val_acc: 0.3069
00:31:15.38
Saved model checkpoint to models/model_epoch_3_val_acc_0.3069.pt
Epoch 4: train_loss: 0.7212 train_acc: 0.7803 | val_loss: 3.0515 val_acc: 0.3069
00:31:04.49
Epoch 5: train_loss: 0.7172 train_acc: 0.7803 | val_loss: 3.0088 val_acc: 0.3111
00:30:54.27
Saved model checkpoint to models/model_epoch_5_val_acc_0.3111.pt
Epoch 6: train_loss: 0.7135 train_acc: 0.7797 | val_loss: 3.0007 val_acc: 0.3069
00:30:53.82
Epoch 7: train_loss: 0.7099 train_acc: 0.7823 | val_loss: 3.0004 val_acc: 0.3069
00:30:52.65
Epoch 8: train_loss: 0.7090 train_acc: 0.7788 | val_loss: 2.9654 val_acc: 0.3111
00:31:01.09
Epoch 9: train_loss: 0.7100 train_acc: 0.7785 | val_loss: 2.9

In [131]:
from sklearn.metrics import confusion_matrix
import torch

# load the saved model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
model.load_state_dict(torch.load('models/model_epoch_1_val_acc_0.3069.pt'))
model.eval() # set model to evaluation mode

# define a function to get predictions from the model
def get_predictions(model, data_loader):
    y_true = []
    y_pred = []
    for pair_token_ids, mask_ids, seg_ids, labels in data_loader:
        pair_token_ids = pair_token_ids.to(device)
        mask_ids = mask_ids.to(device)
        seg_ids = seg_ids.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            outputs = model(pair_token_ids, 
                             token_type_ids=seg_ids, 
                             attention_mask=mask_ids)
            _, predicted = torch.max(outputs.logits.data, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    return y_true, y_pred

# get predictions on test data
y_true, y_pred = get_predictions(model, val_loader)

# compute and print confusion matrix
report = classification_report(y_true, y_pred)
print(report)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

              precision    recall  f1-score   support

           0       0.93      0.89      0.91       225
           1       0.00      0.00      0.00       376
           2       0.00      0.00      0.00        68
           3       0.03      0.94      0.07        18
           4       0.00      0.00      0.00        21

    accuracy                           0.31       708
   macro avg       0.19      0.37      0.20       708
weighted avg       0.30      0.31      0.29       708



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
