In [1]:
import re
import shutil
from pathlib import Path

import sklearn.metrics
import torch
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

def export_labels_to_model(model_name: str, model) -> None:
    """
    Reads from a model configuration to export the labels of the class target to a file in the model's assets folder.
    
    Args:
      model_name (str): The name of the model. This is used to create a directory for the model.
      model: The model to export.
    """
    labels = model.config.label2id
    labels = sorted(labels, key=labels.get)

    model_assets_path = f'models/{model_name}/saved_model/1/assets'

    with open(f'{model_assets_path}/labels.txt', 'w') as f:
        f.write('\n'.join(labels))

def save_model_from_hub(model_name: str) -> None:
    """
    We load the model and tokenizer from the HuggingFace hub, save them to the `models` directory, and then export
    the labels of the model to the directory that contains all the assets.
    
    Args:
      model_name (str): The name of the model you want to save.
    """

    model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    model.save_pretrained(f'models/{model_name}', from_tf=True, save_format='tf', saved_model=True)
    tokenizer.save_pretrained(f'models/{model_name}_tokenizer', from_tf=True, save_format='tf')
    export_labels_to_model(model_name, model)

    print(f"Model {model_name} saved.")

def copy_tokenizer_vocab_to_model(model_name):
    """
    We copy the tokenizer's vocabulary to the model's directory, so that we can use the model for
    predictions.

    Args:
        model_name (str): The name of the model you want to use.
    """

    tokenizer_vocab_path = f'models/{model_name}_tokenizer/vocab.txt'
    model_assets_path = f'models/{model_name}/saved_model/1/assets'

    shutil.copyfile(tokenizer_vocab_path, f'{model_assets_path}/vocab.txt')
    

def prepare_model_from_hub(model_name: str, model_dir:str) -> None:
    """
    If the model directory doesn't exist, download the model from the HuggingFace Hub, and copy the tokenizer
    vocab to the model directory so that the format can be digested by Spark NLP.
    
    Args:
      model_name (str): The name of the model you want to use.
      model_dir (str): The directory where the model will be saved.
    """

    model_path = f'{model_dir}/{model_name}'

    if not Path(model_path).is_dir():
        save_model_from_hub(model_name)
        copy_tokenizer_vocab_to_model(model_name)

def get_label_metadata(dataset):
  """
  It takes a dataset and returns a list of labels, a dictionary mapping label ids to labels, and a
  dictionary mapping labels to label ids
  
  Args:
    dataset: the dataset object
  """
  labels = [label for label in dataset['train'].features.keys() if label not in ['text', 'label_descriptions']]
  id2label = dict(enumerate(labels))
  label2id = {label:idx for idx, label in enumerate(labels)}
  return labels, id2label, label2id

def compute_metrics(eval_pred):
  """
  It takes in the predictions and labels from the model, and returns a dictionary of metrics.
  Logits are converted into probabilities following a sigmoid function; then, the predictions are
  converted into binary values by comparing the probabilities to a threshold.
  
  Args:
    eval_pred: a tuple of (predictions, labels)
  
  Returns:
    A dictionary with the accuracy, f1_micro and f1_macro
  """
  sigmoid_threshold = 0.3
  #print(eval_pred)  
  predictions, labels = eval_pred
  #print( predictions, labels)  
  predictions = torch.from_numpy(predictions).sigmoid()
  #print(y_pred)
  labels = torch.from_numpy(labels) 
  #print( predictions, labels)    
  accuracy = accuracy_thresh(predictions, labels, sigmoid_threshold)
  f1_micro = sklearn.metrics.f1_score(labels, (predictions > sigmoid_threshold), average="micro")
  f1_macro = sklearn.metrics.f1_score(labels, (predictions > sigmoid_threshold), average="macro")
  #confusion_matrix = sklearn.metrics.confusion_matrix(labels.flatten(), (predictions > 0.5).flatten().astype(int))
  #print(confusion_matrix)  
  return {
      "accuracy_thresh": accuracy,
      "f1_micro": f1_micro,
      "f1_macro": f1_macro,
      "eval_f1": f1_micro
  }

def accuracy_thresh(y_pred, y_true, thresh): 
    """
    It takes in a predicted probability and a true label, and returns the accuracy of the prediction
    
    Args:
      y_pred: the predicted values
      y_true: the ground truth labels
      thresh: the threshold for the prediction to be considered a positive prediction.
    
    Returns:
      The mean of the accuracy of the predictions.
    """
    #y_pred = torch.from_numpy(y_pred).sigmoid()
    #print(y_pred)
    #y_true = torch.from_numpy(y_true)
    #print(y_true)
    return ((y_pred>thresh)==y_true.bool()).float().mean().item()


def prepare_splits_for_training(dataset, subset_data):
  """Splits and shuffles the dataset into train and test splits.

  Args:
      dataset (DatasetDict): The dataset to split. 
      subset_data (bool, optional): Flag to use a subset of the data.

  Returns:
      Tuple[Dataset]: One dataset object per train, test split.
  """
  fraction = 0.05 if subset_data else 1
  splits = [dataset["train"], dataset["test"]]

  return [
    split.shuffle(seed=42).select(range(int(len(split) * fraction)))
    for split in splits
  ]

def convert_to_tf_dataset(dataset, data_collator, shuffle_flag, batch_size):
  """
  We convert the dataset to a tf.data.Dataset object, which is a TensorFlow object that can be used
  to train a model
  
  Args:
    dataset: The dataset to convert to a tf.data.Dataset.
    data_collator: This is a function that takes in a list of tensors and returns a single tensor.
    shuffle_flag: Whether to shuffle the dataset or not.
    batch_size: The number of samples per batch.
  
  Returns:
    A tf.data.Dataset object
  """
  return (
      dataset.to_tf_dataset(
          columns=["attention_mask", "input_ids", "token_type_ids"],
          label_cols=["labels"],
          shuffle=shuffle_flag,
          collate_fn=data_collator,
          batch_size=batch_size
      )
  )

def preprocess_text(text: str):
    """Cleans and removes special characters from the text."""

    replacements = [
        (r"what's", "what is "),
        (r"won't", "will not "),
        (r"\'s", " "),
        (r"\'ve", " have "),
        (r"can't", "can not "),
        (r"n't", " not "),
        (r"i'm", "i am "),
        (r"\'re", " are "),
        (r"\'d", " would "),
        (r"\'ll", " will "),
        (r"\'scuse", " excuse "),
        (r"\'\n", " "),
        (r"-", " "),
        (r"\'\xa0", " "),
        (r"(@.*?)[\s]", " "),
        (r"&amp;", "&"),
    ]
    
    text = text.lower()
    for pattern, replacement in replacements:
        text = re.sub(pattern, replacement, text)

    text = re.sub(r"\s+", " ", text).strip()
    return text


  _torch_pytree._register_pytree_node(


In [2]:
import logging
from typing import Optional

from datasets import load_dataset
import numpy as np
import torch
import torch.nn as nn
from transformers import AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, TrainingArguments, Trainer
import typer

  _torch_pytree._register_pytree_node(


In [3]:
def tokenize(batch):
    """Tokenises the text and creates a numpy array with its assigned labels."""
    text = [preprocess_text(text) for text in batch["text"]]
    encoding = tokenizer(text, max_length=177, padding="max_length", truncation=True)

    labels_batch = {k: batch[k] for k in batch.keys() if k in labels}
    #print(labels_batch)
    labels_matrix = np.zeros((len(text), len(labels)))
    for idx, label in enumerate(labels):
        #print(label)
        labels_matrix[:, idx] = labels_batch[label]
    #print(labels_matrix)
    encoding["labels"] = labels_matrix.tolist()
    #print(encoding["labels"])
    return encoding

In [4]:
class MultilabelTrainer(Trainer):

    def compute_loss(self, model, inputs, return_outputs=False):
            """
            Custom loss function calculation using BCEWithLogitsLoss, it returns the loss and the outputs if the
            return_outputs flag is set to True
            This function is used during training, evaluation, and prediction; specifically every time a batch is processed.
            The default loss function is here https://github.com/huggingface/transformers/blob/820c46a707ddd033975bc3b0549eea200e64c7da/src/transformers/trainer.py#L2561
            
            Args:
              model: the model we're training
              inputs: a dictionary of input tensors
              return_outputs: if True, the loss and the model outputs are returned. If False, only the loss is
            returned. Defaults to False
            
            Returns:
              The loss and the outputs of the model.
            """
            labels = inputs.pop("labels")
            # forward pass
            outputs = model(**inputs)
            logits = outputs.logits
            # compute custom loss
            loss_fct = torch.nn.BCEWithLogitsLoss()
            loss = loss_fct(logits.view(-1, self.model.config.num_labels), 
                            labels.float().view(-1, self.model.config.num_labels))
            return (loss, outputs) if return_outputs else loss


In [5]:
def instantiate_classifier(model_name,labels, id2label, label2id):
    """
    We're instantiating a BERT model, and then replacing the classification layer with a custom one for our task.
    
    Args:
      labels: a list of all the labels in the dataset
      id2label: a dictionary mapping from label ids to label names
      label2id: a dictionary mapping labels to integers
    
    Returns:
      A model with a classifier that has 3 layers.
    """

    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        problem_type="multi_label_classification",
        num_labels=len(labels),
        id2label=id2label,
        label2id=label2id,
    )
    model.classifier = nn.Sequential(
        nn.Linear(768, 50),
        nn.ReLU(),
        nn.Linear(50, len(labels))
    )
    return model

In [6]:

def training(
    epochs= 20 ,
    output_model_name= 'stop_reasons',
    subset_data: bool = False,
    push_to_hub: bool = False,
    personal_token: Optional[str] = None,
    model_name='domenicrosati/ClinicalTrialBioBert'
):
    """
    Main logic of the fine-tuning process: this function loads the dataset, tokenizes it,
    splits it into train and validation sets, loads the model, trains it, and saves it
    
    Args:
      epochs (int): number of epochs to train for
      output_model_name (str): filename and path to the directory where the model will be saved.
      subset_data (bool): flag to indicate whether to use a subset of the data for testing purposes
      push_to_hub (bool): flag to indicate whether to push the model to the hub
      personal_token (str | None): your personal Hugging Face Hub token
    """
    
    logging.basicConfig(level=logging.INFO)

    dataset = load_dataset("opentargets/clinical_trial_reason_to_stop", split='train').train_test_split(test_size=0.1, seed=42)
    #print(dataset)
    global labels
    labels, id2label, label2id = get_label_metadata(dataset)

    global tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=True)
    dataset_cols = [col for col in dataset["train"].column_names if col not in ["text", "input_ids", "attention_mask", "labels"]]
    tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=dataset_cols)
    #print(tokenized_dataset)
    train_dataset, test_dataset = prepare_splits_for_training(tokenized_dataset, subset_data)
    #print(train_dataset)
    logging.info(f"Train dataset length: {len(train_dataset)}")
    logging.info(f"Test dataset length: {len(test_dataset)}")
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    args = TrainingArguments(
        output_dir=output_model_name,
        evaluation_strategy="epoch",
        learning_rate=5e-5,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        weight_decay=0.01,
        data_seed=42,
        num_train_epochs=epochs,
        metric_for_best_model="f1",
        save_total_limit=1,
        load_best_model_at_end=True,
        report_to=["tensorboard"],
        save_strategy='epoch'
    )
    trainer = MultilabelTrainer(
        model=instantiate_classifier(model_name,labels, id2label, label2id),
        args=args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    
    trainer.train()
    metrics = trainer.evaluate()
    print(metrics)
    predictions = trainer.predict(test_dataset)
    print(predictions)
    trainer.save_model(output_model_name)
    if push_to_hub:
        trainer.push_to_hub()

    return trainer

In [11]:
trainer=training(model_name='domenicrosati/ClinicalTrialBioBert')

INFO:root:Train dataset length: 3372
INFO:root:Test dataset length: 375
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at domenicrosati/ClinicalTrialBioBert and are newly initialized: ['bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,F1,Accuracy Thresh,F1 Micro,F1 Macro
1,No log,0.26617,0.0,0.934118,0.0,0.0
2,No log,0.221882,0.0,0.934118,0.0,0.0
3,No log,0.198114,0.429043,0.945726,0.429043,0.0982
4,No log,0.17414,0.55414,0.956078,0.55414,0.178022
5,0.239400,0.148752,0.637143,0.960157,0.637143,0.286526
6,0.239400,0.130123,0.678899,0.961569,0.678899,0.36702
7,0.239400,0.117275,0.736579,0.966902,0.736579,0.44665
8,0.239400,0.118048,0.71932,0.963765,0.71932,0.433648
9,0.239400,0.10841,0.744298,0.966588,0.744298,0.483299
10,0.098500,0.101032,0.768862,0.969725,0.768862,0.512534


{'eval_f1': 0.7759433962264151, 'eval_loss': 0.09728629887104034, 'eval_accuracy_thresh': 0.9701960682868958, 'eval_f1_micro': 0.7759433962264151, 'eval_f1_macro': 0.5247557221952606, 'eval_runtime': 0.7866, 'eval_samples_per_second': 476.753, 'eval_steps_per_second': 15.256, 'epoch': 20.0}
PredictionOutput(predictions=array([[-5.179295 , -3.4378521, -7.831208 , ..., -3.1311064, -8.554558 ,
        -5.128829 ],
       [-5.3846283, -4.1228495, -8.102286 , ..., -3.4741073, -8.864204 ,
        -5.245756 ],
       [-5.380349 , -3.9970558, -8.088544 , ..., -3.4552574, -8.8096485,
        -5.211628 ],
       ...,
       [ 1.6839526, -4.702727 , -3.568052 , ..., -2.6990564, -4.2404666,
        -4.796734 ],
       [-3.069668 , -4.4003973, -4.170887 , ...,  2.36044  , -6.1254663,
        -5.298255 ],
       [-3.1181903, -2.4374201, -5.98372  , ..., -2.8834758, -3.3140206,
        -2.538319 ]], dtype=float32), label_ids=array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],


In [13]:
from sklearn.metrics import classification_report
test_dataset = load_dataset("opentargets/clinical_trial_reason_to_stop", split='all')

# Tokenize test dataset

dataset_cols = [col for col in test_dataset.column_names if col not in ["text", "input_ids", "attention_mask", "labels"]]
tokenized_test_dataset = test_dataset.map(tokenize, batched=True, remove_columns=dataset_cols)
predictions = trainer.predict(tokenized_test_dataset).predictions
true_labels = np.array(tokenized_test_dataset["labels"]).astype(bool)
# Get predictions
sigmoid_threshold = 0.3
predictions = torch.from_numpy(predictions).sigmoid()
true_labels = torch.from_numpy(true_labels) 
# Convert probabilities to binary predictions using threshold
predicted_labels = (predictions > sigmoid_threshold)
report = classification_report(true_labels, predicted_labels, target_names=labels, digits=4)
print(report)

                         precision    recall  f1-score   support

          Another_Study     0.9455    0.9598    0.9526       199
Business_Administrative     0.9570    0.9545    0.9558       792
                Covid19     1.0000    1.0000    1.0000       183
           Endpoint_Met     0.7500    0.1176    0.2034        51
         Ethical_Reason     0.0000    0.0000    0.0000        17
      Insufficient_Data     0.0000    0.0000    0.0000        39
Insufficient_Enrollment     0.9832    0.9814    0.9823      1075
       Interim_Analysis     0.0000    0.0000    0.0000        28
         Invalid_Reason     0.9051    0.9160    0.9105       250
    Logistics_Resources     0.8407    0.8239    0.8322       301
               Negative     0.9672    0.9620    0.9646       368
             No_Context     0.9661    0.6867    0.8028        83
             Regulatory     0.8583    0.9196    0.8879       112
     Safety_Sideeffects     0.9761    0.9668    0.9714       211
           Study_Design 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
from peft import PeftModel,PeftConfig
def instantiate_classifier_finetuned(model_name,labels, id2label, label2id):
    """
    We're instantiating a BERT model, and then replacing the classification layer with a custom one for our task.
    
    Args:
      labels: a list of all the labels in the dataset
      id2label: a dictionary mapping from label ids to label names
      label2id: a dictionary mapping labels to integers
    
    Returns:
      A model with a classifier that has 3 layers.
    """

    model = AutoModelForSequenceClassification.from_pretrained(
       'domenicrosati/ClinicalTrialBioBert',
        problem_type="multi_label_classification",
        num_labels=len(labels),
        id2label=id2label,
        label2id=label2id,
    )
    model = PeftModel.from_pretrained(model,model_name,is_trainable=True)
    #model=model.merge_and_unload()
    for param in model.parameters():
          param.requires_grad = True
    model.classifier = nn.Sequential(
        nn.Linear(768, 50),
        nn.ReLU(),
        nn.Linear(50, len(labels))
    )
    return model

def training(
    epochs= 20 ,
    output_model_name= 'stop_reasons',
    subset_data: bool = False,
    push_to_hub: bool = False,
    personal_token: Optional[str] = None,
    model_name='domenicrosati/ClinicalTrialBioBert'
):
    """
    Main logic of the fine-tuning process: this function loads the dataset, tokenizes it,
    splits it into train and validation sets, loads the model, trains it, and saves it
    
    Args:
      epochs (int): number of epochs to train for
      output_model_name (str): filename and path to the directory where the model will be saved.
      subset_data (bool): flag to indicate whether to use a subset of the data for testing purposes
      push_to_hub (bool): flag to indicate whether to push the model to the hub
      personal_token (str | None): your personal Hugging Face Hub token
    """
    
    logging.basicConfig(level=logging.INFO)

    dataset = load_dataset("opentargets/clinical_trial_reason_to_stop", split='train').train_test_split(test_size=0.1, seed=42)
    #print(dataset)
    global labels
    labels, id2label, label2id = get_label_metadata(dataset)

    global tokenizer
    tokenizer = AutoTokenizer.from_pretrained('domenicrosati/ClinicalTrialBioBert', do_lower_case=True)
    dataset_cols = [col for col in dataset["train"].column_names if col not in ["text", "input_ids", "attention_mask", "labels"]]
    tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=dataset_cols)
    #print(tokenized_dataset)
    train_dataset, test_dataset = prepare_splits_for_training(tokenized_dataset, subset_data)
    #print(train_dataset)
    logging.info(f"Train dataset length: {len(train_dataset)}")
    logging.info(f"Test dataset length: {len(test_dataset)}")
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    args = TrainingArguments(
        output_dir=output_model_name,
        evaluation_strategy="epoch",
        learning_rate=5e-5,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        weight_decay=0.01,
        data_seed=42,
        num_train_epochs=epochs,
        metric_for_best_model="f1",
        save_total_limit=1,
        load_best_model_at_end=False,
        report_to=["tensorboard"],
        save_strategy='no'
    )
    trainer = MultilabelTrainer(
        model= instantiate_classifier_finetuned(model_name,labels, id2label, label2id),
        args=args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    
    trainer.train()
    metrics = trainer.evaluate()
    print(metrics)
    predictions = trainer.predict(test_dataset)
    print(predictions)
    # trainer.save_model(output_model_name)
    # if push_to_hub:
    #     trainer.push_to_hub()

    return trainer

In [17]:
trainer_new=training(model_name='checkpoint-205835')

INFO:root:Train dataset length: 3372
INFO:root:Test dataset length: 375
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at domenicrosati/ClinicalTrialBioBert and are newly initialized: ['bert.pooler.dense.bias', 'classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,F1,Accuracy Thresh,F1 Micro,F1 Macro
1,No log,0.208425,0.246206,0.929882,0.246206,0.030672
2,No log,0.16248,0.552381,0.948392,0.552381,0.178522
3,No log,0.128364,0.633508,0.956078,0.633508,0.277524
4,No log,0.106369,0.747525,0.968,0.747525,0.481108
5,0.156800,0.094497,0.771635,0.970196,0.771635,0.510942
6,0.156800,0.093101,0.77129,0.97051,0.77129,0.510986
7,0.156800,0.088512,0.783133,0.971765,0.783133,0.547823
8,0.156800,0.08818,0.778973,0.97098,0.778973,0.543498
9,0.156800,0.094823,0.761457,0.968157,0.761457,0.553294
10,0.052600,0.09404,0.763593,0.968627,0.763593,0.528892


{'eval_f1': 0.7859649122807018, 'eval_loss': 0.09819208830595016, 'eval_accuracy_thresh': 0.971294105052948, 'eval_f1_micro': 0.7859649122807018, 'eval_f1_macro': 0.5814147499802844, 'eval_runtime': 0.7785, 'eval_samples_per_second': 481.673, 'eval_steps_per_second': 15.414, 'epoch': 20.0}
PredictionOutput(predictions=array([[-5.5445943 , -5.9934154 , -7.046057  , ..., -2.5061266 ,
        -6.350135  , -7.3434014 ],
       [-6.1627617 , -6.4783897 , -7.0276675 , ..., -4.198851  ,
        -6.9666696 , -7.855377  ],
       [-6.5516067 , -5.7640424 , -6.816563  , ..., -4.1927013 ,
        -6.7160454 , -7.5685945 ],
       ...,
       [ 3.4823356 , -2.1001017 , -5.1224527 , ..., -5.0763335 ,
        -4.8721075 , -5.421452  ],
       [-4.9112577 , -5.0351677 , -6.372191  , ...,  3.4419606 ,
        -5.538386  , -6.7094398 ],
       [-5.9622145 , -0.81831294, -6.0952506 , ..., -1.5838529 ,
        -4.2679796 , -5.8804593 ]], dtype=float32), label_ids=array([[0., 0., 0., ..., 0., 0., 0.],
   

In [18]:
from sklearn.metrics import classification_report
test_dataset = load_dataset("opentargets/clinical_trial_reason_to_stop", split='all')

# Tokenize test dataset

dataset_cols = [col for col in test_dataset.column_names if col not in ["text", "input_ids", "attention_mask", "labels"]]
tokenized_test_dataset = test_dataset.map(tokenize, batched=True, remove_columns=dataset_cols)
predictions = trainer_new.predict(tokenized_test_dataset).predictions
true_labels = np.array(tokenized_test_dataset["labels"]).astype(bool)
# Get predictions
sigmoid_threshold = 0.3
predictions = torch.from_numpy(predictions).sigmoid()
true_labels = torch.from_numpy(true_labels) 
# Convert probabilities to binary predictions using threshold
predicted_labels = (predictions > sigmoid_threshold)
report = classification_report(true_labels, predicted_labels, target_names=labels, digits=4)
print(report)

                         precision    recall  f1-score   support

          Another_Study     0.9747    0.9698    0.9723       199
Business_Administrative     0.9666    0.9861    0.9762       792
                Covid19     0.9946    1.0000    0.9973       183
           Endpoint_Met     0.6923    0.8824    0.7759        51
         Ethical_Reason     0.9333    0.8235    0.8750        17
      Insufficient_Data     0.9130    0.5385    0.6774        39
Insufficient_Enrollment     0.9808    0.9953    0.9880      1075
       Interim_Analysis     0.9286    0.9286    0.9286        28
         Invalid_Reason     0.9636    0.9520    0.9577       250
    Logistics_Resources     0.8743    0.9701    0.9197       301
               Negative     0.9758    0.9864    0.9811       368
             No_Context     0.9359    0.8795    0.9068        83
             Regulatory     0.9640    0.9554    0.9596       112
     Safety_Sideeffects     0.9858    0.9905    0.9882       211
           Study_Design 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
