In [None]:
!pip install torch torchvision --quiet
!pip install transformers  --quiet
!pip install pandas  --quiet
!pip install numpy  --quiet
!pip install sentencepiece  --quiet
!pip install sentence-splitter  --quiet
!pip install shap --quiet
!pip install optuna --quiet




[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m84.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m74.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.9/547.9 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.2/404.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Imports**

In [None]:
import random
import torch
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW,AutoModelForQuestionAnswering, get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
from transformers import DebertaTokenizer, DebertaModel, BartTokenizer
import math

# **Model loading**

In [None]:
# Use a GPU if you have one available (Runtime -> Change runtime type -> GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set seeds for reproducibility
random.seed(26)
np.random.seed(26)
torch.manual_seed(26)

tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext", do_lower_case=True)


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [None]:
def get_optimizer_grouped_parameters(
    model, model_type,
    learning_rate, weight_decay,
    layerwise_learning_rate_decay
):
    no_decay = ["bias", "LayerNorm.weight"]
    # initialize lr for task specific layer
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if "classifier" in n or "pooler" in n],
            "weight_decay": 0.0,
            "lr": learning_rate,
        },
    ]
    # initialize lrs for every layer
    num_layers = model.config.num_hidden_layers
    layers = [getattr(model, model_type).embeddings] + list(getattr(model, model_type).encoder.layer)
    layers.reverse()
    lr = learning_rate
    for layer in layers:
        lr *= layerwise_learning_rate_decay
        optimizer_grouped_parameters += [
            {
                "params": [p for n, p in layer.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": weight_decay,
                "lr": lr,
            },
            {
                "params": [p for n, p in layer.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
                "lr": lr,
            },
        ]
    return optimizer_grouped_parameters

In [None]:
def encode_data(tokenizer, passages,questions, max_length):
    """Encode the question/passage pairs into features than can be fed to the model."""
    input_ids = []
    attention_masks = []

    for passage,question in zip(passages,questions):
        encoded_data = tokenizer.encode_plus(passage,question, max_length=max_length, pad_to_max_length=True, truncation='longest_first')
        encoded_pair = encoded_data["input_ids"]
        attention_mask = encoded_data["attention_mask"]

        input_ids.append(encoded_pair)
        attention_masks.append(attention_mask)

    return np.array(input_ids), np.array(attention_masks)

In [None]:
def predict(passage,question):
  sequence = tokenizer.encode_plus(passage,question, max_length=512, pad_to_max_length=True, truncation='longest_first',return_tensors="pt")['input_ids'].to(device)

  logits = model(sequence)[0]
  probabilities = torch.softmax(logits, dim=1).detach().cpu().tolist()[0]
  proba_yes = round(probabilities[1], 2)
  proba_no = round(probabilities[0], 2)

  #print(f"Question: {question}, Yes: {proba_yes}, No: {proba_no}")

  if (proba_yes >= proba_no):
    return True
  else:
    return False






In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os


# Train and evaluate the accuracy of neural network with the addition of pruning mechanism
def train_and_evaluate(model,train_data_df,dev_data_df):
    os.mkdir('./models')
    passages_train = train_data_df.Abstract.values
    questions_train = train_data_df.questions.values
    answers_train = train_data_df.AMES.values.astype(int)

    passages_dev = dev_data_df.Abstract.values
    questions_dev = dev_data_df.questions.values
    answers_dev = dev_data_df.AMES.values.astype(int)

    # Encoding data
    max_seq_length = 512
    input_ids_train, attention_masks_train = encode_data(tokenizer, passages_train,questions_train, max_seq_length)
    input_ids_dev, attention_masks_dev = encode_data(tokenizer, passages_dev,questions_dev, max_seq_length)

    train_features = (input_ids_train, attention_masks_train, answers_train)
    dev_features = (input_ids_dev, attention_masks_dev, answers_dev)

    batch_size = 2
    train_features_tensors = [torch.tensor(feature, dtype=torch.long) for feature in train_features]
    dev_features_tensors = [torch.tensor(feature, dtype=torch.long) for feature in dev_features]

    train_dataset = TensorDataset(*train_features_tensors)
    dev_dataset = TensorDataset(*dev_features_tensors)

    train_sampler = RandomSampler(train_dataset)
    dev_sampler = SequentialSampler(dev_dataset)

    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
    dev_dataloader = DataLoader(dev_dataset, sampler=dev_sampler, batch_size=batch_size)

    learning_rate = 5e-5
    layerwise_learning_rate_decay = 0.9
    weight_decay = 0.01
    adam_epsilon = 1e-6
    use_bertadam = False
    # scheduler params
    num_epochs = 9
    num_warmup_steps = 0
    _model_type = 'bert'

    grouped_optimizer_params = get_optimizer_grouped_parameters(
        model, _model_type,
        learning_rate, weight_decay,
        layerwise_learning_rate_decay
    )
    optimizer = AdamW(
        grouped_optimizer_params,
        lr=learning_rate,
        eps=adam_epsilon,
        correct_bias=not use_bertadam
    )
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_epochs
    )

    (learning_rates1, learning_rates2, learning_rates3, learning_rates4,
    learning_rates5, learning_rates6, learning_rates7, learning_rates8,
    learning_rates9, learning_rates10, learning_rates11, learning_rates12,
    learning_rates13, learning_rates14) = [[] for i in range(14)]

    learning_rates1.append(optimizer.param_groups[0]["lr"])
    learning_rates2.append(optimizer.param_groups[2]["lr"])
    learning_rates3.append(optimizer.param_groups[4]["lr"])
    learning_rates4.append(optimizer.param_groups[6]["lr"])
    learning_rates5.append(optimizer.param_groups[8]["lr"])
    learning_rates6.append(optimizer.param_groups[10]["lr"])
    learning_rates7.append(optimizer.param_groups[12]["lr"])
    learning_rates8.append(optimizer.param_groups[14]["lr"])
    learning_rates9.append(optimizer.param_groups[16]["lr"])
    learning_rates10.append(optimizer.param_groups[18]["lr"])
    learning_rates11.append(optimizer.param_groups[20]["lr"])
    learning_rates12.append(optimizer.param_groups[22]["lr"])
    learning_rates13.append(optimizer.param_groups[24]["lr"])
    learning_rates14.append(optimizer.param_groups[26]["lr"])

    for epoch in range(num_epochs):
        optimizer.step()
        scheduler.step()
        learning_rates1.append(optimizer.param_groups[0]["lr"])
        learning_rates2.append(optimizer.param_groups[2]["lr"])
        learning_rates3.append(optimizer.param_groups[4]["lr"])
        learning_rates4.append(optimizer.param_groups[6]["lr"])
        learning_rates5.append(optimizer.param_groups[8]["lr"])
        learning_rates6.append(optimizer.param_groups[10]["lr"])
        learning_rates7.append(optimizer.param_groups[12]["lr"])
        learning_rates8.append(optimizer.param_groups[14]["lr"])
        learning_rates9.append(optimizer.param_groups[16]["lr"])
        learning_rates10.append(optimizer.param_groups[18]["lr"])
        learning_rates11.append(optimizer.param_groups[20]["lr"])
        learning_rates12.append(optimizer.param_groups[22]["lr"])
        learning_rates13.append(optimizer.param_groups[24]["lr"])
        learning_rates14.append(optimizer.param_groups[26]["lr"])

    print("Done setting up optimizer\n")
    train_loss_values = []
    dev_acc_values = []
    state=[]

    #num_epochs=5
    for i in tqdm(range(num_epochs), desc="Epoch"):

      # Training
      print("In epoch ", i, "\n")
      epoch_train_loss = 0 # Cumulative loss
      model.train()
      #model.zero_grad()

      for step, batch in enumerate(train_dataloader):

          input_ids = batch[0].to(device)
          attention_masks = batch[1].to(device)
          labels = batch[2].to(device)
          model.zero_grad()
          outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks, labels=labels)

          loss = outputs[0]
          #loss = loss / grad_acc_steps
          epoch_train_loss += loss.item()

          loss.backward()
          torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
          optimizer.step()
          scheduler.step()

      epoch_train_loss = epoch_train_loss / len(train_dataloader)
      train_loss_values.append(epoch_train_loss)
      print("Epoch loss is", epoch_train_loss)
      model.save_pretrained("./models/" + str(i))

      # Evaluation
      epoch_dev_accuracy = 0 # Cumulative accuracy
      model.eval()

      for batch in dev_dataloader:

        input_ids = batch[0].to(device)
        attention_masks = batch[1].to(device)
        labels = batch[2]

        with torch.no_grad():
            outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks)

        logits = outputs[0]
        logits = logits.detach().cpu().numpy()

        predictions = np.argmax(logits, axis=1).flatten()
        labels = labels.numpy().flatten()

        epoch_dev_accuracy += np.sum(predictions == labels) / len(labels)

      epoch_dev_accuracy = epoch_dev_accuracy / len(dev_dataloader)
      print("Epoch accuracy is",epoch_dev_accuracy )
      dev_acc_values.append(epoch_dev_accuracy)
      temp=model
      state.append(temp)

    index = dev_acc_values.index(max(dev_acc_values))
    best = state[index]
    return best,dev_acc_values



In [None]:
import pandas as pd

df=pd.read_csv('./new_data_mutagenicity.csv')

# Apply the function to the column
df['label'] = df['label'].apply(lambda x: x.startswith("['True"))
#df['label'] = df['label'].apply(lambda x: x[0] == "['False - AMES non Mutagenic']")

new_column_names = {'text': 'Abstract', 'label': 'AMES'}
df.rename(columns=new_column_names, inplace=True)


In [None]:
df['AMES'].value_counts()

True     916
False    730
Name: AMES, dtype: int64

In [None]:
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
import pandas as pd
import gc
import shutil
#df=pd.read_csv('./temp-bool.csv')

kf = KFold(n_splits=5, random_state=42, shuffle=True)
CV_accuracy_array=[]
CV_macro_avg_array=[]
CV_weighted_avg_array=[]
for train_index, test_index in kf.split(df):
    print("TRAIN:", train_index)
    print("TEST:", test_index)
    train, test_data_df = df.loc[train_index], df.loc[test_index]
    train_size = int(0.6 * len(train))
    train_data_df = train.iloc[:train_size]
    dev_data_df = train.iloc[train_size:]

    model = AutoModelForSequenceClassification.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
    model.to(device)
    model,dev_acc_values=train_and_evaluate(model,train, test_data_df)
    del model
    model = AutoModelForSequenceClassification.from_pretrained("./models/" + str(dev_acc_values.index(max(dev_acc_values))))
    model.to(device)
    filter=list(test_data_df.index)
    preds=[]
    index=[]
    for i in filter:
        try:
          passage=test_data_df.Abstract[i]
          question =test_data_df.questions[i]
          answer=predict(passage, question)
          preds.append(answer)
          index.append(i)
        except Exception as e:
          print(e)
          continue
    dev=test_data_df[test_data_df.index.isin(index)]
    true_results=dev['AMES'].tolist()
    print(classification_report(true_results, preds))
    results = classification_report(true_results, preds,output_dict=True,)
    CV_accuracy_array.append(results['accuracy'])
    CV_macro_avg_array.append(results['macro avg']['f1-score'])
    CV_weighted_avg_array.append(results['weighted avg']['f1-score'])
    del model
    gc.collect()
    torch.cuda.empty_cache()
    shutil.rmtree('./models')





TRAIN: [   0    1    2 ... 1643 1644 1645]
TEST: [  15   23   29   30   32   43   44   49   51   56   59   63   65   67
   69   70   73   76   78   99  100  101  107  109  115  123  124  128
  135  141  148  162  163  168  170  173  175  184  185  192  198  199
  203  212  220  226  231  237  239  240  244  247  251  259  261  266
  270  271  274  275  289  297  298  300  303  306  309  316  324  331
  332  339  342  344  350  351  352  353  366  367  371  374  383  394
  398  405  408  411  413  414  415  416  420  422  425  426  433  438
  450  451  464  471  479  481  482  483  486  490  493  494  497  526
  527  529  534  538  543  551  552  554  560  561  567  575  582  584
  585  588  589  590  591  597  610  613  614  617  619  620  621  629
  651  654  668  669  674  679  680  682  694  706  707  720  724  727
  730  741  744  752  754  764  765  767  772  780  792  798  802  803
  809  810  813  816  818  838  842  844  845  847  855  861  865  867
  873  874  879  887  888  8

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/9 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7151351644990875
Epoch accuracy is 0.8242424242424242
In epoch  1 

Epoch loss is 0.49465700207760427
Epoch accuracy is 0.8393939393939394
In epoch  2 

Epoch loss is 0.3470268264499749
Epoch accuracy is 0.8636363636363636
In epoch  3 

Epoch loss is 0.293176093162639
Epoch accuracy is 0.8757575757575757
In epoch  4 

Epoch loss is 0.20850011238468352
Epoch accuracy is 0.8818181818181818
In epoch  5 

Epoch loss is 0.1593443792696862
Epoch accuracy is 0.8787878787878788
In epoch  6 

Epoch loss is 0.13457753961398689
Epoch accuracy is 0.8727272727272727
In epoch  7 

Epoch loss is 0.13442888084180063
Epoch accuracy is 0.8909090909090909
In epoch  8 

Epoch loss is 0.08493918975987168
Epoch accuracy is 0.8848484848484849


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


              precision    recall  f1-score   support

       False       0.87      0.45      0.59       150
        True       0.67      0.94      0.79       180

    accuracy                           0.72       330
   macro avg       0.77      0.70      0.69       330
weighted avg       0.76      0.72      0.70       330

TRAIN: [   0    1    2 ... 1643 1644 1645]
TEST: [  10   18   31   41   48   54   58   81   83   86   88   96  111  113
  126  129  131  140  142  147  155  156  158  164  174  178  179  181
  188  195  196  208  209  210  214  218  221  233  236  243  254  265
  277  282  285  286  287  290  291  292  294  296  308  310  312  322
  323  326  327  333  341  346  348  354  355  358  360  361  363  365
  370  375  377  380  381  382  390  410  419  423  427  428  429  430
  432  435  439  447  453  458  461  462  468  477  478  485  495  500
  506  513  514  516  518  522  528  530  532  535  544  548  555  566
  570  571  576  578  583  596  598  599  601  602  607 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/9 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7085499519373436
Epoch accuracy is 0.8454545454545455
In epoch  1 

Epoch loss is 0.5380514219347797
Epoch accuracy is 0.8606060606060606
In epoch  2 

Epoch loss is 0.32154584104956263
Epoch accuracy is 0.9030303030303031
In epoch  3 

Epoch loss is 0.2927434881564374
Epoch accuracy is 0.9121212121212121
In epoch  4 

Epoch loss is 0.18973606238767235
Epoch accuracy is 0.906060606060606
In epoch  5 

Epoch loss is 0.15418620679915235
Epoch accuracy is 0.9
In epoch  6 

Epoch loss is 0.17916655840738124
Epoch accuracy is 0.9090909090909091
In epoch  7 

Epoch loss is 0.15273751681854048
Epoch accuracy is 0.8878787878787879
In epoch  8 

Epoch loss is 0.14096529160732602
Epoch accuracy is 0.8939393939393939
              precision    recall  f1-score   support

       False       0.92      0.32      0.48       137
        True       0.67      0.98      0.79       192

    accuracy                           0.71       329
   macro avg       0.79      0.65   

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/9 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.8139010029572892
Epoch accuracy is 0.7666666666666667
In epoch  1 

Epoch loss is 0.5426957674464247
Epoch accuracy is 0.8939393939393939
In epoch  2 

Epoch loss is 0.3615680560425464
Epoch accuracy is 0.9151515151515152
In epoch  3 

Epoch loss is 0.2999074702999812
Epoch accuracy is 0.906060606060606
In epoch  4 

Epoch loss is 0.23974375356062186
Epoch accuracy is 0.9181818181818182
In epoch  5 

Epoch loss is 0.18912889844700098
Epoch accuracy is 0.8878787878787879
In epoch  6 

Epoch loss is 0.19846544595807472
Epoch accuracy is 0.8909090909090909
In epoch  7 

Epoch loss is 0.12014130377385344
Epoch accuracy is 0.906060606060606
In epoch  8 

Epoch loss is 0.12877920352578606
Epoch accuracy is 0.9121212121212121
              precision    recall  f1-score   support

       False       0.84      0.82      0.83       142
        True       0.87      0.88      0.88       187

    accuracy                           0.86       329
   macro avg       0.86

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/9 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7394261108698996
Epoch accuracy is 0.8606060606060606
In epoch  1 

Epoch loss is 0.4740523625423493
Epoch accuracy is 0.8818181818181818
In epoch  2 

Epoch loss is 0.3463029825210627
Epoch accuracy is 0.896969696969697
In epoch  3 

Epoch loss is 0.2325680167859638
Epoch accuracy is 0.8878787878787879
In epoch  4 

Epoch loss is 0.17826187887235165
Epoch accuracy is 0.8727272727272727
In epoch  5 

Epoch loss is 0.16689543325347628
Epoch accuracy is 0.9
In epoch  6 

Epoch loss is 0.13286287377549685
Epoch accuracy is 0.8757575757575757
In epoch  7 

Epoch loss is 0.15736389407273552
Epoch accuracy is 0.8848484848484849
In epoch  8 

Epoch loss is 0.12150974208680951
Epoch accuracy is 0.8939393939393939
              precision    recall  f1-score   support

       False       0.53      0.96      0.68       145
        True       0.91      0.33      0.48       184

    accuracy                           0.60       329
   macro avg       0.72      0.64    

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Done setting up optimizer





Epoch:   0%|          | 0/9 [00:00<?, ?it/s]

In epoch  0 

Epoch loss is 0.7647304591045846
Epoch accuracy is 0.8181818181818182
In epoch  1 

Epoch loss is 0.5302093862810303
Epoch accuracy is 0.7909090909090909
In epoch  2 

Epoch loss is 0.3682375937601875
Epoch accuracy is 0.8424242424242424
In epoch  3 

Epoch loss is 0.28371346341218157
Epoch accuracy is 0.8696969696969697
In epoch  4 

Epoch loss is 0.23331865142314637
Epoch accuracy is 0.8909090909090909
In epoch  5 

Epoch loss is 0.18082073366258694
Epoch accuracy is 0.8515151515151516
In epoch  6 

Epoch loss is 0.11235814593276235
Epoch accuracy is 0.8424242424242424
In epoch  7 

Epoch loss is 0.09558103558953002
Epoch accuracy is 0.8424242424242424
In epoch  8 

Epoch loss is 0.11401979712746887
Epoch accuracy is 0.8545454545454545
              precision    recall  f1-score   support

       False       0.74      0.69      0.71       156
        True       0.73      0.78      0.76       173

    accuracy                           0.74       329
   macro avg       0

In [None]:
print("The mean accuracy score is", np.mean(CV_accuracy_array))

The mean accuracy score is 0.7241834760983696


In [None]:
print("The standard deviation for accuracy is", np.std(CV_accuracy_array))

The standard deviation for accuracy is 0.0805513967684362


In [None]:
print("The mean macro avg score is", np.mean(CV_macro_avg_array))

The mean macro avg score is 0.6982839753173546


In [None]:
print("The standard deviation for macro avg score is", np.std(CV_macro_avg_array))

The standard deviation for macro avg score is 0.09318259309419583


In [None]:
print("The mean weighted avg score is", np.mean(CV_weighted_avg_array))

The mean weighted avg score is 0.703822645619609


In [None]:
print("The standard deviation for weighted avg score is", np.std(CV_weighted_avg_array))

The standard deviation for weighted avg score is 0.09429102430060773


In [None]:
from google.colab import runtime
runtime.unassign()
