In [None]:
!pip install transformers datasets torch scikit-learn==1.2.2 scipy==1.10.1  seqeval==1.2.2 pytorch-crf==0.7.2

Collecting datasets
  Downloading datasets-2.16.0-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting scipy==1.10.1
  Downloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting seqeval==1.2.2
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pytorch-crf==0.7.2
  Downloading pytorch_crf-0.7.2-py3-none-any.whl (9.5 kB)
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━

In [None]:
# Colab-only step: mount Google Drive so it is reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/BERT-BiLSTM-CRF

[Errno 2] No such file or directory: '/content/drive/MyDrive/BERT-BiLSTM-CRF'
/content


In [None]:
import os
import json
import torch
import numpy as np
import torch.nn as nn
from tqdm import tqdm
from torchcrf import CRF
from datasets import load_dataset
from torch.utils.data import DataLoader
from seqeval.metrics import classification_report
from transformers import BertModel, BertConfig, BertTokenizer, AdamW, get_linear_schedule_with_warmup, AutoTokenizer, DataCollatorForTokenClassification

In [None]:
# Label vocabulary: BIO tag name -> integer class id (ids are contiguous, 0..23).
tag_2_id = {
    'B-application': 0,
    'B-cve id': 1,
    'B-edition': 2,
    'B-file': 3,
    'B-function': 4,
    'B-hardware': 5,
    'B-language': 6,
    'B-method': 7,
    'B-os': 8,
    'B-parameter': 9,
    'B-programming language': 10,
    'B-relevant_term': 11,
    'B-update': 12,
    'B-vendor': 13,
    'B-version': 14,
    'I-application': 15,
    'I-edition': 16,
    'I-hardware': 17,
    'I-os': 18,
    'I-relevant_term': 19,
    'I-update': 20,
    'I-vendor': 21,
    'I-version': 22,
    'O': 23,
}
# Reverse lookup (class id -> tag name). Built by inverting tag_2_id, so the two
# maps cannot drift apart; insertion order follows ascending id.
id_2_tag = {class_id: tag for tag, class_id in tag_2_id.items()}

In [None]:
# Tag names in the iteration order of id_2_tag (the order its entries were inserted).
label_names = list(id_2_tag.values())

# **Fine-Tune BERT**

In [None]:
class NerConfig:
    """Paths, label maps and hyper-parameters shared by the notebook.

    Constructing an instance has one side effect: the checkpoint directory
    ``output_dir`` is created if it does not exist yet. All attributes are
    plain Python values, so ``vars(config)`` can be dumped to JSON.
    """

    def __init__(self):
        # Identifier handed to the `from_pretrained` loaders (model and tokenizer).
        self.bert_dir = "thongnef/bert-finetuned-ner-cti"

        # Where checkpoints and the argument dump are written.
        self.output_dir = "./checkpoint/"
        # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no window
        # between the check and the creation, and re-instantiating is harmless.
        os.makedirs(self.output_dir, exist_ok=True)

        # Label vocabulary, taken from the notebook-level tables.
        self.bio_labels = label_names
        self.num_labels = len(self.bio_labels)
        self.label2id = tag_2_id
        self.id2label = id_2_tag

        self.max_seq_len = 512          # token length the model is built for
        self.epochs = 5
        self.train_batch_size = 8
        self.dev_batch_size = 8
        self.bert_learning_rate = 2e-5  # learning rate for the BERT encoder parameters
        self.crf_learning_rate = 3e-3   # learning rate for every non-BERT parameter
        self.adam_epsilon = 1e-8
        self.weight_decay = 0.01
        self.warmup_proportion = 0.01   # fraction of all training steps used for warm-up
        self.save_step = 500            # checkpoint interval, in training steps

# **Model**

In [None]:
class ModelOutput:
  """Simple record bundling the three values a forward pass hands back.

  The arguments are stored as given, without copying or validation:
  ``logits`` and ``labels`` are required, ``loss`` defaults to ``None``.
  """

  def __init__(self, logits, labels, loss=None):
    self.logits, self.labels, self.loss = logits, labels, loss

class BertNer(nn.Module):
  """BERT encoder followed by a one-layer BiLSTM, a linear tag projection and a CRF.

  Args:
    args: configuration object; ``bert_dir``, ``max_seq_len`` and ``num_labels``
      are read from it.
  """

  def __init__(self, args):
    super(BertNer, self).__init__()
    self.bert = BertModel.from_pretrained(args.bert_dir)
    self.bert_config = BertConfig.from_pretrained(args.bert_dir)
    hidden_size = self.bert_config.hidden_size
    self.lstm_hiden = 128
    # Kept for compatibility with code that reads it; forward() no longer needs it.
    self.max_seq_len = args.max_seq_len
    # No `dropout` argument: nn.LSTM applies dropout only between stacked layers,
    # so with a single layer it had no effect and merely produced a warning.
    self.bilstm = nn.LSTM(hidden_size, self.lstm_hiden, 1, bidirectional=True, batch_first=True)
    self.linear = nn.Linear(self.lstm_hiden * 2, args.num_labels)
    self.crf = CRF(args.num_labels, batch_first=True)

  def forward(self, input_ids, attention_mask, labels=None):
    """Decode tags for a batch and, when labels are given, compute the CRF loss.

    Args:
      input_ids: token ids, batch-first.
      attention_mask: 1 for real tokens and 0 for padding; used as the CRF mask.
      labels: optional gold tag ids aligned with ``input_ids``.

    Returns:
      ModelOutput whose ``logits`` holds the CRF-decoded tag ids for each
      sequence (not raw scores), ``labels`` echoes the argument, and ``loss``
      is the negated CRF log-likelihood (``reduction='mean'``) or ``None``.
    """
    bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    seq_out = bert_output[0]  # per-token encoder states: [batch, seq_len, hidden_size]
    seq_out, _ = self.bilstm(seq_out)  # [batch, seq_len, 2 * lstm_hiden]
    # nn.Linear acts on the last dimension, so no flatten/restore round-trip is
    # needed. The former reshape to a fixed `max_seq_len` failed for any batch
    # padded to a different length.
    emissions = self.linear(seq_out)  # [batch, seq_len, num_labels]

    mask = attention_mask.bool()
    logits = self.crf.decode(emissions, mask=mask)
    loss = None
    if labels is not None:
      # The CRF returns a log-likelihood; negate it to obtain a loss to minimise.
      loss = -self.crf(emissions, labels, mask=mask, reduction='mean')
    return ModelOutput(logits, labels, loss)

In [None]:
class Trainer:
    """Minimal training / evaluation loop for a token-classification model.

    The model is expected to be callable as ``model(input_ids, attention_mask,
    labels)`` and to return an object with ``loss`` and ``logits`` attributes,
    where ``logits`` holds one decoded tag-id sequence per example.

    Args:
        output_dir: directory that holds the ``pytorch_model_ner.bin`` checkpoint.
        model: the network to train / evaluate.
        train_loader: iterable of training batches (mappings of tensors).
        save_step: write a checkpoint every ``save_step`` training steps.
        dev_loader: stored but not used by this class.
        test_loader: iterable of evaluation batches used by ``test``.
        optimizer: optimizer stepped once per batch.
        schedule: learning-rate scheduler stepped once per batch.
        epochs: number of passes over ``train_loader``.
        device: device the batches (and the loaded checkpoint) are moved to.
        id2label: mapping from tag id to tag name, used to build the report.
    """

    CHECKPOINT_NAME = "pytorch_model_ner.bin"

    def __init__(self,
                 output_dir=None,
                 model=None,
                 train_loader=None,
                 save_step=500,
                 dev_loader=None,
                 test_loader=None,
                 optimizer=None,
                 schedule=None,
                 epochs=1,
                 device="cpu",
                 id2label=None):
        self.output_dir = output_dir
        self.model = model
        self.train_loader = train_loader
        self.dev_loader = dev_loader
        self.test_loader = test_loader
        self.epochs = epochs
        self.device = device
        self.optimizer = optimizer
        self.schedule = schedule
        self.id2label = id2label
        self.save_step = save_step
        # An evaluation-only Trainer may be built without a training loader.
        self.total_step = len(self.train_loader) * self.epochs if self.train_loader is not None else 0

    def _checkpoint_path(self):
        """Return the path of the single checkpoint file inside ``output_dir``."""
        return os.path.join(self.output_dir, self.CHECKPOINT_NAME)

    def _save_checkpoint(self):
        """Write the model's state dict to the checkpoint file (overwriting it)."""
        torch.save(self.model.state_dict(), self._checkpoint_path())

    def _to_tag_sequences(self, logits, labels, attention_mask):
        """Convert one batch of predicted / gold tag ids into tag-name sequences.

        ``logits``, ``labels`` and ``attention_mask`` are indexable per example.
        Only the positions strictly between the first and the last attended
        token are kept. This assumes every sequence is wrapped in one leading
        and one trailing special token (as BERT tokenizers do with [CLS]/[SEP]);
        keeping the trailing one would score a non-word position as a tag.

        Returns:
            (preds, trues): two lists of tag-name lists, one entry per example.
        """
        preds, trues = [], []
        for decoded, gold, mask_row in zip(logits, labels, attention_mask):
            length = int(sum(mask_row))
            preds.append([self.id2label[int(tag_id)] for tag_id in decoded[1:length - 1]])
            trues.append([self.id2label[int(tag_id)] for tag_id in gold[1:length - 1]])
        return preds, trues

    def train(self):
        """Run ``epochs`` passes over ``train_loader`` and checkpoint the model.

        A checkpoint is written after every ``save_step`` completed steps and
        once more when training finishes.
        """
        global_step = 1
        for epoch in range(1, self.epochs + 1):
            for batch_data in self.train_loader:
                self.model.train()
                batch_data = {key: value.to(self.device) for key, value in batch_data.items()}
                output = self.model(batch_data["input_ids"],
                                    batch_data["attention_mask"],
                                    batch_data["labels"])
                loss = output.loss
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                self.schedule.step()
                print(f"【train】{epoch}/{self.epochs} {global_step}/{self.total_step} loss:{loss.item()}")
                # Check before incrementing so the save happens exactly on
                # multiples of save_step rather than one step early.
                if global_step % self.save_step == 0:
                    self._save_checkpoint()
                global_step += 1

        self._save_checkpoint()

    def test(self):
        """Reload the saved checkpoint, evaluate on ``test_loader`` and return the report.

        Returns:
            The text produced by ``classification_report(trues, preds, digits=7)``.
        """
        # map_location lets a checkpoint saved on one device load on another.
        state_dict = torch.load(self._checkpoint_path(), map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.eval()
        preds = []
        trues = []
        # Evaluation needs no gradients; skipping them saves memory and time.
        with torch.no_grad():
            for batch_data in tqdm(self.test_loader):
                batch_data = {key: value.to(self.device) for key, value in batch_data.items()}
                input_ids = batch_data["input_ids"]
                attention_mask = batch_data["attention_mask"]
                labels = batch_data["labels"]
                output = self.model(input_ids, attention_mask, labels)

                batch_preds, batch_trues = self._to_tag_sequences(
                    output.logits,
                    labels.detach().cpu().numpy(),
                    attention_mask.detach().cpu().numpy(),
                )
                preds.extend(batch_preds)
                trues.extend(batch_trues)

        return classification_report(trues, preds, digits=7)

In [None]:
def build_optimizer_and_scheduler(args, model, t_total):
    """Create the AdamW optimizer and the linear warm-up/decay schedule.

    Parameters are split into four groups: encoder vs. everything else, each
    with and without weight decay. Parameters whose top-level module is named
    ``bert`` or ``bert_module`` use ``args.bert_learning_rate``; all others use
    ``args.crf_learning_rate``. Parameters whose name contains ``bias`` or
    ``LayerNorm.weight`` are excluded from weight decay.

    Args:
        args: configuration providing ``weight_decay``, ``bert_learning_rate``,
            ``crf_learning_rate``, ``adam_epsilon`` and ``warmup_proportion``.
        model: the model, optionally wrapped in an object exposing ``.module``.
        t_total: total number of optimizer steps of the training run.

    Returns:
        ``(optimizer, scheduler)``.
    """
    # Unwrap wrappers that expose the real model as `.module`.
    module = model.module if hasattr(model, "module") else model

    no_decay = ("bias", "LayerNorm.weight")

    def skips_decay(param_name):
        return any(marker in param_name for marker in no_decay)

    bert_named_params = []
    other_named_params = []
    for name, param in module.named_parameters():
        top_level = name.split('.')[0]
        if top_level in ("bert_module", "bert"):
            bert_named_params.append((name, param))
        else:
            other_named_params.append((name, param))

    # Group order: bert/decay, bert/no-decay, other/decay, other/no-decay.
    optimizer_grouped_parameters = []
    for named_params, learning_rate in (
        (bert_named_params, args.bert_learning_rate),
        (other_named_params, args.crf_learning_rate),
    ):
        optimizer_grouped_parameters.append(
            {"params": [p for n, p in named_params if not skips_decay(n)],
             "weight_decay": args.weight_decay, "lr": learning_rate}
        )
        optimizer_grouped_parameters.append(
            {"params": [p for n, p in named_params if skips_decay(n)],
             "weight_decay": 0.0, "lr": learning_rate}
        )

    # torch.optim.AdamW replaces the deprecated transformers.AdamW. Every group
    # sets its own lr and weight_decay, and eps is passed explicitly, so the
    # differing library defaults do not come into play.
    optimizer = torch.optim.AdamW(
        optimizer_grouped_parameters, lr=args.bert_learning_rate, eps=args.adam_epsilon
    )
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=int(args.warmup_proportion * t_total), num_training_steps=t_total
    )

    return optimizer, scheduler

In [None]:
#process data
def preprocess_data():
  """Load the dataset, tokenize it and align the word-level tags with sub-word tokens.

  Returns:
    (tokenized_datasets, data_collator): the mapped dataset (original columns
    and ``token_type_ids`` removed, ``labels`` added) and a
    ``DataCollatorForTokenClassification`` built on the same tokenizer.
  """
  args = NerConfig()
  raw_datasets = load_dataset("thongnef/dataset_dacn")
  tokenizer = AutoTokenizer.from_pretrained(args.bert_dir)

  # B-x id -> I-x id for every entity type that has an I- tag. Deriving it from
  # the label table keeps it in sync: the former hand-written table keyed
  # I-relevant_term on id 10 (B-programming language) instead of 11
  # (B-relevant_term).
  begin_to_inside = {
      tag_id: args.label2id["I-" + tag[2:]]
      for tag, tag_id in args.label2id.items()
      if tag.startswith("B-") and ("I-" + tag[2:]) in args.label2id
  }
  # Special and padding tokens need a valid tag id because labels are fed to a
  # CRF. Use "O" rather than id 0, which is B-application and would plant a
  # spurious entity on every unmasked special token.
  special_token_label = args.label2id["O"]

  def align_labels_with_tokens(labels, word_ids):
      """Expand one example's word-level tag ids to one tag id per token."""
      new_labels = []
      previous_word = None
      for word_id in word_ids:
          if word_id is None:
              # Special token or padding.
              new_labels.append(special_token_label)
          elif word_id != previous_word:
              # First sub-token of a word keeps the word's tag.
              new_labels.append(labels[word_id])
          else:
              # Later sub-token of the same word: B-x becomes I-x when I-x exists.
              label = labels[word_id]
              new_labels.append(begin_to_inside.get(label, label))
          previous_word = word_id
      return new_labels

  def tokenize_and_align_labels(examples):
      """Batched map function: tokenize pre-split words and attach aligned labels."""
      tokenized_inputs = tokenizer(
          examples["words"],
          truncation=True,
          is_split_into_words=True,
          padding="max_length",
          # Pad/truncate to the configured length explicitly instead of relying
          # on the tokenizer's own default maximum.
          max_length=args.max_seq_len,
      )
      tokenized_inputs["labels"] = [
          align_labels_with_tokens(labels, tokenized_inputs.word_ids(i))
          for i, labels in enumerate(examples["tag"])
      ]
      return tokenized_inputs

  tokenized_datasets = raw_datasets.map(
      tokenize_and_align_labels,
      batched=True,
      remove_columns=raw_datasets["train"].column_names,
  )

  # The model's forward() takes no token_type_ids, so drop the column.
  tokenized_datasets = tokenized_datasets.remove_columns("token_type_ids")
  data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

  return tokenized_datasets, data_collator


In [None]:
def main():
    """Run the whole pipeline: dump the config, train, evaluate, print the report.

    Side effects: writes ``ner_args.json`` and the model checkpoint into
    ``args.output_dir`` and prints training progress and the final report.

    Returns:
        The evaluation report produced by ``Trainer.test()``, so callers can
        keep it (e.g. ``report = main()``).
    """
    args = NerConfig()

    # Persist the exact configuration next to the checkpoint.
    with open(os.path.join(args.output_dir, "ner_args.json"), "w") as fp:
        json.dump(vars(args), fp, ensure_ascii=False, indent=2)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    tokenized_datasets, data_collator = preprocess_data()

    train_dataset = tokenized_datasets["train"]
    # The dataset's "test" split serves as both dev and test set here.
    dev_dataset = tokenized_datasets["test"]
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=args.train_batch_size, num_workers=2, collate_fn=data_collator)
    dev_loader = DataLoader(dev_dataset, shuffle=False, batch_size=args.dev_batch_size, num_workers=2, collate_fn=data_collator)

    model = BertNer(args)
    model.to(device)

    total_steps = len(train_loader) * args.epochs
    optimizer, schedule = build_optimizer_and_scheduler(args, model, total_steps)

    trainer = Trainer(
        output_dir=args.output_dir,
        model=model,
        train_loader=train_loader,
        # Forward the configured interval; it was previously never passed, so
        # changing NerConfig.save_step had no effect.
        save_step=args.save_step,
        dev_loader=dev_loader,
        test_loader=dev_loader,
        optimizer=optimizer,
        schedule=schedule,
        epochs=args.epochs,
        device=device,
        id2label=args.id2label
    )

    trainer.train()

    report = trainer.test()
    print(report)
    return report


In [None]:
# Entry point: run the full pipeline defined in main() (training, then evaluation).
main()

Downloading readme:   0%|          | 0.00/519 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.03M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/504k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/13794 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3449 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/669k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Map:   0%|          | 0/13794 [00:00<?, ? examples/s]

Map:   0%|          | 0/3449 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/431M [00:00<?, ?B/s]

Some weights of BertModel were not initialized from the model checkpoint at thongnef/bert-finetuned-ner-cti and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


【train】1/5 1/8625 loss:195.16787719726562
【train】1/5 2/8625 loss:233.55917358398438
【train】1/5 3/8625 loss:156.13648986816406
【train】1/5 4/8625 loss:173.29269409179688
【train】1/5 5/8625 loss:152.84422302246094
【train】1/5 6/8625 loss:170.88853454589844
【train】1/5 7/8625 loss:190.3955078125
【train】1/5 8/8625 loss:150.55860900878906
【train】1/5 9/8625 loss:193.64218139648438
【train】1/5 10/8625 loss:171.3553924560547
【train】1/5 11/8625 loss:79.86306762695312
【train】1/5 12/8625 loss:117.07167053222656
【train】1/5 13/8625 loss:98.7008056640625
【train】1/5 14/8625 loss:101.8968734741211
【train】1/5 15/8625 loss:60.932376861572266
【train】1/5 16/8625 loss:82.23583984375
【train】1/5 17/8625 loss:66.083251953125
【train】1/5 18/8625 loss:68.63106536865234
【train】1/5 19/8625 loss:53.75434112548828
【train】1/5 20/8625 loss:55.719879150390625
【train】1/5 21/8625 loss:73.84825897216797
【train】1/5 22/8625 loss:45.197513580322266
【train】1/5 23/8625 loss:47.589637756347656
【train】1/5 24/8625 loss:48.695129394531

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


【train】2/5 1726/8625 loss:0.06680536270141602
【train】2/5 1727/8625 loss:6.115837097167969
【train】2/5 1728/8625 loss:8.693008422851562
【train】2/5 1729/8625 loss:0.07357406616210938
【train】2/5 1730/8625 loss:0.4268608093261719
【train】2/5 1731/8625 loss:0.16737747192382812
【train】2/5 1732/8625 loss:0.08260536193847656
【train】2/5 1733/8625 loss:0.2237396240234375
【train】2/5 1734/8625 loss:0.3422670364379883
【train】2/5 1735/8625 loss:0.07525253295898438
【train】2/5 1736/8625 loss:0.03706550598144531
【train】2/5 1737/8625 loss:0.1264173984527588
【train】2/5 1738/8625 loss:0.5866813659667969
【train】2/5 1739/8625 loss:0.027912139892578125
【train】2/5 1740/8625 loss:3.4232025146484375
【train】2/5 1741/8625 loss:0.22823333740234375
【train】2/5 1742/8625 loss:0.02526712417602539
【train】2/5 1743/8625 loss:0.1752300262451172
【train】2/5 1744/8625 loss:0.06155586242675781
【train】2/5 1745/8625 loss:0.3554725646972656
【train】2/5 1746/8625 loss:0.4066505432128906
【train】2/5 1747/8625 loss:0.0637664794921875
【

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


【train】3/5 3451/8625 loss:0.20887279510498047
【train】3/5 3452/8625 loss:0.02277851104736328
【train】3/5 3453/8625 loss:0.07638978958129883
【train】3/5 3454/8625 loss:0.16247177124023438
【train】3/5 3455/8625 loss:0.031087875366210938
【train】3/5 3456/8625 loss:0.11355376243591309
【train】3/5 3457/8625 loss:0.00910186767578125
【train】3/5 3458/8625 loss:0.011487007141113281
【train】3/5 3459/8625 loss:0.11832809448242188
【train】3/5 3460/8625 loss:0.1840686798095703
【train】3/5 3461/8625 loss:1.4584789276123047
【train】3/5 3462/8625 loss:0.01920604705810547
【train】3/5 3463/8625 loss:0.030987977981567383
【train】3/5 3464/8625 loss:0.02150726318359375
【train】3/5 3465/8625 loss:0.01760101318359375
【train】3/5 3466/8625 loss:0.5195388793945312
【train】3/5 3467/8625 loss:0.0818328857421875
【train】3/5 3468/8625 loss:0.122894287109375
【train】3/5 3469/8625 loss:0.11274528503417969
【train】3/5 3470/8625 loss:0.5423240661621094
【train】3/5 3471/8625 loss:0.061374664306640625
【train】3/5 3472/8625 loss:0.013539314

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


【train】4/5 5176/8625 loss:0.1462688446044922
【train】4/5 5177/8625 loss:0.021436691284179688
【train】4/5 5178/8625 loss:0.01480865478515625
【train】4/5 5179/8625 loss:0.022388696670532227
【train】4/5 5180/8625 loss:0.1940140724182129
【train】4/5 5181/8625 loss:0.0431976318359375
【train】4/5 5182/8625 loss:0.020124435424804688
【train】4/5 5183/8625 loss:0.015504837036132812
【train】4/5 5184/8625 loss:0.011484146118164062
【train】4/5 5185/8625 loss:0.03558826446533203
【train】4/5 5186/8625 loss:0.8443813323974609
【train】4/5 5187/8625 loss:0.06433868408203125
【train】4/5 5188/8625 loss:0.022822856903076172
【train】4/5 5189/8625 loss:0.026686429977416992
【train】4/5 5190/8625 loss:0.014736175537109375
【train】4/5 5191/8625 loss:0.766357421875
【train】4/5 5192/8625 loss:0.0105438232421875
【train】4/5 5193/8625 loss:0.5013980865478516
【train】4/5 5194/8625 loss:0.10072445869445801
【train】4/5 5195/8625 loss:0.014513015747070312
【train】4/5 5196/8625 loss:0.2237567901611328
【train】4/5 5197/8625 loss:0.009578704

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


【train】5/5 6901/8625 loss:0.007864236831665039
【train】5/5 6902/8625 loss:0.008399724960327148
【train】5/5 6903/8625 loss:0.00673675537109375
【train】5/5 6904/8625 loss:0.013822555541992188
【train】5/5 6905/8625 loss:0.0036497116088867188
【train】5/5 6906/8625 loss:0.012851715087890625
【train】5/5 6907/8625 loss:0.005507469177246094
【train】5/5 6908/8625 loss:0.5430335998535156
【train】5/5 6909/8625 loss:0.061313629150390625
【train】5/5 6910/8625 loss:0.13126754760742188
【train】5/5 6911/8625 loss:0.05367088317871094
【train】5/5 6912/8625 loss:0.008913993835449219
【train】5/5 6913/8625 loss:0.01275634765625
【train】5/5 6914/8625 loss:0.004318714141845703
【train】5/5 6915/8625 loss:0.11980199813842773
【train】5/5 6916/8625 loss:0.13880157470703125
【train】5/5 6917/8625 loss:0.010755538940429688
【train】5/5 6918/8625 loss:0.025848388671875
【train】5/5 6919/8625 loss:0.03189849853515625
【train】5/5 6920/8625 loss:0.05801582336425781
【train】5/5 6921/8625 loss:1.7608084678649902
【train】5/5 6922/8625 loss:0.00

  0%|          | 0/432 [00:00<?, ?it/s]You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|██████████| 432/432 [03:56<00:00,  1.82it/s]


                      precision    recall  f1-score   support

         application  0.9560424 0.9565764 0.9563093      7162
              cve id  1.0000000 1.0000000 1.0000000      3600
             edition  0.8032787 0.7903226 0.7967480       124
                file  0.9616813 0.9986120 0.9797987      6484
            function  0.9683009 0.9991278 0.9834728      2293
            hardware  0.6720000 0.7777778 0.7210300       108
            language  0.0000000 0.0000000 0.0000000         1
              method  0.9040000 1.0000000 0.9495798       226
                  os  0.9429825 0.9612519 0.9520295       671
           parameter  0.9936407 0.9455371 0.9689922       661
programming language  0.9655172 1.0000000 0.9824561        28
       relevant_term  0.9957781 0.9959884 0.9958833     18945
              update  0.9375000 0.9328358 0.9351621       804
              vendor  0.9601770 0.9634928 0.9618321      2027
             version  0.9844907 0.9846642 0.9845774      5673

      

In [None]:
# The evaluation report is pre-formatted text (classification_report(..., digits=7)),
# so it is printed as-is: round() only works on numbers and raises TypeError on a str.
# `report` is assigned inside main(), so it exists at notebook scope only if an
# earlier cell bound it there; look it up defensively instead of raising NameError.
report_text = globals().get("report")
if report_text is None:
    print("`report` is not defined at notebook scope; assign the evaluation result to `report` first.")
else:
    print(report_text)