In [1]:
import os
import sys

# Extend the import search path with a directory three levels up (relative to
# the working directory) so the `src.*` imports in the following cells resolve.
# NOTE(review): presumably this is the repository root -- confirm.
sys.path.append("../../../")
# Set before any tokenizer is created.
# NOTE(review): presumably disables Hugging Face tokenizers' internal
# parallelism so it does not clash with DataLoader worker processes -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.utils.helper import report_to_df, append_nth_row

In [3]:
# Run configuration: everything a reader might want to tune lives in this cell.
name = "bert-4-128-yahoo"  # model/config identifier handed to Config(name, device)
device = torch.device("cuda:0")  # handed to Config together with `name`
checkpoint = None  # NOTE(review): not referenced by any visible cell
batch_size = 16  # forwarded to load_data
num_workers = 4  # forwarded to load_data
num_samples = 16  # forwarded to SamplingDataset and get_similarity
ratio = 0.5  # forwarded as `sparsity_ratio` to prune_concern_identification
seed = 44  # NOTE(review): not referenced by any visible cell; the loop calls config.init_seed() instead
include_layers = ["attention"]  # substring filter: only layers whose qualified name contains one of these are pruned
exclude_layers = None  # substring filter for layers to skip; None disables it

In [4]:
# Record and print the wall-clock time at which this run started.
script_start_time = datetime.now()
print(f"Script started at: {script_start_time.strftime('%Y-%m-%d %H:%M:%S')}")

Script started at: 2024-11-03 16:35:25


In [5]:
# Build the project Config for the chosen model/device and load the model.
config = Config(name, device)
# NOTE(review): `num_labels` is not read by any visible cell; the pruning loop
# uses `config.num_labels` instead.
num_labels = config.config["num_labels"]
model = load_model(config)

Loading the model.
{'architectures': 'bert',
 'dataset_name': 'YahooAnswersTopics',
 'model_name': 'models/bert-4-128-yahoo',
 'num_labels': 10,
 'tokenizer_name': 'fabriceyhc/bert-base-uncased-yahoo_answers_topics'}


The model models/bert-4-128-yahoo is loaded.


In [6]:
# Load the train/validation/test dataloaders for the dataset named in `config`.
# NOTE(review): with do_cache=True the recorded output shows the splits being
# read from cached .pkl files; the cache location is decided inside load_data.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    config,
    batch_size=batch_size,
    num_workers=num_workers,
    do_cache=True,
)

Loading cached dataset YahooAnswersTopics.
train.pkl is loaded from cache.
valid.pkl is loaded from cache.
test.pkl is loaded from cache.
The dataset YahooAnswersTopics is loaded
{'config_name': 'yahoo_answers_topics',
 'features': {'first_column': 'question_title', 'second_column': 'topic'},
 'path': 'yahoo_answers_topics'}


In [7]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns

def see(data):
    """Show the values of a tensor as a bar plot, one bar per flattened element.

    Debugging helper for eyeballing per-feature statistics such as norms or
    pruning coefficients.

    Args:
        data: A ``torch.Tensor`` of any shape, on any device. It is detached,
            copied to host memory and flattened before plotting.
    """
    # detach() first: Tensor.numpy() raises for tensors that require grad.
    values = data.detach().cpu().numpy().flatten()

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(range(len(values)), values)
    ax.set_title("Bar Plot of Tensor Values")
    ax.set_xlabel("Index")
    ax.set_ylabel("Value")
    plt.show()



In [8]:
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)

In [9]:
import torch
import torch.nn as nn
from scipy.stats import norm
from typing import *
from torch import Tensor
from torch.nn import Module
import torch.nn.functional as F
from functools import partial
from src.utils.sampling import SamplingDataset
from src.pruning.propagate import propagate
from src.utils.helper import Config
import gc


class Pruner:
    """Collects concern-based pruning masks through forward hooks and applies them.

    ``ci`` is meant to be registered as a forward hook on every layer in
    ``layers``. During a forward pass over a batch whose first half holds the
    "concern" samples and whose second half holds the "non-concern" samples,
    it scores each weight and stores a boolean keep-mask per layer.

    Attributes:
        ratio: Fraction of weights (unstructured) or output rows (structured)
            to prune.
        method: ``"unstructed"`` or ``"structed"`` (spelling is part of the API).
        layers: Mapping from qualified layer name to module; used to translate
            the module passed to the hook back into its name.
        pruning_mask: Per-layer boolean mask, ``True`` = keep. Shape
            ``(out_features, in_features)`` for unstructured pruning and
            ``(out_features,)`` for structured pruning.
        pruning_indices: Per-layer sorted indices of the pruned entries
            (per row for unstructured, row indices for structured).
    """

    def __init__(self, layers, ratio: float, method="unstructed") -> None:
        self.ratio = ratio
        self.method = method
        self.layers = layers
        self.pruning_mask = {}
        self.pruning_indices = {}

    def _layer_name(self, layer):
        """Return the key under which ``layer`` (by identity) is registered."""
        name = next((key for key, val in self.layers.items() if val is layer), None)
        if name is None:
            # IndexError keeps the exception type of the former `[...][0]` lookup.
            raise IndexError("The hooked layer is not registered in Pruner.layers")
        return name

    def ci(self, layer, inputs, outputs):
        """Forward hook: score the weights of ``layer`` and record its mask.

        Args:
            layer: Module the hook fired on; must be one of ``self.layers``
                and expose a 2-D ``weight`` of shape (out_features, in_features).
            inputs: Positional inputs of the forward call. ``inputs[0]`` is
                split in half along dim 0: first half = concern samples,
                second half = non-concern samples. Its last dimension must be
                the layer's ``in_features``.
            outputs: Unused; present to match the forward-hook signature.

        Raises:
            NotImplementedError: If ``self.method`` is not a supported method.
            IndexError: If ``layer`` is not one of ``self.layers``.
        """
        if self.method not in ("unstructed", "structed"):
            raise NotImplementedError(f"{self.method} is not implemented")
        layer_name = self._layer_name(layer)

        current_weight = layer.weight.data
        X = inputs[0]
        half = X.shape[0] // 2

        # Collapse every leading dimension so each column is one input feature.
        concern_flat = X[:half].reshape((-1, X.shape[-1]))
        non_concern_flat = X[half:].reshape((-1, X.shape[-1]))

        concern_norm = torch.norm(concern_flat, dim=0).reshape(1, -1)
        non_concern_norm = torch.norm(non_concern_flat, dim=0).reshape(1, -1)

        cosine_similarity = F.cosine_similarity(
            concern_flat, non_concern_flat, dim=0
        ).reshape(1, -1)

        # Rounding can push |cos| marginally above 1, which would make the
        # radicand negative and turn the score into NaN; NaN scores sort last
        # and would therefore never be pruned.
        sine_similarity = torch.sqrt(torch.clamp(1 - cosine_similarity**2, min=0.0))
        # NOTE: alpha is singular where cos == sin (both 1/sqrt(2)).
        alpha = (cosine_similarity * sine_similarity) / (
            cosine_similarity - sine_similarity
        )
        coefficient = concern_norm + alpha * non_concern_norm
        importance_score = torch.abs(current_weight) * torch.abs(coefficient)

        if self.method == "unstructed":
            # Per output row, drop the `num_prune` lowest-scored input weights.
            num_prune = int(current_weight.shape[1] * self.ratio)
            order = torch.sort(importance_score, dim=-1, stable=True)[1]
            pruned = order[:, :num_prune]
            W_mask = torch.ones_like(importance_score, dtype=torch.bool).scatter_(
                1, pruned, False
            )
            indices = torch.sort(pruned, dim=1)[0]
        else:  # "structed"
            # Drop whole output rows with the smallest L2 norm of their scores.
            importance_vector = torch.norm(importance_score, dim=1)
            num_prune = int(importance_vector.shape[0] * self.ratio)
            pruned = torch.sort(importance_vector)[1][:num_prune]
            W_mask = torch.ones_like(importance_vector, dtype=torch.bool).scatter_(
                0, pruned, False
            )
            indices = torch.sort(pruned)[0]

        self.pruning_mask[layer_name] = W_mask
        self.pruning_indices[layer_name] = indices

    @staticmethod
    def apply(layer, method, axis, mask, keepdim):
        """Multiply ``layer.weight`` by ``mask`` and optionally shrink the layer.

        Args:
            layer: Module with a 2-D ``weight`` and a ``bias`` attribute
                (``bias`` may be ``None``).
            method: ``"structed"`` enables physical removal of rows/columns
                when ``keepdim`` is false; any other value only zeroes weights.
            axis: For structured shrinking, 0 removes all-zero rows (and the
                matching bias entries), 1 removes all-zero columns.
            mask: Tensor broadcastable to the weight's shape; falsy/zero
                entries zero the corresponding weights.
            keepdim: If true, the weight keeps its shape and is only zeroed.
        """
        current_weight = layer.weight.data.clone()
        # The mask may have been moved to another device by the caller.
        current_weight = current_weight * mask.to(current_weight.device)
        if not keepdim and method == "structed":
            if axis == 0:
                zero_rows = (current_weight == 0).all(dim=1)
                current_weight = current_weight[~zero_rows]

                if layer.bias is not None:
                    current_bias = layer.bias.data.clone()
                    layer.bias.data = current_bias[~zero_rows]
            elif axis == 1:
                zero_cols = (current_weight == 0).all(dim=0)
                current_weight = current_weight[:, ~zero_cols]
        # Keep the layer's bookkeeping in sync with the (possibly smaller) weight.
        layer.in_features = current_weight.shape[1]
        layer.out_features = current_weight.shape[0]
        layer.weight.data = current_weight


def find_layers(
    model: Module,
    layer_types: Optional[List[Type[Module]]] = None,
    include_layers: Optional[List[str]] = None,
    exclude_layers: Optional[List[str]] = None,
    prefix: str = "",
) -> Dict[str, Module]:
    """Collect sub-modules of the requested types, keyed by dotted name.

    The module tree is walked depth-first in ``named_children`` order. A child
    whose qualified name contains any ``exclude_layers`` substring is skipped
    together with its whole subtree. A child of a requested type is collected
    when ``include_layers`` is empty or its qualified name contains one of the
    ``include_layers`` substrings; children of other types are descended into.

    Args:
        model: Root module to search.
        layer_types: Module classes to collect; defaults to ``[nn.Linear]``.
        include_layers: Substrings of which at least one must occur in a
            layer's qualified name; ``None``/empty disables the filter.
        exclude_layers: Substrings that disqualify a layer and its subtree.
        prefix: Name prepended (dot-separated) to every qualified name.

    Returns:
        Dict mapping qualified layer name to module, in traversal order.
    """
    wanted_types = tuple([nn.Linear] if layer_types is None else layer_types)
    includes = [] if include_layers is None else include_layers
    excludes = [] if exclude_layers is None else exclude_layers
    found: Dict[str, Module] = {}

    def walk(parent: Module, path: str) -> None:
        for child_name, child in parent.named_children():
            qualified = f"{path}.{child_name}" if path else child_name
            if any(pattern in qualified for pattern in excludes):
                continue
            is_target = isinstance(child, wanted_types)
            is_included = (not includes) or any(
                pattern in qualified for pattern in includes
            )
            if is_target:
                # Leaf of interest: keep it only if it passes the include filter.
                if is_included:
                    found[qualified] = child
            else:
                # Containers are always descended into; the include filter is
                # applied to the names of the leaves below them.
                walk(child, qualified)

    walk(model, prefix)
    return found


def get_hook(method):
    """Wrap ``method`` as a forward hook that always returns ``None``.

    Args:
        method: Callable invoked as ``method(module, input, output)``.

    Returns:
        A function with the forward-hook signature that forwards its three
        arguments to ``method`` and discards whatever ``method`` returns.
    """

    def hook(module, input, output):
        # The result is dropped on purpose: the wrapper itself returns None.
        method(module, input, output)
        return None

    return hook

def prune_concern_identification(
    model: Module,
    config: Config,
    dominant_concern: SamplingDataset,
    non_dominant_concern: SamplingDataset,
    sparsity_ratio: float = 0.6,
    include_layers: Optional[List[str]] = None,
    exclude_layers: Optional[List[str]] = None,
    method: str = "unstructed",
    keep_dim=True,
) -> None:
    """Prune ``model`` in place using concern vs. non-concern activations.

    All batches of ``dominant_concern`` followed by all batches of
    ``non_dominant_concern`` are concatenated into one batch and propagated
    through the model once. Forward hooks (``Pruner.ci``) score the weights of
    the selected layers during that pass; the resulting masks are then applied.

    Args:
        model: Model to prune (modified in place).
        config: Project configuration forwarded to ``propagate``.
        dominant_concern: Iterable of dict batches for the target concern.
        non_dominant_concern: Iterable of dict batches for the other concerns;
            must yield as many batches as ``dominant_concern``.
        sparsity_ratio: Fraction of weights/rows to prune per layer.
        include_layers: Name substrings a layer must contain to be pruned.
        exclude_layers: Name substrings that exclude a layer.
        method: ``"unstructed"`` masks individual weights of every selected
            layer; ``"structed"`` scores only layers whose name contains
            ``"intermediate"`` and reuses that mask on the columns of the
            following ``"output"`` layer.
        keep_dim: If true, weights are zeroed but keep their shape.

    Raises:
        ValueError: If the two concerns yield a different number of batches,
            or no batches at all.
    """
    layers = find_layers(
        model, include_layers=include_layers, exclude_layers=exclude_layers
    )
    pruner = Pruner(layers, ratio=sparsity_ratio, method=method)

    handle_list = []
    try:
        for name, layer in layers.items():
            # Structured pruning only scores the intermediate layers.
            if method != "structed" or "intermediate" in name:
                handle_list.append(layer.register_forward_hook(pruner.ci))

        dominant_batches = list(dominant_concern)
        non_dominant_batches = list(non_dominant_concern)

        if len(dominant_batches) != len(non_dominant_batches):
            raise ValueError(
                "Batch sizes of dominant_concern and non_dominant_concern does not match."
            )
        if not dominant_batches:
            raise ValueError(
                "dominant_concern and non_dominant_concern must contain at least one batch."
            )

        # Dominant samples first, non-dominant second: Pruner.ci splits the
        # batch dimension in half in exactly this order.
        all_batches = dominant_batches + non_dominant_batches
        combined_batches = {
            key: torch.cat([batch[key] for batch in all_batches])
            for key in dominant_batches[0].keys()
        }
        propagate(model, [combined_batches], config)
    finally:
        # Always detach the hooks, even if batch assembly or the forward pass
        # fails, so the model is not left with stale hooks attached.
        for handle in handle_list:
            handle.remove()

    intermediate_mask = None
    for name, layer in layers.items():
        weight = layer.weight
        if method == "structed":
            if "intermediate" in name:
                # Follow the weight's device instead of assuming CPU.
                intermediate_mask = pruner.pruning_mask[name].to(weight.device)
                row_mask = intermediate_mask.unsqueeze(dim=1).expand(
                    -1, weight.shape[1]
                )
                Pruner.apply(
                    layer,
                    method="structed",
                    axis=0,
                    mask=row_mask,
                    keepdim=keep_dim,
                )
            elif "output" in name:
                # Only a layer that consumes the pruned intermediate
                # activations can take the column mask. Other layers whose
                # name merely contains "output" (seen before any intermediate
                # layer, or with a different input width) are left untouched.
                if (
                    intermediate_mask is None
                    or intermediate_mask.shape[0] != weight.shape[1]
                ):
                    continue
                col_mask = (
                    intermediate_mask.to(weight.device)
                    .unsqueeze(dim=0)
                    .expand(weight.shape[0], -1)
                )
                Pruner.apply(
                    layer,
                    method="structed",
                    axis=1,
                    mask=col_mask,
                    keepdim=keep_dim,
                )
        elif method == "unstructed":
            current_mask = pruner.pruning_mask[name].to(weight.device)
            Pruner.apply(
                layer, method="unstructed", axis=0, mask=current_mask, keepdim=keep_dim
            )



In [10]:
# One evaluation result per concern (class label), in label order.
result_list = []

# For every class: sample data, prune a fresh copy of the model with respect to
# that class, then evaluate and compare the pruned copy against the original.
for concern in range(config.num_labels):
    # NOTE(review): presumably re-seeds the RNGs so every concern is sampled
    # reproducibly -- confirm in Config.init_seed.
    config.init_seed()
    # The positive and negative datasets differ only in the fifth positional
    # argument (True vs. False).
    # NOTE(review): presumably that flag selects samples of `concern` vs. of
    # the other classes, and the literal 4 is a worker/batch setting -- verify
    # against SamplingDataset's signature.
    positive_samples = SamplingDataset(
        train_dataloader,
        config,
        concern,
        num_samples,
        True,
        4,
        resample=False,
    )
    negative_samples = SamplingDataset(
        train_dataloader,
        config,
        concern,
        num_samples,
        False,
        4,
        resample=False,
    )
    # NOTE(review): `all_samples` is never read afterwards, and 200 is not a
    # valid label for this 10-class dataset. It is kept because constructing it
    # may still affect RNG state -- confirm before removing.
    all_samples = SamplingDataset(
        train_dataloader,
        config,
        200,
        num_samples,
        False,
        4,
        resample=False,
    )

    # Prune a deep copy so `model` stays unpruned for the comparisons below.
    module = copy.deepcopy(model)

    prune_concern_identification(
        module,
        config,
        positive_samples,
        negative_samples,
        include_layers=include_layers,
        exclude_layers=exclude_layers,
        sparsity_ratio=ratio,
        keep_dim=True,
        method="unstructed",
    )
    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, config, test_dataloader, verbose=False)
    result_list.append(result)
    get_sparsity(module)

    get_similarity(model, module, valid_dataloader, concern, num_samples, config)
    # NOTE(review): the original model does not change inside the loop, so this
    # perplexity is recomputed with the same inputs on every iteration.
    print("original model's perplexity")
    get_perplexity(model, valid_dataloader, config)
    print("pruned model's perplexity")
    get_perplexity(module, valid_dataloader, config)

Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Linear(in_features=128, out_features=128, bias=True)
Evaluate the pruned model 0


Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.3122
Precision: 0.6722, Recall: 0.5843, F1-Score: 0.5986
              precision    recall  f1-score   support

           0     0.4800    0.5264    0.5022      2992
           1     0.7625    0.3422    0.4724      2992
           2     0.7481    0.5286    0.6195      3012
           3     0.2764    0.7422    0.4028      2998
           4     0.7863    0.7117    0.7472      2973
           5     0.8833    0.7308    0.7999      3054
           6     0.7339    0.3710    0.4928      3003
           7     0.6577    0.6036    0.6295      3012
           8     0.5941    0.6989    0.6422      2982
           9     0.7996    0.5875    0.6774      2982

    accuracy                         0.5844     30000
   macro avg     0.6722    0.5843    0.5986     30000
weighted avg     0.6725    0.5844    0.5988     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.3190
Precision: 0.6682, Recall: 0.5907, F1-Score: 0.6059
              precision    recall  f1-score   support

           0     0.4533    0.5488    0.4965      2992
           1     0.7154    0.4412    0.5458      2992
           2     0.7672    0.5252    0.6236      3012
           3     0.2886    0.7021    0.4091      2998
           4     0.8311    0.6535    0.7317      2973
           5     0.8774    0.7338    0.7992      3054
           6     0.7188    0.3753    0.4931      3003
           7     0.6314    0.6318    0.6316      3012
           8     0.6004    0.7009    0.6468      2982
           9     0.7979    0.5946    0.6814      2982

    accuracy                         0.5909     30000
   macro avg     0.6682    0.5907    0.6059     30000
weighted avg     0.6684    0.5909    0.6061     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.2960
Precision: 0.6567, Recall: 0.6001, F1-Score: 0.6040
              precision    recall  f1-score   support

           0     0.5228    0.4823    0.5017      2992
           1     0.7798    0.3219    0.4556      2992
           2     0.6118    0.6806    0.6444      3012
           3     0.3163    0.6724    0.4303      2998
           4     0.7961    0.7275    0.7603      2973
           5     0.8512    0.7603    0.8032      3054
           6     0.7449    0.3636    0.4887      3003
           7     0.5912    0.6554    0.6216      3012
           8     0.5895    0.7002    0.6401      2982
           9     0.7634    0.6372    0.6946      2982

    accuracy                         0.6004     30000
   macro avg     0.6567    0.6001    0.6040     30000
weighted avg     0.6569    0.6004    0.6043     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.3071
Precision: 0.6642, Recall: 0.5861, F1-Score: 0.5974
              precision    recall  f1-score   support

           0     0.4573    0.5261    0.4893      2992
           1     0.7872    0.3115    0.4464      2992
           2     0.7375    0.5402    0.6236      3012
           3     0.2879    0.7105    0.4098      2998
           4     0.8025    0.6821    0.7375      2973
           5     0.8807    0.7351    0.8014      3054
           6     0.7003    0.3883    0.4996      3003
           7     0.6213    0.6418    0.6314      3012
           8     0.5964    0.6972    0.6429      2982
           9     0.7706    0.6284    0.6923      2982

    accuracy                         0.5863     30000
   macro avg     0.6642    0.5861    0.5974     30000
weighted avg     0.6645    0.5863    0.5976     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.3021
Precision: 0.6644, Recall: 0.5846, F1-Score: 0.5945
              precision    recall  f1-score   support

           0     0.5024    0.4880    0.4951      2992
           1     0.7571    0.3105    0.4404      2992
           2     0.6994    0.5730    0.6299      3012
           3     0.2788    0.7382    0.4048      2998
           4     0.7363    0.7666    0.7512      2973
           5     0.8616    0.7400    0.7962      3054
           6     0.7587    0.3423    0.4718      3003
           7     0.6252    0.6292    0.6272      3012
           8     0.6313    0.6529    0.6419      2982
           9     0.7933    0.6050    0.6865      2982

    accuracy                         0.5847     30000
   macro avg     0.6644    0.5846    0.5945     30000
weighted avg     0.6647    0.5847    0.5947     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.2796
Precision: 0.6494, Recall: 0.6064, F1-Score: 0.6115
              precision    recall  f1-score   support

           0     0.4458    0.5735    0.5017      2992
           1     0.7486    0.4011    0.5223      2992
           2     0.6985    0.5976    0.6441      3012
           3     0.3413    0.6368    0.4444      2998
           4     0.7678    0.7454    0.7564      2973
           5     0.7828    0.7917    0.7872      3054
           6     0.7082    0.3823    0.4965      3003
           7     0.6032    0.6414    0.6217      3012
           8     0.6321    0.6546    0.6432      2982
           9     0.7660    0.6398    0.6972      2982

    accuracy                         0.6066     30000
   macro avg     0.6494    0.6064    0.6115     30000
weighted avg     0.6496    0.6066    0.6117     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.3161
Precision: 0.6665, Recall: 0.5876, F1-Score: 0.6016
              precision    recall  f1-score   support

           0     0.4875    0.5067    0.4969      2992
           1     0.7512    0.3733    0.4988      2992
           2     0.7287    0.5564    0.6310      3012
           3     0.2808    0.7228    0.4045      2998
           4     0.8153    0.6636    0.7317      2973
           5     0.8733    0.7354    0.7984      3054
           6     0.7212    0.3696    0.4888      3003
           7     0.6312    0.6285    0.6298      3012
           8     0.6004    0.6968    0.6450      2982
           9     0.7750    0.6227    0.6906      2982

    accuracy                         0.5878     30000
   macro avg     0.6665    0.5876    0.6016     30000
weighted avg     0.6667    0.5878    0.6018     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.3222
Precision: 0.6654, Recall: 0.5858, F1-Score: 0.5958
              precision    recall  f1-score   support

           0     0.4769    0.5137    0.4946      2992
           1     0.7752    0.3031    0.4358      2992
           2     0.7163    0.5767    0.6390      3012
           3     0.2897    0.7165    0.4126      2998
           4     0.8245    0.6670    0.7374      2973
           5     0.8814    0.7322    0.7999      3054
           6     0.7400    0.3620    0.4861      3003
           7     0.5859    0.6614    0.6213      3012
           8     0.5884    0.7042    0.6411      2982
           9     0.7755    0.6211    0.6898      2982

    accuracy                         0.5860     30000
   macro avg     0.6654    0.5858    0.5958     30000
weighted avg     0.6656    0.5860    0.5960     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.3156
Precision: 0.6647, Recall: 0.5929, F1-Score: 0.6031
              precision    recall  f1-score   support

           0     0.4744    0.5150    0.4939      2992
           1     0.7648    0.3620    0.4914      2992
           2     0.6736    0.6215    0.6465      3012
           3     0.2978    0.7078    0.4192      2998
           4     0.8348    0.6512    0.7317      2973
           5     0.8497    0.7587    0.8016      3054
           6     0.7497    0.3580    0.4846      3003
           7     0.6386    0.6195    0.6289      3012
           8     0.5804    0.7126    0.6398      2982
           9     0.7835    0.6224    0.6937      2982

    accuracy                         0.5931     30000
   macro avg     0.6647    0.5929    0.6031     30000
weighted avg     0.6649    0.5931    0.6034     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.2944
Precision: 0.6609, Recall: 0.5921, F1-Score: 0.6042
              precision    recall  f1-score   support

           0     0.4959    0.5271    0.5110      2992
           1     0.7654    0.3499    0.4803      2992
           2     0.7298    0.5604    0.6340      3012
           3     0.2900    0.7018    0.4105      2998
           4     0.7943    0.6976    0.7428      2973
           5     0.8817    0.7295    0.7984      3054
           6     0.6654    0.4073    0.5053      3003
           7     0.5912    0.6564    0.6221      3012
           8     0.6345    0.6492    0.6418      2982
           9     0.7609    0.6415    0.6961      2982

    accuracy                         0.5922     30000
   macro avg     0.6609    0.5921    0.6042     30000
weighted avg     0.6612    0.5922    0.6045     30000

0.09816818431699743
{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.we

In [11]:
# Turn every per-concern evaluation report into a DataFrame and merge them.
# Judging by the displayed table, row n of the summary comes from the report
# of the model pruned for concern n.
df_list = [report_to_df(report) for report in result_list]
new_df = append_nth_row(df_list)

# Persist the summary under a timestamped name. Create the output directory
# first so a fresh checkout does not fail on the write.
os.makedirs("results", exist_ok=True)
csv_name = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
new_df.to_csv(f"results/{csv_name}.csv", index=False)
print(csv_name)
new_df

2024-11-03_16-44-41


Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.48,0.5264,0.5022,2992
1,1,0.7154,0.4412,0.5458,2992
2,2,0.6118,0.6806,0.6444,3012
3,3,0.2879,0.7105,0.4098,2998
4,4,0.7363,0.7666,0.7512,2973
5,5,0.7828,0.7917,0.7872,3054
6,6,0.7212,0.3696,0.4888,3003
7,7,0.5859,0.6614,0.6213,3012
8,8,0.5804,0.7126,0.6398,2982
9,9,0.7609,0.6415,0.6961,2982


In [12]:
# Baseline for comparison: evaluate the original model on the test set.
# The visible pruning loop only prunes deep copies, never `model` itself.
print("Evaluate the original model")
result = evaluate_model(model, config, test_dataloader)

Evaluate the original model


Evaluating the model:   0%|          | 0/1875 [00:00<?, ?it/s]

Loss: 1.2240
Precision: 0.6478, Recall: 0.6149, F1-Score: 0.6195
              precision    recall  f1-score   support

           0     0.5321    0.4843    0.5071      2992
           1     0.7005    0.4723    0.5642      2992
           2     0.6957    0.6119    0.6511      3012
           3     0.3443    0.6421    0.4482      2998
           4     0.7254    0.7783    0.7509      2973
           5     0.8403    0.7600    0.7981      3054
           6     0.6719    0.4106    0.5097      3003
           7     0.6185    0.6384    0.6283      3012
           8     0.5854    0.7146    0.6436      2982
           9     0.7637    0.6362    0.6941      2982

    accuracy                         0.6150     30000
   macro avg     0.6478    0.6149    0.6195     30000
weighted avg     0.6481    0.6150    0.6198     30000

