In [2]:
import joblib
import numpy as np
import pandas as pd

preprocessing_object = joblib.load("/data/foundation_model/data/preprocessing_objects_20250615.pkl.z")
# print(preprocessing_object)

all_data = (
    pd.read_parquet("/data/foundation_model/data/qc_ac_te_mp_dos_reformat_20250615.pd.parquet").drop(
        index=preprocessing_object["dropped_idx"]
    )
    # .reset_index(drop=True)
)
all_data.head(3)

test_data = (
    all_data[all_data["split"] == "test"]
    #  .reset_index(drop=True)
)
test_data.head(3)


desc_trans = pd.read_parquet("/data/foundation_model/data/qc_ac_te_mp_dos_composition_desc_trans_20250615.pd.parquet")


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
def swap_train_val_split(split, swap_ratio=0.1, random_seed=None, train_ratio: float = 1.0):
    split = split.copy()
    train_idx = split[split == "train"].index
    val_idx = split[split == "val"].index

    # 先交换
    n_swap = int(min(len(train_idx), len(val_idx)) * swap_ratio)
    if n_swap > 0:
        rng = np.random.default_rng(random_seed)
        swap_train = rng.choice(train_idx, n_swap, replace=False)
        swap_val = rng.choice(val_idx, n_swap, replace=False)
        split.loc[swap_train] = "val"
        split.loc[swap_val] = "train"

    # 再采样train
    train_idx = split[split == "train"].index
    if train_ratio < 1.0:
        rng = np.random.default_rng(random_seed)
        n_train = round(len(train_idx) * train_ratio)
        sampled_train_idx = rng.choice(train_idx, n_train, replace=False)
        # 其余train直接丢弃
        drop_idx = train_idx.difference(sampled_train_idx)
        split.loc[drop_idx] = np.nan

    return split.dropna()


In [4]:
prop_name = "Material type"

mask = all_data[f"{prop_name}"].notnull()
org_splits = all_data.loc[mask, "split"]
prop = all_data.loc[mask, f"{prop_name}"]

In [6]:
org_splits

id
mp-111          val
mp-23158      train
mp-23155      train
mp-1525632      val
mp-1095223      val
              ...  
mp-1201016     test
mp-581173     train
mp-582182     train
mp-1199833      val
mp-1198953    train
Name: split, Length: 48998, dtype: object

In [4]:
import json
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

N_TRY = 20
prop_name = "Material type"
# Experiment 1: Use Fourier features
print("training for property:", prop_name)

for ratio in [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]:
    base_dir = Path(f"logs/classification/{prop_name}/{datetime.now().strftime('%m%d_%H%M')}_r{ratio}")

    mask = all_data[f"{prop_name}"].notnull()
    org_splits = all_data.loc[mask, "split"]
    prop = all_data.loc[mask, f"{prop_name}"]

    for n_try in range(N_TRY):
        # 1. Setup logging directory
        version = f"trial_{n_try + 1}"
        save_dir = base_dir / "predictions" / version
        save_dir.mkdir(parents=True, exist_ok=True)

        splits = swap_train_val_split(org_splits, swap_ratio=0.5, random_seed=None, train_ratio=ratio)
        splits.to_csv(f"{save_dir}/data_split.csv")

        X_train = desc_trans.loc[splits[splits == "train"].index]
        y_train = prop.loc[splits[splits == "train"].index]
        X_test = desc_trans.loc[splits[splits == "test"].index]
        y_test = prop.loc[splits[splits == "test"].index]

        # 编码类别
        le = preprocessing_object["material_type_label_encoder"]
        y_train_enc = le.transform(y_train)
        y_test_enc = le.transform(y_test)

        # 训练随机森林分类器
        clf = RandomForestClassifier(
            n_estimators=300, random_state=n_try, bootstrap=True, max_features="sqrt", n_jobs=60
        )
        clf = clf.fit(X_train, y_train_enc)
        y_pred_enc = clf.predict(X_test)
        y_fit_pred_enc = clf.predict(X_train)
        y_pred, y_true = le.inverse_transform(y_pred_enc), y_test.values
        y_fit_pred, y_fit_true = le.inverse_transform(y_fit_pred_enc), y_train.values

        # 保存预测结果
        results = pd.concat(
            [
                pd.DataFrame({"y_true": y_fit_true, "y_pred": y_fit_pred, "label": "train"}, index=X_train.index),
                pd.DataFrame({"y_true": y_true, "y_pred": y_pred, "label": "test"}, index=X_test.index),
            ]
        )
        results.to_parquet(save_dir / "clf_predictions.parquet")
        results.to_csv(save_dir / "clf_predictions.csv")

        # 保存模型
        joblib.dump(clf, save_dir / "clf_model.pkl.z")

        test_report = classification_report(y_test, y_pred, target_names=le.classes_, digits=4, output_dict=True)
        train_report = classification_report(
            y_fit_true, y_fit_pred, target_names=le.classes_, digits=4, output_dict=True
        )

        # 混淆矩阵
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(21, 8), sharey=True, dpi=130)
        plt.subplots_adjust(wspace=0.05)

        # Confusion matrix
        cm = confusion_matrix(y_true, y_pred, labels=le.classes_)
        hm1 = sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=le.classes_,
            yticklabels=le.classes_,
            ax=ax1,
            annot_kws={"size": 13},
        )
        ax1.set_xlabel("Predicted", fontsize=18)
        ax1.set_ylabel("True", fontsize=18)
        ax1.set_title(f"{prop_name}", fontsize=18)
        ax1.tick_params(axis="both", labelsize=13)
        cbar1 = hm1.collections[0].colorbar
        cbar1.ax.tick_params(labelsize=13)

        # Normalized confusion matrix
        cm_norm = confusion_matrix(y_test, y_pred, labels=le.classes_, normalize="true")
        hm2 = sns.heatmap(
            cm_norm,
            annot=True,
            fmt=".2f",
            cmap="Blues",
            xticklabels=le.classes_,
            yticklabels=le.classes_,
            ax=ax2,
            annot_kws={"size": 13},
        )
        ax2.set_xlabel("Predicted", fontsize=18)
        ax2.set_title(f"{prop_name} (Normalized)", fontsize=18)
        ax2.tick_params(axis="both", labelsize=13)
        cbar2 = hm2.collections[0].colorbar
        cbar2.ax.tick_params(labelsize=13)

        _ = fig.savefig(f"{save_dir}/test_cm.png", bbox_inches="tight")
        plt.cla()
        plt.clf()

        # 分类指标
        print(classification_report(y_true, y_pred, target_names=le.classes_, digits=4))

        # 混淆矩阵
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(21, 8), sharey=True, dpi=150)
        plt.subplots_adjust(wspace=0.05)

        # Confusion matrix
        cm = confusion_matrix(y_fit_true, y_fit_pred, labels=le.classes_)
        hm1 = sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=le.classes_,
            yticklabels=le.classes_,
            ax=ax1,
            annot_kws={"size": 15},
        )
        ax1.set_xlabel("Predicted", fontsize=18)
        ax1.set_ylabel("True", fontsize=18)
        ax1.set_title(f"{prop_name}", fontsize=18)
        ax1.tick_params(axis="both", labelsize=13)
        cbar1 = hm1.collections[0].colorbar
        cbar1.ax.tick_params(labelsize=13)

        # Normalized confusion matrix
        cm_norm = confusion_matrix(y_fit_true, y_fit_pred, labels=le.classes_, normalize="true")
        hm2 = sns.heatmap(
            cm_norm,
            annot=True,
            fmt=".2f",
            cmap="Blues",
            xticklabels=le.classes_,
            yticklabels=le.classes_,
            ax=ax2,
            annot_kws={"size": 13},
        )
        ax2.set_xlabel("Predicted", fontsize=18)
        ax2.set_title(f"{prop_name} (Normalized)", fontsize=18)
        ax2.tick_params(axis="both", labelsize=13)
        cbar2 = hm2.collections[0].colorbar
        cbar2.ax.tick_params(labelsize=13)

        _ = fig.savefig(f"{save_dir}/train_cm.png", bbox_inches="tight")
        plt.cla()
        plt.clf()

        print(classification_report(y_fit_true, y_fit_pred, target_names=le.classes_, digits=4))

        metrics = {"train": train_report, "test": test_report}
        with open(f"{save_dir}/metrics.json", "w") as f:
            json.dump(metrics, f, indent=2)


training for property: Material type
              precision    recall  f1-score   support

         DAC     0.5000    1.0000    0.6667         1
         DQC     0.7500    1.0000    0.8571         3
         IAC     0.8148    0.9167    0.8627        24
         IQC     0.8462    0.7857    0.8148        28
      others     0.9996    0.9992    0.9994      7298

    accuracy                         0.9981      7354
   macro avg     0.7821    0.9403    0.8402      7354
weighted avg     0.9982    0.9981    0.9981      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000        11
         DQC     1.0000    1.0000    1.0000        10
         IAC     1.0000    0.9765    0.9881        85
         IQC     0.9673    0.9737    0.9705       152
      others     0.9999    0.9999    0.9999     34037

    accuracy                         0.9997     34295
   macro avg     0.9934    0.9900    0.9917     34295
weighted avg     0.9997    0.9997    0.9

  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(21, 8), sharey=True, dpi=130)


              precision    recall  f1-score   support

         DAC     0.5000    1.0000    0.6667         1
         DQC     0.7500    1.0000    0.8571         3
         IAC     0.8800    0.9167    0.8980        24
         IQC     0.8519    0.8214    0.8364        28
      others     0.9995    0.9992    0.9993      7298

    accuracy                         0.9982      7354
   macro avg     0.7963    0.9475    0.8515      7354
weighted avg     0.9983    0.9982    0.9983      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         9
         DQC     1.0000    1.0000    1.0000         8
         IAC     1.0000    0.9634    0.9814        82
         IQC     0.9868    0.9615    0.9740       156
      others     0.9998    1.0000    0.9999     34040

    accuracy                         0.9997     34295
   macro avg     0.9973    0.9850    0.9911     34295
weighted avg     0.9997    0.9997    0.9997     34295

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.5000    0.6667    0.5714         3
         IAC     0.9091    0.8333    0.8696        24
         IQC     0.8333    0.8929    0.8621        28
      others     0.9993    0.9993    0.9993      7298

    accuracy                         0.9981      7354
   macro avg     0.6483    0.6784    0.6605      7354
weighted avg     0.9980    0.9981    0.9981      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         3
         DQC     1.0000    1.0000    1.0000         5
         IAC     1.0000    0.9815    0.9907        54
         IQC     0.9905    0.9811    0.9858       106
      others     0.9999    1.0000    0.9999     23838

    accuracy                         0.9998     24006
   macro avg     0.9981    0.9925    0.9953     24006
weighted avg     0.9998    0.9998    0.9998     24006

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.6000    1.0000    0.7500         3
         IAC     0.7857    0.9167    0.8462        24
         IQC     0.8077    0.7500    0.7778        28
      others     0.9992    0.9988    0.9990      7298

    accuracy                         0.9974      7354
   macro avg     0.6385    0.7331    0.6746      7354
weighted avg     0.9975    0.9974    0.9974      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         4
         DQC     1.0000    1.0000    1.0000         5
         IAC     1.0000    1.0000    1.0000        56
         IQC     0.9884    0.9551    0.9714        89
      others     0.9998    1.0000    0.9999     20423

    accuracy                         0.9998     20577
   macro avg     0.9976    0.9910    0.9943     20577
weighted avg     0.9998    0.9998    0.9998     20577

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.6000    1.0000    0.7500         3
         IAC     0.8333    0.8333    0.8333        24
         IQC     0.8519    0.8214    0.8364        28
      others     0.9993    0.9993    0.9993      7298

    accuracy                         0.9980      7354
   macro avg     0.6569    0.7308    0.6838      7354
weighted avg     0.9979    0.9980    0.9979      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         3
         DQC     1.0000    1.0000    1.0000         3
         IAC     1.0000    1.0000    1.0000        42
         IQC     0.9733    0.9865    0.9799        74
      others     0.9999    0.9999    0.9999     17026

    accuracy                         0.9998     17148
   macro avg     0.9947    0.9973    0.9960     17148
weighted avg     0.9998    0.9998    0.9998     17148



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.6000    1.0000    0.7500         3
         IAC     0.8636    0.7917    0.8261        24
         IQC     0.8065    0.8929    0.8475        28
      others     0.9995    0.9992    0.9993      7298

    accuracy                         0.9980      7354
   macro avg     0.6539    0.7367    0.6846      7354
weighted avg     0.9980    0.9980    0.9979      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         4
         DQC     1.0000    1.0000    1.0000         5
         IAC     1.0000    0.9722    0.9859        36
         IQC     1.0000    1.0000    1.0000        88
      others     0.9999    1.0000    1.0000     17015

    accuracy                         0.9999     17148
   macro avg     1.0000    0.9944    0.9972     17148
weighted avg     0.9999    0.9999    0.9999     17148

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.5000    1.0000    0.6667         3
         IAC     0.9048    0.7917    0.8444        24
         IQC     0.8333    0.7143    0.7692        28
      others     0.9986    0.9993    0.9990      7298

    accuracy                         0.9974      7354
   macro avg     0.6473    0.7011    0.6559      7354
weighted avg     0.9974    0.9974    0.9973      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         4
         DQC     1.0000    1.0000    1.0000         3
         IAC     1.0000    1.0000    1.0000        34
         IQC     0.9815    1.0000    0.9907        53
      others     1.0000    0.9999    1.0000     13624

    accuracy                         0.9999     13718
   macro avg     0.9963    1.0000    0.9981     13718
weighted avg     0.9999    0.9999    0.9999     13718

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     1.0000    0.3333    0.5000         3
         IAC     0.8000    0.8333    0.8163        24
         IQC     0.7353    0.8929    0.8065        28
      others     0.9996    0.9990    0.9993      7298

    accuracy                         0.9977      7354
   macro avg     0.7070    0.6117    0.6244      7354
weighted avg     0.9978    0.9977    0.9976      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         1
         DQC     1.0000    1.0000    1.0000         2
         IAC     1.0000    0.9706    0.9851        34
         IQC     0.9848    0.9848    0.9848        66
      others     0.9999    0.9999    0.9999     13615

    accuracy                         0.9998     13718
   macro avg     0.9969    0.9911    0.9940     13718
weighted avg     0.9998    0.9998    0.9998     13718

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.5000    1.0000    0.6667         1
         DQC     0.0000    0.0000    0.0000         3
         IAC     0.8947    0.7083    0.7907        24
         IQC     0.7419    0.8214    0.7797        28
      others     0.9989    0.9995    0.9992      7298

    accuracy                         0.9974      7354
   macro avg     0.6271    0.7058    0.6472      7354
weighted avg     0.9971    0.9974    0.9972      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         4
         DQC     1.0000    1.0000    1.0000         1
         IAC     1.0000    0.9643    0.9818        28
         IQC     1.0000    1.0000    1.0000        61
      others     0.9999    1.0000    1.0000     13624

    accuracy                         0.9999     13718
   macro avg     1.0000    0.9929    0.9964     13718
weighted avg     0.9999    0.9999    0.9999     13718

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.5000    1.0000    0.6667         1
         DQC     0.0000    0.0000    0.0000         3
         IAC     0.8077    0.8750    0.8400        24
         IQC     0.7778    0.7500    0.7636        28
      others     0.9990    0.9992    0.9991      7298

    accuracy                         0.9974      7354
   macro avg     0.6169    0.7248    0.6539      7354
weighted avg     0.9971    0.9974    0.9972      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         3
         DQC     1.0000    1.0000    1.0000         1
         IAC     1.0000    1.0000    1.0000        25
         IQC     1.0000    0.9839    0.9919        62
      others     0.9999    1.0000    1.0000     13627

    accuracy                         0.9999     13718
   macro avg     1.0000    0.9968    0.9984     13718
weighted avg     0.9999    0.9999    0.9999     13718



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.7500    1.0000    0.8571         3
         IAC     0.7500    0.8750    0.8077        24
         IQC     0.8636    0.6786    0.7600        28
      others     0.9990    0.9993    0.9992      7298

    accuracy                         0.9976      7354
   macro avg     0.6725    0.7106    0.6848      7354
weighted avg     0.9975    0.9976    0.9974      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         4
         DQC     1.0000    1.0000    1.0000         4
         IAC     1.0000    1.0000    1.0000        29
         IQC     0.9818    1.0000    0.9908        54
      others     1.0000    0.9999    1.0000     13627

    accuracy                         0.9999     13718
   macro avg     0.9964    1.0000    0.9982     13718
weighted avg     0.9999    0.9999    0.9999     13718



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.2500    0.3333    0.2857         3
         IAC     0.8095    0.7083    0.7556        24
         IQC     0.7692    0.7143    0.7407        28
      others     0.9985    0.9992    0.9988      7298

    accuracy                         0.9967      7354
   macro avg     0.5654    0.5510    0.5562      7354
weighted avg     0.9966    0.9967    0.9966      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         6
         DQC     1.0000    1.0000    1.0000         5
         IAC     1.0000    0.9730    0.9863        37
         IQC     0.9841    0.9688    0.9764        64
      others     0.9998    0.9999    0.9999     13606

    accuracy                         0.9997     13718
   macro avg     0.9968    0.9883    0.9925     13718
weighted avg     0.9997    0.9997    0.9997     13718

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.7500    1.0000    0.8571         3
         IAC     0.9444    0.7083    0.8095        24
         IQC     0.7778    0.7500    0.7636        28
      others     0.9984    0.9993    0.9988      7298

    accuracy                         0.9973      7354
   macro avg     0.6941    0.6915    0.6858      7354
weighted avg     0.9971    0.9973    0.9971      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         2
         DQC     1.0000    1.0000    1.0000         6
         IAC     1.0000    1.0000    1.0000        22
         IQC     1.0000    1.0000    1.0000        54
      others     1.0000    1.0000    1.0000     13634

    accuracy                         1.0000     13718
   macro avg     1.0000    1.0000    1.0000     13718
weighted avg     1.0000    1.0000    1.0000     13718

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     0.7500    1.0000    0.8571         3
         IAC     0.9444    0.7083    0.8095        24
         IQC     0.7692    0.7143    0.7407        28
      others     0.9981    0.9992    0.9986      7298

    accuracy                         0.9970      7354
   macro avg     0.6924    0.6844    0.6812      7354
weighted avg     0.9968    0.9970    0.9968      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         2
         DQC     1.0000    1.0000    1.0000         3
         IAC     1.0000    1.0000    1.0000        26
         IQC     1.0000    1.0000    1.0000        61
      others     1.0000    1.0000    1.0000     13626

    accuracy                         1.0000     13718
   macro avg     1.0000    1.0000    1.0000     13718
weighted avg     1.0000    1.0000    1.0000     13718

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

         DAC     0.0000    0.0000    0.0000         1
         DQC     1.0000    0.3333    0.5000         3
         IAC     0.6667    0.8333    0.7407        24
         IQC     0.7895    0.5357    0.6383        28
      others     0.9982    0.9990    0.9986      7298

    accuracy                         0.9963      7354
   macro avg     0.6909    0.5403    0.5755      7354
weighted avg     0.9962    0.9963    0.9961      7354

              precision    recall  f1-score   support

         DAC     1.0000    1.0000    1.0000         1
         DQC     1.0000    1.0000    1.0000         2
         IAC     1.0000    1.0000    1.0000        23
         IQC     0.9787    0.9787    0.9787        47
      others     0.9999    0.9999    0.9999     10215

    accuracy                         0.9998     10288
   macro avg     0.9957    0.9957    0.9957     10288
weighted avg     0.9998    0.9998    0.9998     10288

              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


ValueError: Number of classes, 4, does not match size of target_names, 5. Try specifying the labels parameter

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>

<Figure size 2730x1040 with 0 Axes>

<Figure size 3150x1200 with 0 Axes>