In [1]:
%%bash
DATASET="eurlex-4k"
wget -nv -nc https://archive.org/download/pecos-dataset/xmc-base/${DATASET}.tar.gz
tar --skip-old-files -zxf ${DATASET}.tar.gz 
find xmc-base/${DATASET}/*

xmc-base/eurlex-4k/X.trn.txt
xmc-base/eurlex-4k/X.tst.txt
xmc-base/eurlex-4k/Y.trn.npz
xmc-base/eurlex-4k/Y.trn.txt
xmc-base/eurlex-4k/Y.tst.npz
xmc-base/eurlex-4k/Y.tst.txt
xmc-base/eurlex-4k/output-items.txt
xmc-base/eurlex-4k/tfidf-attnxml
xmc-base/eurlex-4k/tfidf-attnxml/X.tst.npz
xmc-base/eurlex-4k/tfidf-attnxml/X.trn.npz


In [2]:
import logging
import numpy as np
from pecos.utils import smat_util, logging_util
from scipy.sparse import csr_matrix
import numpy as np

# set logging level to WARNING(1)
# you can change this to INFO(2) or DEBUG(3) if you'd like to see more logging
LOGGER = logging.getLogger(__name__)
logging_util.setup_logging_config(level=1)

# load training data
X_feat_trn = smat_util.load_matrix("xmc-base/eurlex-4k/tfidf-attnxml/X.trn.npz", dtype=np.float32)
Y_trn = smat_util.load_matrix("xmc-base/eurlex-4k/Y.trn.npz", dtype=np.float32)

with open("xmc-base/eurlex-4k/X.trn.txt", 'r') as fin:
    X_txt_trn = [xx.strip() for xx in fin.readlines()]

# load test data
X_feat_tst = smat_util.load_matrix("xmc-base/eurlex-4k/tfidf-attnxml/X.tst.npz", dtype=np.float32)
Y_tst = smat_util.load_matrix("xmc-base/eurlex-4k/Y.tst.npz", dtype=np.float32)

with open("xmc-base/eurlex-4k/X.tst.txt", 'r') as fin:
    X_txt_tst = [xx.strip() for xx in fin.readlines()]

In [3]:
import json
import requests
from pecos.xmc.xtransformer.model import XTransformer

# get XR-Transformer training params

params = json.load(open("params.json"))

    
eurlex4k_train_params = XTransformer.TrainParams.from_dict(params["train_params"])
print(eurlex4k_train_params)
eurlex4k_pred_params = XTransformer.PredParams.from_dict(params["pred_params"])

# you can view the detailed parameter setting via
print(json.dumps(eurlex4k_train_params.to_dict(), indent=True))
print(json.dumps(eurlex4k_pred_params.to_dict(), indent=True))

  from .autonotebook import tqdm as notebook_tqdm


XTransformer.TrainParams(preliminary_indexer_params=HierarchicalKMeans.TrainParams(nr_splits=16, min_codes=16, max_leaf_size=16, spherical=True, seed=0, kmeans_max_iter=20, threads=-1), refined_indexer_params=HierarchicalKMeans.TrainParams(nr_splits=8, min_codes=None, max_leaf_size=16, spherical=True, seed=0, kmeans_max_iter=20, threads=-1), matcher_params_chain=[TransformerMatcher.TrainParams(model_shortcut='bert-base-uncased', negative_sampling='tfn+man', loss_function='weighted-squared-hinge', bootstrap_method='weighted-linear', lr_schedule='linear', threshold=0.001, hidden_dropout_prob=0.1, batch_size=32, batch_gen_workers=16, max_active_matching_labels=1000, max_num_labels_in_gpu=65536, max_steps=600, max_no_improve_cnt=-1, num_train_epochs=10, gradient_accumulation_steps=1, weight_decay=0.0, max_grad_norm=1.0, learning_rate=5e-05, adam_epsilon=1e-08, warmup_steps=100, logging_steps=50, save_steps=200, cost_sensitive_ranker=False, pre_tokenize=True, pre_tensorize_labels=True, use_

In [6]:
from scipy.sparse import csr_matrix
import numpy as np

def apply_uniform_noise(Y, noise_level):
    Y_dense = Y.toarray()  # convert to dense matrix
    Y_noisy = Y_dense.copy()
    num_classes = Y_dense.shape[1]
    for i in range(num_classes):
        mask = np.random.rand(len(Y_dense)) < noise_level
        Y_noisy[mask, i] = 1 - Y_dense[mask, i]  # flip the labels
    return csr_matrix(Y_noisy)

# Y_trn = apply_uniform_noise(Y_trn, 0.2)


# construct label hierarchy
from pecos.xmc import Indexer, LabelEmbeddingFactory
cluster_chain = Indexer.gen(
    LabelEmbeddingFactory.create(Y_trn, X_feat_trn, method="pifa"),
    train_params=eurlex4k_train_params.refined_indexer_params,
)

# train XR-Linear model
from pecos.xmc.xlinear import XLinearModel
xlm = XLinearModel.train(
    X_feat_trn,
    Y_trn,
    C=cluster_chain,
    train_params=eurlex4k_train_params.ranker_params,
    pred_params=eurlex4k_pred_params.ranker_params,
)

# predict on test set with XR-Linear model
P_xlm = xlm.predict(X_feat_tst)

# compute metrics using ground truth
metrics = smat_util.Metrics.generate(Y_tst, P_xlm)
print("Evaluation metrics of XR-Linear model")
print(metrics)

Evaluation metrics of XR-Linear model
prec   = 85.05 77.98 71.30 64.77 58.78 53.15 48.02 43.61 39.94 36.84
recall = 17.26 31.33 42.44 50.89 57.28 61.73 64.78 67.08 68.96 70.53


#### Baseline 1: XR-Linear
Let's train a XR-Linear model on the TF-IDF features using the same hyper-parameters.

In [18]:
# construct label hierarchy
from pecos.xmc import Indexer, LabelEmbeddingFactory
cluster_chain = Indexer.gen(
    LabelEmbeddingFactory.create(Y_trn, X_feat_trn, method="pifa"),
    train_params=eurlex4k_train_params.refined_indexer_params,
)

# train XR-Linear model
from pecos.xmc.xlinear import XLinearModel
xlm = XLinearModel.train(
    X_feat_trn,
    Y_trn,
    C=cluster_chain,
    train_params=eurlex4k_train_params.ranker_params,
    pred_params=eurlex4k_pred_params.ranker_params,
)

# predict on test set with XR-Linear model
P_xlm = xlm.predict(X_feat_tst)

# compute metrics using ground truth
metrics = smat_util.Metrics.generate(Y_tst, P_xlm)
print("Evaluation metrics of XR-Linear model")
print(metrics)

Evaluation metrics of XR-Linear model
prec   = 85.05 77.98 71.30 64.77 58.78 53.15 48.02 43.61 39.94 36.84
recall = 17.26 31.33 42.44 50.89 57.28 61.73 64.78 67.08 68.96 70.53


#### Noise  generator

#### Uniform Flipping

In [None]:
def apply_uniform_noise(Y, noise_level):
    Y_dense = Y.toarray()  # convert to dense matrix
    Y_noisy = Y_dense.copy()
    num_classes = Y_dense.shape[1]
    for i in range(num_classes):
        mask = np.random.rand(len(Y_dense)) < noise_level
        Y_noisy[mask, i] = 1 - Y_dense[mask, i]  # flip the labels
    return csr_matrix(Y_noisy)

20%

In [6]:

Y_trn = apply_uniform_noise(Y_trn, 0.2)
# define the problem
from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)

Downloading (…)lve/main/config.json: 100%|██████████| 570/570 [00:00<00:00, 224kB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 7.81kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 1.04MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 792kB/s]
Downloading pytorch_model.bin: 100%|██████████| 440M/440M [00:22<00:00, 19.4MB/s] 
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceCl

Evaluation metrics of XR-Transformer (not fine-tuned)
prec   = 41.22 29.75 23.43 19.17 16.37 14.24 12.64 11.42 10.40 9.60
recall = 8.27 11.74 13.69 14.91 15.88 16.54 17.12 17.66 18.09 18.54


In [7]:
model_folder = "./work_dir/Uni20%"
xrt_pretrained.save(model_folder)

#### Pair flipping

In [4]:
def apply_pair_noise(Y, noise_level):
    Y_dense = Y.toarray()  # Convert the sparse matrix to a dense matrix
    Y_noisy = Y_dense.copy()
    num_classes = Y_dense.shape[1]
    for i in range(num_classes):
        for j in range(num_classes):
            if i != j:
                mask = np.random.rand(len(Y_dense)) < noise_level
                Y_noisy[mask & (Y_dense[:, i] == 1), i] = 0  # Flip class i labels to other class j
                Y_noisy[mask & (Y_dense[:, i] == 0), i] = 1  # Flip other class j labels to class i
    return csr_matrix(Y_noisy)


In [6]:
from scipy.sparse import csr_matrix, lil_matrix

def apply_pair_noise(Y, noise_level):
    Y_noisy = Y.copy()
    num_classes = Y.shape[1]
    
    mask = np.random.rand(Y.shape[0]) < noise_level
    mask = csr_matrix(mask[:, np.newaxis])  # 将掩码转换为稀疏矩阵
    
    Y_lil = Y_noisy.tolil()  # 将稀疏矩阵转换为LIL格式
    
    for i in range(num_classes):
        flip_indices = mask.multiply(Y[:, i] == 0).nonzero()[0]  # 获取需要翻转的索引
        Y_lil[flip_indices, i] = 1  # 翻转 i 到其他类别 j
        
        flip_indices = mask.multiply(Y[:, i] == 1).nonzero()[0]  # 获取需要翻转的索引
        Y_lil[flip_indices, i] = 0  # 翻转其他类别 j 到 i
    
    return csr_matrix(Y_lil)

Pair flipping  
noise_level: 20%

In [7]:
Y_trn = apply_pair_noise(Y_trn, 0.2)


print('Noise applied')


from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)


model_folder = "./work_dir/Pair20%"
xrt_pretrained.save(model_folder)

  Y_trn = apply_pair_noise(Y_trn, 0.2)


Noise applied


'HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-uncased/resolve/main/tokenizer_config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fd6627470d0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))' thrown while requesting HEAD https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a 

prec   = 80.65 74.01 67.47 61.35 55.69 50.45 45.64 41.49 38.00 35.03
recall = 16.35 29.66 40.23 48.31 54.34 58.75 61.76 64.00 65.82 67.31


Pair flipping  
noise_level: 40%

In [8]:
Y_trn = apply_pair_noise(Y_trn, 0.4)


print('Noise applied')


from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)


model_folder = "./work_dir/Pair40%"
xrt_pretrained.save(model_folder)

  Y_trn = apply_pair_noise(Y_trn, 0.4)


Noise applied


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForXMC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
'HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-uncased/resolve/main/config.json (Caused by ConnectTime

prec   = 66.18 60.27 55.19 50.49 46.05 41.87 37.98 34.67 31.95 29.58
recall = 13.38 24.04 32.70 39.56 44.76 48.65 51.35 53.44 55.28 56.79


Pair flipping  
noise_level: 60%

In [9]:
Y_trn = apply_pair_noise(Y_trn, 0.6)


print('Noise applied')


from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)


model_folder = "./work_dir/Pair60%"
xrt_pretrained.save(model_folder)

  Y_trn = apply_pair_noise(Y_trn, 0.6)


Noise applied


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForXMC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.LayerNorm.weight', 'cl

prec   = 60.21 55.30 50.89 46.65 42.64 38.97 35.79 32.77 30.23 28.02
recall = 12.11 22.05 30.23 36.70 41.64 45.45 48.53 50.67 52.50 54.01


Pair flipping  
noise_level: 80%

In [10]:
Y_trn = apply_pair_noise(Y_trn, 0.8)


print('Noise applied')


from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)


model_folder = "./work_dir/Pair80%"
xrt_pretrained.save(model_folder)

  Y_trn = apply_pair_noise(Y_trn, 0.8)


Noise applied


'HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-uncased/resolve/main/tokenizer_config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fd6415e6fa0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))' thrown while requesting HEAD https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a 

prec   = 62.95 56.95 52.38 48.22 44.16 40.30 36.84 33.77 31.05 28.76
recall = 12.68 22.69 31.07 37.82 43.04 46.91 49.81 52.05 53.72 55.22


### Uniform Flipping

Uniform flipping    
noise_level: 40%

In [7]:

Y_trn = apply_uniform_noise(Y_trn, 0.4)
print('Noise applied')
# define the problem
from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)
model_folder = "./work_dir/Uni40%"
xrt_pretrained.save(model_folder)

Noise applied


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForXMC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.dense.bias', 'cls.seq_

prec   = 0.28 0.35 0.33 0.29 0.28 0.28 0.29 0.29 0.26 0.26
recall = 0.05 0.13 0.19 0.22 0.27 0.32 0.39 0.45 0.46 0.52


Uniform flipping    
noise_level: 60%

In [8]:

Y_trn = apply_uniform_noise(Y_trn, 0.6)
print('Noise applied')
# define the problem
from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)
model_folder = "./work_dir/Uni60%"
xrt_pretrained.save(model_folder)

Noise applied


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForXMC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.dense.bias', 'cls.seq_

prec   = 0.05 0.06 0.11 0.09 0.09 0.09 0.10 0.10 0.11 0.11
recall = 0.01 0.02 0.06 0.06 0.08 0.09 0.12 0.14 0.17 0.22


Uniform flipping    
noise_level: 80%

In [9]:

Y_trn = apply_uniform_noise(Y_trn, 0.8)
print('Noise applied')
# define the problem
from pecos.xmc.xtransformer.module import MLProblemWithText
prob = MLProblemWithText(X_txt_trn, Y_trn, X_feat=X_feat_trn)


eurlex4k_train_params.do_fine_tune = True


xrt_pretrained = XTransformer.train(
    prob,
    train_params=eurlex4k_train_params,
    pred_params=eurlex4k_pred_params,
)

# predict and compute metrics
P_xrt_pretrained = xrt_pretrained.predict(X_txt_tst, X_feat=X_feat_tst)
metrics = smat_util.Metrics.generate(Y_tst, P_xrt_pretrained)

print(metrics)
model_folder = "./work_dir/Uni80%"
xrt_pretrained.save(model_folder)

Noise applied


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForXMC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForXMC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForXMC: ['cls.predictions.transform.dense.bias', 'cls.seq_

prec   = 0.16 0.13 0.16 0.17 0.16 0.15 0.16 0.15 0.15 0.16
recall = 0.02 0.04 0.08 0.11 0.13 0.15 0.19 0.21 0.23 0.28


The model trained by adding Uniform flipping noise data sets is worse than the model trained by adding pair flipping noise data sets