In [9]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import pandas as pd
import numpy as np
import sklearn
import re
import math
import joblib
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [10]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import re
import copy
from tqdm import tqdm
import gc

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    classification_report
)

from transformers import (
    AutoTokenizer,
    AutoModel,
    get_linear_schedule_with_warmup
)

import nltk
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize

from transformers import BertTokenizer, BertForMaskedLM

from sklearn.metrics import (
    recall_score,
    precision_score
  )

from sklearn.metrics import multilabel_confusion_matrix

In [11]:
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
import nltk

In [12]:
import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

In [13]:
time_now = pd.Timestamp.now().strftime("%m-%d-%H-%M")
print(f"Time now: {time_now}")

Time now: 06-03-10-56


# Config

In [None]:
class Config:
    def __init__(self):
        super(Config, self).__init__()

        self.SEED = 42
        self.MODEL_PATH = 'allenai/scibert_scivocab_uncased'
        self.NUM_LABELS_TA = 14
        self.NUM_LABELS_TE = 185

        self.TOKENIZER = AutoTokenizer.from_pretrained(self.MODEL_PATH)
        self.MAX_LENGTH = 320
        self.BATCH_SIZE = 16

        self.DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.FULL_FINETUNING = True
        self.LR = 3e-5
        self.OPTIMIZER = 'AdamW'
        self.CRITERION = 'BCEWithLogitsLoss'
        self.N_VALIDATE_DUR_TRAIN = 3
        self.N_WARMUP = 0
        self.SAVE_BEST_ONLY = True
        self.EPOCHS = 20

In [15]:
TACTIC = ['TA0001', 'TA0002', 'TA0003', 'TA0004', 'TA0005', 'TA0006', 'TA0007', 'TA0008', 'TA0009', 'TA0010', 'TA0011', 'TA0040', 'TA0042', 'TA0043']
TECHNIQUE = ['T1001', 'T1003', 'T1005', 'T1006', 'T1007', 'T1008', 'T1010', 'T1011', 'T1012', 'T1014', 'T1016', 'T1018', 'T1020', 'T1021', 'T1025', 'T1027', 'T1029', 'T1030', 'T1033', 'T1036', 'T1037', 'T1039', 'T1040', 'T1041', 'T1046', 'T1047', 'T1048', 'T1049', 'T1052', 'T1053', 'T1055', 'T1056', 'T1057', 'T1059', 'T1068', 'T1069', 'T1071', 'T1072', 'T1074', 'T1078', 'T1080', 'T1082', 'T1083', 'T1087', 'T1090', 'T1091', 'T1092', 'T1095', 'T1098', 'T1102', 'T1104', 'T1105', 'T1106', 'T1110', 'T1111', 'T1112', 'T1113', 'T1114', 'T1115', 'T1119', 'T1120', 'T1123', 'T1124', 'T1125', 'T1127', 'T1129', 'T1132', 'T1133', 'T1134', 'T1135', 'T1136', 'T1137', 'T1140', 'T1176', 'T1185', 'T1187', 'T1189', 'T1190', 'T1195', 'T1197', 'T1199', 'T1200', 'T1201', 'T1202', 'T1203', 'T1204', 'T1205', 'T1207', 'T1210', 'T1211', 'T1212', 'T1213', 'T1216', 'T1218', 'T1220', 'T1221', 'T1222', 'T1480', 'T1482', 'T1485', 'T1486', 'T1489', 'T1490', 'T1491', 'T1495', 'T1496', 'T1497', 'T1498', 'T1499', 'T1505', 'T1518', 'T1525', 'T1526', 'T1528', 'T1529', 'T1531', 'T1534', 'T1535', 'T1537', 'T1538', 'T1539', 'T1542', 'T1543', 'T1546', 'T1547', 'T1548', 'T1550', 'T1552', 'T1553', 'T1555', 'T1556', 'T1557', 'T1558', 'T1559', 'T1560', 'T1561', 'T1562', 'T1563', 'T1564', 'T1565', 'T1566', 'T1567', 'T1568', 'T1569', 'T1570', 'T1571', 'T1572', 'T1573', 'T1574', 'T1578', 'T1580', 'T1583', 'T1584', 'T1585', 'T1586', 'T1587', 'T1588', 'T1589', 'T1590', 'T1591', 'T1592', 'T1593', 'T1594', 'T1595', 'T1596', 'T1597', 'T1598', 'T1599', 'T1600', 'T1601', 'T1602', 'T1606', 'T1608', 'T1609', 'T1610', 'T1611', 'T1612', 'T1613', 'T1614', 'T1615', 'T1619', 'T1620', 'T1621', 'T1622', 'T1647']
TACTICS_TECHNIQUES_RELATIONSHIP_DF = {
"TA0001":pd.Series(['T1078', 'T1091', 'T1133', 'T1189', 'T1190', 'T1195', 'T1199', 'T1200', 'T1566', 'T1659', 'T1669']),
"TA0002":pd.Series(['T1047', 'T1053', 'T1059', 'T1072', 'T1106', 'T1129', 'T1203', 'T1204', 'T1559', 'T1569', 'T1609', 'T1610', 'T1648', 'T1651', 'T1674', 'T1675']),
"TA0003":pd.Series(['T1037', 'T1053', 'T1078', 'T1098', 'T1112', 'T1133', 'T1136', 'T1137', 'T1176', 'T1197', 'T1205', 'T1505', 'T1525', 'T1542', 'T1543', 'T1546', 'T1547', 'T1554', 'T1556', 'T1574', 'T1653', 'T1668', 'T1671']),
"TA0004":pd.Series(['T1037', 'T1053', 'T1055', 'T1068', 'T1078', 'T1098', 'T1134', 'T1484', 'T1543', 'T1546', 'T1547', 'T1548', 'T1574', 'T1611']),
"TA0005":pd.Series(['T1006', 'T1014', 'T1027', 'T1036', 'T1055', 'T1070', 'T1078', 'T1112', 'T1127', 'T1134', 'T1140', 'T1197', 'T1202', 'T1205', 'T1207', 'T1211', 'T1216', 'T1218', 'T1220', 'T1221', 'T1222', 'T1480', 'T1484', 'T1497', 'T1535', 'T1542', 'T1548', 'T1550', 'T1553', 'T1556', 'T1562', 'T1564', 'T1574', 'T1578', 'T1599', 'T1600', 'T1601', 'T1610', 'T1612', 'T1620', 'T1622', 'T1647', 'T1656', 'T1666', 'T1672']),
"TA0006":pd.Series(['T1003', 'T1040', 'T1056', 'T1110', 'T1111', 'T1187', 'T1212', 'T1528', 'T1539', 'T1552', 'T1555', 'T1556', 'T1557', 'T1558', 'T1606', 'T1621', 'T1649']),
"TA0007":pd.Series(['T1007', 'T1010', 'T1012', 'T1016', 'T1018', 'T1033', 'T1040', 'T1046', 'T1049', 'T1057', 'T1069', 'T1082', 'T1083', 'T1087', 'T1120', 'T1124', 'T1135', 'T1201', 'T1217', 'T1482', 'T1497', 'T1518', 'T1526', 'T1538', 'T1580', 'T1613', 'T1614', 'T1615', 'T1619', 'T1622', 'T1652', 'T1654', 'T1673']),
"TA0008":pd.Series(['T1021', 'T1072', 'T1080', 'T1091', 'T1210', 'T1534', 'T1550', 'T1563', 'T1570']),
"TA0009":pd.Series(['T1005', 'T1025', 'T1039', 'T1056', 'T1074', 'T1113', 'T1114', 'T1115', 'T1119', 'T1123', 'T1125', 'T1185', 'T1213', 'T1530', 'T1557', 'T1560', 'T1602']),
"TA0010":pd.Series(['T1011', 'T1020', 'T1029', 'T1030', 'T1041', 'T1048', 'T1052', 'T1537', 'T1567']),
"TA0011":pd.Series(['T1001', 'T1008', 'T1071', 'T1090', 'T1092', 'T1095', 'T1102', 'T1104', 'T1105', 'T1132', 'T1205', 'T1219', 'T1568', 'T1571', 'T1572', 'T1573', 'T1659', 'T1665']),
"TA0040":pd.Series(['T1485', 'T1486', 'T1489', 'T1490', 'T1491', 'T1495', 'T1496', 'T1498', 'T1499', 'T1529', 'T1531', 'T1561', 'T1565', 'T1657', 'T1667']),                
"TA0042":pd.Series(['T1583', 'T1584', 'T1585', 'T1586', 'T1587', 'T1588', 'T1608', 'T1650']),
"TA0043":pd.Series(['T1589', 'T1590', 'T1591', 'T1592', 'T1593', 'T1594', 'T1595', 'T1596', 'T1597', 'T1598'])
}

In [16]:
df = pd.read_csv('merged_all_columns.csv', encoding='utf-8')
df_tram = pd.read_csv(r"C:\Users\user\OneDrive\桌面\研究所\課程內容\遞迴神經網路與變形器\Final_project\dataset\tram_with_all_labels.csv", encoding='utf-8')
df_attack = pd.read_csv(r"C:\Users\user\OneDrive\桌面\研究所\課程內容\遞迴神經網路與變形器\Final_project\dataset\attack_with_all_labels.csv", encoding='utf-8')


# df_attack['tactic_label'] = df_attack.apply(lambda x: list(x[TACTIC]), axis=1)
# df_attack['technique_label'] = df_attack.apply(lambda x: list(x[TECHNIQUE]), axis=1)

df_tactic_labels = df[TACTIC]
df_technique_labels = df[TECHNIQUE]

In [17]:
print(df_tactic_labels)

      TA0001  TA0002  TA0003  TA0004  TA0005  TA0006  TA0007  TA0008  TA0009  \
0          1       1       0       0       0       0       0       0       0   
1          1       1       0       0       0       0       0       0       0   
2          0       1       0       0       0       1       0       0       0   
3          1       1       0       0       0       0       0       0       0   
4          1       0       0       1       1       0       0       0       0   
...      ...     ...     ...     ...     ...     ...     ...     ...     ...   
1513       0       1       1       0       0       0       0       0       0   
1514       0       0       0       0       0       0       0       0       0   
1515       0       0       0       1       0       0       0       0       0   
1516       0       1       0       0       0       0       0       0       1   
1517       0       0       0       0       0       0       0       0       0   

      TA0010  TA0011  TA0040  TA0042  T

In [18]:
print(df_technique_labels)

      T1001  T1003  T1005  T1006  T1007  T1008  T1010  T1011  T1012  T1014  \
0         0      0      0      0      0      0      0      0      0      0   
1         0      0      0      0      0      0      0      0      0      0   
2         0      1      0      0      0      0      0      0      0      0   
3         0      0      0      0      0      0      0      0      0      0   
4         0      0      0      0      0      0      0      0      0      0   
...     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
1513      0      0      0      0      0      0      0      0      0      0   
1514      0      0      0      0      0      0      0      0      0      0   
1515      0      0      0      0      0      0      0      0      0      0   
1516      0      0      1      0      0      0      0      0      0      0   
1517      0      0      0      0      0      0      0      0      0      0   

      ...  T1611  T1612  T1613  T1614  T1615  T1619  T1620  T16

In [19]:
df['tactic_label'] = df.apply(lambda x: list(x[TACTIC]), axis=1)
df['technique_label'] = df.apply(lambda x: list(x[TECHNIQUE]), axis=1)
df_tram['tactic_label'] = df_tram.apply(lambda x: list(x[TACTIC]), axis=1)
df_tram['technique_label'] = df_tram.apply(lambda x: list(x[TECHNIQUE]), axis=1)
df_attack['tactic_label'] = df_attack.apply(lambda x: list(x[TACTIC]), axis=1)
df_attack['technique_label'] = df_attack.apply(lambda x: list(x[TECHNIQUE]), axis=1)

df = pd.concat([df,df_tram, df_attack], ignore_index=True)


# Data processing

In [20]:
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
import nltk

In [21]:
import nltk

resources = [
    "punkt",
    "stopwords",
    "wordnet",
    "omw-1.4",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng"
]

for res in resources:
    try:
        nltk.data.find(res)
        print(f"{res} ✅ 已安裝")
    except LookupError:
        print(f"{res} ⛔ 未安裝，開始下載...")
        nltk.download(res)


punkt ⛔ 未安裝，開始下載...


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...


stopwords ⛔ 未安裝，開始下載...
wordnet ⛔ 未安裝，開始下載...
omw-1.4 ⛔ 未安裝，開始下載...
averaged_perceptron_tagger ⛔ 未安裝，開始下載...
averaged_perceptron_tagger_eng ⛔ 未安裝，開始下載...


[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [22]:
def get_wordnet_pos(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return 'n'

def ioc_sub(text):
    def reg_handler(obj):
        s = obj.group(1)
        s = ' '.join(s.split('\\'))
        return s

    def file_handler(obj):
        s = obj.group(2)
        s = s.split('\\')[-1]
        return s
    
    text = re.sub(r'(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.|\[\.\])){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\/([0-2][0-9]|3[0-2]|[0-9]))?', 'IPv4', text)
    text = re.sub(r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', 'IP', text)
    text = re.sub(r'\b(CVE\-[0-9]{4}\-[0-9]{4,6})\b', 'CVE', text)
    text = re.sub(r'CVE-[0-9]{4}-[0-9]{4,6}', 'vulnerability', text)
    text = re.sub(r'\b([a-z][_a-z0-9-.]+@[a-z0-9-]+\.[a-z]+)\b', 'email', text)
    text = re.sub(r'\b((HKLM|HKCU|HKCR|HKU|HKCC)\\[\\A-Za-z0-9-_]+)\b', reg_handler, text)
    text = re.sub(r'\b([a-zA-Z]{1}:\\([0-9a-zA-Z_\.\-\/\\]+))\b', file_handler, text)
    text = re.sub(r'\b([a-f0-9]{32}|[A-F0-9]{32})\b', 'MD5', text)
    text = re.sub(r'\b([a-f0-9]{40}|[A-F0-9]{40})\b', 'SHA1', text)
    text = re.sub(r'\b([a-f0-9]{64}|[A-F0-9]{64})\b', 'SHA256', text)
    text = re.sub(r'\d+:[A-Za-z0-9/+]+:[A-Za-z0-9/+]+', 'ssdeep', text)
    text = re.sub(r'\b[a-fA-F\d]{32}\b|\b[a-fA-F\d]{40}\b|\b[a-fA-F\d]{64}\b', 'hash', text)
    text = re.sub(r'h[tx][tx]ps?:[\\\/][\\\/](?:[0-9a-zA-Z_\.\-\/\\]|\[\.\])+', 'URL', text)
    text = re.sub(r'((?:[0-9a-zA-Z_\-]+\.)+(?:(?!exe|dll)[a-z]{2,4}))', 'domain', text)
    text = re.sub(r'[a-fA-F0-9]{16}', '', text)
    text = re.sub(r'[0-9]{8}', '', text)
    text = re.sub(r'x[A-Fa-f0-9]{2}', '', text)
    
    return text

def rmstopword_and_lemmatize(text):
    token = [word for word in word_tokenize(text.lower()) if word not in stopwords.words('english')]
    # tag = pos_tag(token)
    tag = nltk.pos_tag(token, lang='eng')
    lemmatizer = WordNetLemmatizer()
    text = ' '.join(list(map(lambda x: lemmatizer.lemmatize(x[0], pos=get_wordnet_pos(x[1])), tag)))
    
    return text

def preprocess(text):
    text = str(text)
    #text = text.lower()
    text = re.sub(r'etc\.', '', text)
    text = re.sub(r'et al\.', '', text)
    text = re.sub(r'e\.g\.', '', text)
    text = re.sub(r'i\.e\.', '', text)
    #text = re.sub(r'\[.\]', '.', text)
    text = re.sub(r'\[\d+\]', '', text)
    
    text = ioc_sub(text)
    
    text = re.sub(r'[^A-Za-z0-9_\[\]]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    
    text = rmstopword_and_lemmatize(text)
    
    text = re.sub(r'[\[\]]', ' ', text)
    text = re.sub(r' [a-z0-9] ', '', text)
    
    return text


In [23]:
df['text_clean'] = df['Text'].map(lambda t: preprocess(t))

  df['text_clean'] = df['Text'].map(lambda t: preprocess(t))


In [24]:
print(df['text_clean'][0])

waspthemes visual cs style editor aka yellow pencil visual theme customizer plugin2wordpress allows yp_option_update csrf demonstrate use yp_remote_get obtain admin access


In [25]:
print(df.iloc[0])

ID                                               CVE_2019_11886
Text          The WaspThemes Visual CSS Style Editor (aka ye...
TA0001                                                        1
TA0002                                                        1
TA0003                                                        0
                                    ...                        
T1134.001                                                   NaN
T1052.001                                                   NaN
T1557.003                                                   NaN
T1596.001                                                   NaN
text_clean    waspthemes visual cs style editor aka yellow p...
Name: 0, Length: 596, dtype: object


# Dataset & Dataloader

In [26]:
def get_texts(df):
    texts = df['text']
    texts = texts.values.tolist()
    return texts

def get_labels(df):
    labels = df['labels'].values
    return labels

def get_ids(df):
    ids = df['ID'].values.tolist()

In [27]:
from torch.utils.data import Dataset, DataLoader
class TransformerDataset_ta(Dataset):
    def __init__(self, df, indices, set_type=None):
        super(TransformerDataset_ta, self).__init__()

        df = df.iloc[indices]
        self.texts = get_texts(df)
        self.set_type = set_type
        if self.set_type != 'test':
            self.labels = get_labels(df)

        self.tokenizer = Config().TOKENIZER
        self.max_length = Config().MAX_LENGTH

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        tokenized = self.tokenizer.encode_plus(
            self.texts[index],
            max_length=self.max_length,
            pad_to_max_length=True,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=False,
            return_tensors='pt'
        )

        input_ids = tokenized['input_ids'].squeeze()
        attention_mask = tokenized['attention_mask'].squeeze()

        if self.set_type != 'test':
            return {
                'input_ids': input_ids.long(),
                'attention_mask': attention_mask.long(),
                'labels': torch.Tensor(self.labels[index]).float(),
            }

        return {
            'input_ids': input_ids.long(),
            'attention_mask': attention_mask.long(),
        }

In [28]:

class TransformerDataset_te(Dataset):
    def __init__(self, df, indices, set_type=None):
        super(TransformerDataset_te, self).__init__()

        df = df.iloc[indices]
        self.texts = get_texts(df)
        self.set_type = set_type
        if self.set_type != 'test':
            self.labels = get_labels(df)

        self.tokenizer = Config().TOKENIZER
        self.max_length = Config().MAX_LENGTH

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        tokenized = self.tokenizer.encode_plus(
            self.texts[index],
            max_length=self.max_length,
            pad_to_max_length=True,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=False,
            return_tensors='pt'
        )

        input_ids = tokenized['input_ids'].squeeze()
        attention_mask = tokenized['attention_mask'].squeeze()

        if self.set_type != 'test':
            return {
                'input_ids': input_ids.long(),
                'attention_mask': attention_mask.long(),
                'labels': torch.Tensor(self.labels[index]).float(),
            }

        return {
            'input_ids': input_ids.long(),
            'attention_mask': attention_mask.long(),
        }

In [29]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df, test_size=0.2, random_state=2222)
validation, test = train_test_split(test, test_size=0.5, random_state=2222)

In [30]:
df_ta_train = train[['text_clean', 'tactic_label']]
df_ta_val = validation[['text_clean', 'tactic_label']]
df_ta_test = test[['text_clean', 'tactic_label']]
df_ta_train = df_ta_train.rename(columns={'text_clean':'text', 'tactic_label':'labels'})
df_ta_val = df_ta_val.rename(columns={'text_clean':'text', 'tactic_label':'labels'})
df_ta_test = df_ta_test.rename(columns={'text_clean':'text', 'tactic_label':'labels'})


In [31]:
# df_te_train = df.iloc[train.index][['text_clean', 'technique_label']]
# df_te_val = df.iloc[validation.index][['text_clean', 'technique_label']]
# df_te_test = df.iloc[test.index][['text_clean', 'technique_label']]
# df_te_train = df_te_train.rename(columns={'text_clean':'text', 'technique_label':'labels'})
# df_te_val = df_te_val.rename(columns={'text_clean':'text', 'technique_label':'labels'})
# df_te_test = df_te_test.rename(columns={'text_clean':'text', 'technique_label':'labels'})

In [32]:
train_data_ta = TransformerDataset_ta(df_ta_train, range(len(df_ta_train)))
val_data_ta = TransformerDataset_ta(df_ta_val, range(len(df_ta_val)))
test_data_ta = TransformerDataset_ta(df_ta_test, range(len(df_ta_test)))

In [33]:
# train_data_te = TransformerDataset_te(df_ta_train, range(len(df_te_train)))
# val_data_te = TransformerDataset_te(df_ta_val, range(len(df_te_val)))
# test_data_te = TransformerDataset_te(df_ta_test, range(len(df_te_test)))

In [34]:
train_dataloader_ta = DataLoader(train_data_ta, batch_size=Config().BATCH_SIZE)
val_dataloader_ta = DataLoader(val_data_ta, batch_size=Config().BATCH_SIZE)
test_dataloader_ta = DataLoader(test_data_ta, batch_size=Config().BATCH_SIZE)

In [35]:
# train_dataloader_te = DataLoader(train_data_te, batch_size=Config().BATCH_SIZE)
# val_dataloader_te = DataLoader(val_data_te, batch_size=Config().BATCH_SIZE)
# test_dataloader_te = DataLoader(test_data_te, batch_size=Config().BATCH_SIZE)

In [36]:
train_dataloader = train_dataloader_ta
val_dataloader = val_dataloader_ta
test_dataloader = test_dataloader_ta

In [37]:
b = next(iter(train_dataloader_ta))
for k, v in b.items():
    print(f'{k} shape: {v.shape}')

input_ids shape: torch.Size([16, 320])
attention_mask shape: torch.Size([16, 320])
labels shape: torch.Size([16, 14])


# Model

In [38]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()

        self.transformer_model = AutoModel.from_pretrained(
                Config().MODEL_PATH
        )
        self.dropout = nn.Dropout(0.3)

        self.output = nn.Linear(768, Config().NUM_LABELS_TA)

    def forward(
        self,
        input_ids,
        attention_mask=None,
        token_type_ids=None
        ):

        _, o2 = self.transformer_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            return_dict=False
        )

        x = self.dropout(o2)
        out = self.output(x)

        return out

# Train and test

In [39]:
BEST_F1 = 0
BEST_TRUE = []
BEST_PREDICTED = []

In [40]:
metrics_log = [] 

In [41]:
def val(model, val_dataloader, criterion, is_final_test=False):
    global BEST_F1, BEST_TRUE, BEST_PREDICTED, device

    val_loss = 0
    true, pred = [], []

    model.eval()

    results = {}

    for step, batch in enumerate(val_dataloader):
        b_input_ids = batch['input_ids'].to(device)
        b_attention_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)

        with torch.no_grad():
            logits = model(input_ids=b_input_ids, attention_mask=b_attention_mask)

            loss = criterion(logits, b_labels)
            val_loss += loss.item()

            logits = torch.sigmoid(logits)
            logits = np.round(logits.cpu().numpy())
            labels = b_labels.cpu().numpy()

            pred.extend(logits)
            true.extend(labels)

    # avg_val_loss = val_loss / len(val_dataloader)
    # print('Val loss:', avg_val_loss)
    # print('Val accuracy:', accuracy_score(true, pred))

    # print('Val precision:', precision_score(true, pred, average='weighted'))
    # print('Val recall:', recall_score(true, pred, average='weighted'))

    # val_micro_f1_score = f1_score(true, pred, average='micro')
    # print('Val micro f1 score:', val_micro_f1_score)

    # val_macro_f1_score = f1_score(true, pred, average='macro')
    # print('Val macro f1 score:', val_macro_f1_score)

    val_weighted_f1_score = f1_score(true, pred, average='weighted')
    # print('Val weighted f1 score:', val_weighted_f1_score)

    results.update({
        "f1_sampled": f1_score(true, pred, average='samples', zero_division=0),
        "f1_micro": f1_score(true, pred, average='micro', zero_division=0),
        "f1_macro": f1_score(true, pred, average='macro', zero_division=0),
        "f_05_sample": fbeta_score(true, pred, average='samples', beta=0.5, zero_division=0),
        "f_05_micro": fbeta_score(true, pred, average='micro', beta=0.5, zero_division=0),
        "f_05_macro": fbeta_score(true, pred, average='macro', beta=0.5, zero_division=0),
        "precision_sampled": precision_score(true, pred, average='samples', zero_division=0),
        "precision_micro": precision_score(true, pred, average='micro', zero_division=0),
        "precision_macro": precision_score(true, pred, average='macro', zero_division=0),
        "recall_sampled": recall_score(true, pred, average='samples', zero_division=0),
        "recall_micro": recall_score(true, pred, average='micro', zero_division=0),
        "recall_macro": recall_score(true, pred, average='macro', zero_division=0),
        "accuracy": accuracy_score(true, pred),
        })

    labels_float = labels.astype(float)
    results.update({
        "coverage_error": coverage_error(labels_float, logits),
        "label_ranking_loss": label_ranking_loss(labels_float, logits),
        "lrap": label_ranking_average_precision_score(labels_float, logits),
    })

    metrics_log.append(results.copy())

    print(results)


    if (is_final_test is True):
      BEST_F1 = val_weighted_f1_score
      BEST_TRUE = true
      BEST_PREDICTED = pred
    elif (val_weighted_f1_score > BEST_F1):
      BEST_F1 = val_weighted_f1_score
      BEST_TRUE = true
      BEST_PREDICTED = pred

    return val_weighted_f1_score



In [42]:
def train(model, train_dataloader, val_dataloader, criterion, optimizer, scheduler, epoch):
    global device
    nv = Config().N_VALIDATE_DUR_TRAIN
    temp = len(train_dataloader) // nv
    temp = temp - (temp % 100)
    validate_at_steps = [temp * x for x in range(1, nv + 1)]

    train_loss = 0
    train_losses = []

    for step, batch in enumerate(tqdm(train_dataloader,
                                      desc='Epoch ' + str(epoch))):
        model.train()

        b_input_ids = batch['input_ids'].to(device)
        b_attention_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)

        optimizer.zero_grad()

        logits = model(input_ids=b_input_ids, attention_mask=b_attention_mask)

        loss = criterion(logits, b_labels)
        train_loss += loss.item()

        loss.backward()

        optimizer.step()

        scheduler.step()

        if step in validate_at_steps:
            print(f'-- Step: {step}')
            _ = val(model, val_dataloader, criterion)


    avg_train_loss = train_loss / len(train_dataloader)
    print('Training loss:', avg_train_loss)

In [43]:
def run():
    global train_data, val_data, test_data, train_dataloader, val_dataloader, test_dataloader, model
    torch.manual_seed(Config().SEED)

    criterion = nn.BCEWithLogitsLoss()

    if Config().FULL_FINETUNING:
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.001,
            },
            {
                "params": [
                    p for n, p in param_optimizer if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        optimizer = optim.AdamW(optimizer_parameters, lr=Config().LR)

    num_training_steps = len(train_dataloader) * Config().EPOCHS
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps
    )

    max_val_weighted_f1_score = float('-inf')
    for epoch in range(Config().EPOCHS):
        train(model, train_dataloader, val_dataloader, criterion, optimizer, scheduler, epoch)
        val_weighted_f1_score = val(model, val_dataloader, criterion)

        if Config().SAVE_BEST_ONLY:
            if val_weighted_f1_score > max_val_weighted_f1_score:
                best_model = copy.deepcopy(model)

                model_name = 'scibert_best_model'
                torch.save(best_model.state_dict(), model_name + '.pt')

                print(f'--- Best Model. Val: {max_val_weighted_f1_score} -> {val_weighted_f1_score}')
                max_val_weighted_f1_score = val_weighted_f1_score

    return best_model, max_val_weighted_f1_score

# Tactic_multi model

In [44]:
from sklearn.metrics import coverage_error, label_ranking_average_precision_score, label_ranking_loss
from sklearn.metrics import hamming_loss, precision_score, recall_score, f1_score, fbeta_score
from sklearn.metrics import accuracy_score

In [45]:
project_dir = './'
config = Config()
device = Config().DEVICE
model = Model()
model.to(device)

Model(
  (transformer_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31090, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [46]:
best_model_ta, best_val_weighted_f1_score_ta = run()

Epoch 0:  28%|██▊       | 399/1406 [00:38<01:36, 10.45it/s]

-- Step: 400


Epoch 0:  29%|██▊       | 403/1406 [00:45<12:34,  1.33it/s]

{'f1_sampled': 0.004255571360834518, 'f1_micro': 0.010223298358891578, 'f1_macro': 0.0066690066690066685, 'f_05_sample': 0.005349804691909955, 'f_05_micro': 0.02489517819706499, 'f_05_macro': 0.013410502540937324, 'precision_sampled': 0.006756756756756757, 'precision_micro': 0.5757575757575758, 'precision_macro': 0.04112554112554113, 'recall_sampled': 0.0033309625414888575, 'recall_micro': 0.00515743756786102, 'recall_macro': 0.0036287242169595114, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 0:  57%|█████▋    | 800/1406 [01:24<00:59, 10.22it/s]

-- Step: 800


Epoch 0:  57%|█████▋    | 803/1406 [01:30<08:45,  1.15it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 0:  85%|████████▌ | 1199/1406 [02:09<00:20, 10.12it/s]

-- Step: 1200


Epoch 0:  85%|████████▌ | 1201/1406 [02:15<03:31,  1.03s/it]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 0: 100%|██████████| 1406/1406 [02:35<00:00,  9.05it/s]


Training loss: 0.2830146870669905
{'f1_sampled': 0.006187766714082503, 'f1_micro': 0.013430029546065002, 'f1_macro': 0.008647526807333102, 'f_05_sample': 0.0074221307773939356, 'f_05_micro': 0.032552083333333336, 'f_05_macro': 0.016846361185983826, 'precision_sampled': 0.008890469416785207, 'precision_micro': 0.6410256410256411, 'precision_macro': 0.04578754578754579, 'recall_sampled': 0.005079421526789947, 'recall_micro': 0.0067861020629750276, 'recall_macro': 0.004774637127578305, 'accuracy': 0.004623044096728307, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}
--- Best Model. Val: -inf -> 0.012290567416719904


Epoch 1:  28%|██▊       | 400/1406 [00:38<01:38, 10.21it/s]

-- Step: 400


Epoch 1:  29%|██▊       | 402/1406 [00:45<17:16,  1.03s/it]

{'f1_sampled': 0.013786154575628259, 'f1_micro': 0.027953586497890294, 'f1_macro': 0.015708358032009484, 'f_05_sample': 0.01608187134502924, 'f_05_micro': 0.06438289601554907, 'f_05_macro': 0.02348457993619284, 'precision_sampled': 0.018847795163584636, 'precision_micro': 0.49074074074074076, 'precision_macro': 0.03505291005291005, 'recall_sampled': 0.011747273589378852, 'recall_micro': 0.014386536373507057, 'recall_macro': 0.010122230710466005, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 1:  57%|█████▋    | 799/1406 [01:23<00:57, 10.48it/s]

-- Step: 800


Epoch 1:  57%|█████▋    | 803/1406 [01:30<07:22,  1.36it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 1:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.58it/s]

-- Step: 1200


Epoch 1:  85%|████████▌ | 1202/1406 [02:15<03:28,  1.02s/it]

{'f1_sampled': 0.009257942152678994, 'f1_micro': 0.01928743637824806, 'f1_macro': 0.0121580547112462, 'f_05_sample': 0.010847078281288808, 'f_05_micro': 0.04639175257731959, 'f_05_macro': 0.022556390977443608, 'precision_sampled': 0.012802275960170697, 'precision_micro': 0.7346938775510204, 'precision_macro': 0.052478134110787174, 'recall_sampled': 0.007865101944049312, 'recall_micro': 0.009771986970684038, 'recall_macro': 0.006875477463712758, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 1: 100%|██████████| 1406/1406 [02:35<00:00,  9.06it/s]


Training loss: 0.2792987271634647
{'f1_sampled': 0.013786154575628259, 'f1_micro': 0.02792413066385669, 'f1_macro': 0.01557907113462669, 'f_05_sample': 0.01608187134502924, 'f_05_micro': 0.06413359148112294, 'f_05_macro': 0.02302745915884602, 'precision_sampled': 0.018847795163584636, 'precision_micro': 0.4732142857142857, 'precision_macro': 0.03380102040816326, 'recall_sampled': 0.011747273589378852, 'recall_micro': 0.014386536373507057, 'recall_macro': 0.010122230710466005, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}
--- Best Model. Val: 0.012290567416719904 -> 0.022142241167455307


Epoch 2:  28%|██▊       | 399/1406 [00:39<01:36, 10.47it/s]

-- Step: 400


Epoch 2:  29%|██▊       | 403/1406 [00:45<12:27,  1.34it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 2:  57%|█████▋    | 800/1406 [01:24<00:59, 10.14it/s]

-- Step: 800


Epoch 2:  57%|█████▋    | 802/1406 [01:30<10:22,  1.03s/it]

{'f1_sampled': 0.0094950213371266, 'f1_micro': 0.01981788966256026, 'f1_macro': 0.012466307277628031, 'f_05_sample': 0.011143427261848315, 'f_05_micro': 0.047631307929969106, 'f_05_macro': 0.02302140368342459, 'precision_sampled': 0.013157894736842105, 'precision_micro': 0.74, 'precision_macro': 0.05285714285714286, 'recall_sampled': 0.008042911332385017, 'recall_micro': 0.01004343105320304, 'recall_macro': 0.00706646294881589, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 2:  85%|████████▌ | 1199/1406 [02:09<00:19, 10.42it/s]

-- Step: 1200


Epoch 2:  86%|████████▌ | 1203/1406 [02:15<02:32,  1.33it/s]

{'f1_sampled': 0.008937885253674727, 'f1_micro': 0.018225676762262128, 'f1_macro': 0.01153715643026807, 'f_05_sample': 0.010370803133961028, 'f_05_micro': 0.04390495867768595, 'f_05_macro': 0.021606507371631927, 'precision_sampled': 0.01209103840682788, 'precision_micro': 0.723404255319149, 'precision_macro': 0.05167173252279635, 'recall_sampled': 0.007657657657657657, 'recall_micro': 0.009229098805646038, 'recall_macro': 0.006493506493506494, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 2: 100%|██████████| 1406/1406 [02:35<00:00,  9.04it/s]


Training loss: 0.27858235742128423
{'f1_sampled': 0.010443338074917022, 'f1_micro': 0.021356113187399894, 'f1_macro': 0.013106159895150722, 'f_05_sample': 0.01215101379575064, 'f_05_micro': 0.0508646998982706, 'f_05_macro': 0.02296738631143776, 'precision_sampled': 0.01422475106685633, 'precision_micro': 0.6451612903225806, 'precision_macro': 0.04608294930875576, 'recall_sampled': 0.008931958274063538, 'recall_micro': 0.010857763300760043, 'recall_macro': 0.007639419404125286, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 3:  28%|██▊       | 399/1406 [00:38<01:38, 10.24it/s]

-- Step: 400


Epoch 3:  29%|██▊       | 403/1406 [00:45<12:16,  1.36it/s]

{'f1_sampled': 0.013786154575628259, 'f1_micro': 0.028609271523178808, 'f1_macro': 0.02098659353335138, 'f_05_sample': 0.016259680733364947, 'f_05_micro': 0.06669960474308301, 'f_05_macro': 0.03639949935835485, 'precision_sampled': 0.019203413940256046, 'precision_micro': 0.5934065934065934, 'precision_macro': 0.08333333333333333, 'recall_sampled': 0.011569464201043147, 'recall_micro': 0.014657980456026058, 'recall_macro': 0.012435277141159495, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 3:  57%|█████▋    | 799/1406 [01:23<00:58, 10.41it/s]

-- Step: 800


Epoch 3:  57%|█████▋    | 803/1406 [01:30<07:38,  1.31it/s]

{'f1_sampled': 0.014023233760075865, 'f1_micro': 0.028578989150568935, 'f1_macro': 0.018742260852838994, 'f_05_sample': 0.016378220325588746, 'f_05_micro': 0.06643700787401575, 'f_05_macro': 0.03146388591132526, 'precision_sampled': 0.019203413940256046, 'precision_micro': 0.5684210526315789, 'precision_macro': 0.08799171842650103, 'recall_sampled': 0.011925082977714555, 'recall_micro': 0.014657980456026058, 'recall_macro': 0.011374246668364315, 'accuracy': 0.008179231863442389, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 3:  85%|████████▌ | 1200/1406 [02:09<00:19, 10.33it/s]

-- Step: 1200


Epoch 3:  86%|████████▌ | 1203/1406 [02:15<02:54,  1.16it/s]

{'f1_sampled': 0.01257705073494547, 'f1_micro': 0.02549800796812749, 'f1_macro': 0.017335319151593497, 'f_05_sample': 0.014614943891259682, 'f_05_micro': 0.059880239520958084, 'f_05_macro': 0.030485091232754783, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5925925925925926, 'precision_macro': 0.11301989150090416, 'recall_sampled': 0.010769321953532479, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.010228333757745523, 'accuracy': 0.008179231863442389, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 3: 100%|██████████| 1406/1406 [02:35<00:00,  9.05it/s]


Training loss: 0.2776752659470733
{'f1_sampled': 0.012600758653390234, 'f1_micro': 0.026483050847457626, 'f1_macro': 0.01762174918357131, 'f_05_sample': 0.014955745218903114, 'f_05_micro': 0.061697926949654494, 'f_05_macro': 0.02974597041032479, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.5434782608695652, 'precision_macro': 0.06798029556650247, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.010610304727951787, 'accuracy': 0.006401137980085348, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}
--- Best Model. Val: 0.022142241167455307 -> 0.02217441792840833


Epoch 4:  28%|██▊       | 399/1406 [00:38<01:39, 10.13it/s]

-- Step: 400


Epoch 4:  29%|██▊       | 403/1406 [00:45<12:20,  1.35it/s]

{'f1_sampled': 0.011889521100047417, 'f1_micro': 0.02497343251859724, 'f1_macro': 0.014821822768842637, 'f_05_sample': 0.014066698277224593, 'f_05_micro': 0.05869130869130869, 'f_05_macro': 0.02432712215320911, 'precision_sampled': 0.016714082503556188, 'precision_micro': 0.5875, 'precision_macro': 0.04249547920433996, 'recall_sampled': 0.00996917970602181, 'recall_micro': 0.012757871878393051, 'recall_macro': 0.008976317799847212, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 4:  57%|█████▋    | 800/1406 [01:23<00:57, 10.47it/s]

-- Step: 800


Epoch 4:  57%|█████▋    | 802/1406 [01:30<10:17,  1.02s/it]

{'f1_sampled': 0.011510194404931244, 'f1_micro': 0.024986709197235512, 'f1_macro': 0.027885400200436, 'f_05_sample': 0.013903706337916866, 'f_05_micro': 0.05880880880880881, 'f_05_macro': 0.04472169570158224, 'precision_sampled': 0.016714082503556188, 'precision_micro': 0.6025641025641025, 'precision_macro': 0.08383838383838384, 'recall_sampled': 0.009346846846846847, 'recall_micro': 0.012757871878393051, 'recall_macro': 0.017464561582208642, 'accuracy': 0.00604551920341394, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 4:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.52it/s]

-- Step: 1200


Epoch 4:  85%|████████▌ | 1202/1406 [02:14<03:26,  1.01s/it]

{'f1_sampled': 0.013703176861071597, 'f1_micro': 0.02867002920095567, 'f1_macro': 0.028319440620123988, 'f_05_sample': 0.01623216261374156, 'f_05_micro': 0.06723107569721115, 'f_05_macro': 0.04872196263529486, 'precision_sampled': 0.019203413940256046, 'precision_micro': 0.6506024096385542, 'precision_macro': 0.09377289377289377, 'recall_sampled': 0.011421289710763392, 'recall_micro': 0.014657980456026058, 'recall_macro': 0.016679399032340208, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 4: 100%|██████████| 1406/1406 [02:34<00:00,  9.09it/s]


Training loss: 0.27713695285076095
{'f1_sampled': 0.014141773352299668, 'f1_micro': 0.028443508032657362, 'f1_macro': 0.017025620087781974, 'f_05_sample': 0.01643749012170065, 'f_05_micro': 0.06528046421663443, 'f_05_macro': 0.026477846582729704, 'precision_sampled': 0.019203413940256046, 'precision_micro': 0.4778761061946903, 'precision_macro': 0.06981981981981981, 'recall_sampled': 0.01210289236605026, 'recall_micro': 0.014657980456026058, 'recall_macro': 0.010843731431966727, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}
--- Best Model. Val: 0.02217441792840833 -> 0.022720033177299395


Epoch 5:  28%|██▊       | 399/1406 [00:38<01:38, 10.25it/s]

-- Step: 400


Epoch 5:  29%|██▊       | 403/1406 [00:45<12:16,  1.36it/s]

{'f1_sampled': 0.016121384542437174, 'f1_micro': 0.03254593175853018, 'f1_macro': 0.020777411817814607, 'f_05_sample': 0.018810398744609276, 'f_05_micro': 0.07402101241642789, 'f_05_macro': 0.0351680644207192, 'precision_sampled': 0.022048364153627313, 'precision_micro': 0.49206349206349204, 'precision_macro': 0.11092214663643234, 'recall_sampled': 0.013732811759127547, 'recall_micro': 0.016829533116178068, 'recall_macro': 0.012712486882482749, 'accuracy': 0.008179231863442389, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 5:  57%|█████▋    | 799/1406 [01:23<00:58, 10.36it/s]

-- Step: 800


Epoch 5:  57%|█████▋    | 803/1406 [01:30<07:23,  1.36it/s]

{'f1_sampled': 0.020649596965386437, 'f1_micro': 0.045336112558624284, 'f1_macro': 0.04350393020399411, 'f_05_sample': 0.023375969099653313, 'f_05_micro': 0.10116279069767442, 'f_05_macro': 0.07919991835737737, 'precision_sampled': 0.026849217638691324, 'precision_micro': 0.564935064935065, 'precision_macro': 0.20662131519274377, 'recall_sampled': 0.01906709340919867, 'recall_micro': 0.023615635179153095, 'recall_macro': 0.024962445150207326, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 5:  85%|████████▌ | 1199/1406 [02:08<00:19, 10.50it/s]

-- Step: 1200


Epoch 5:  86%|████████▌ | 1203/1406 [02:14<02:28,  1.37it/s]

{'f1_sampled': 0.02068515884305358, 'f1_micro': 0.04462895692786715, 'f1_macro': 0.039439625037541594, 'f_05_sample': 0.023077503340661235, 'f_05_micro': 0.09853345554537121, 'f_05_macro': 0.07140682646573507, 'precision_sampled': 0.026137980085348508, 'precision_micro': 0.5058823529411764, 'precision_macro': 0.1975270728446417, 'recall_sampled': 0.01939307728781413, 'recall_micro': 0.023344191096634093, 'recall_macro': 0.022794033851825764, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 5: 100%|██████████| 1406/1406 [02:34<00:00,  9.10it/s]


Training loss: 0.276888066894808
{'f1_sampled': 0.01685633001422475, 'f1_micro': 0.03467297084318361, 'f1_macro': 0.02376339753275259, 'f_05_sample': 0.019837036284404707, 'f_05_micro': 0.07902298850574713, 'f_05_macro': 0.042396230508868815, 'precision_sampled': 0.023470839260312945, 'precision_micro': 0.5365853658536586, 'precision_macro': 0.1847933019361591, 'recall_sampled': 0.01423660502607871, 'recall_micro': 0.017915309446254073, 'recall_macro': 0.014002405179474724, 'accuracy': 0.008890469416785207, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}
--- Best Model. Val: 0.022720033177299395 -> 0.029983150865915246


Epoch 6:  28%|██▊       | 400/1406 [00:38<01:35, 10.59it/s]

-- Step: 400


Epoch 6:  29%|██▊       | 404/1406 [00:43<11:04,  1.51it/s]

{'f1_sampled': 0.015267899478425793, 'f1_micro': 0.031804929764113435, 'f1_macro': 0.03167703013358771, 'f_05_sample': 0.01805470884418253, 'f_05_micro': 0.07425742574257425, 'f_05_macro': 0.05725320855375106, 'precision_sampled': 0.021337126600284494, 'precision_micro': 0.6741573033707865, 'precision_macro': 0.14485407066052228, 'recall_sampled': 0.012754860123281173, 'recall_micro': 0.016286644951140065, 'recall_macro': 0.018213594429326626, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 6:  57%|█████▋    | 799/1406 [01:21<00:57, 10.49it/s]

-- Step: 800


Epoch 6:  57%|█████▋    | 803/1406 [01:27<06:38,  1.51it/s]

{'f1_sampled': 0.0036747273589378847, 'f1_micro': 0.008092797410304829, 'f1_macro': 0.018159806295399514, 'f_05_sample': 0.00447910316331369, 'f_05_micro': 0.01986228813559322, 'f_05_macro': 0.030612244897959183, 'precision_sampled': 0.005334281650071123, 'precision_micro': 0.6521739130434783, 'precision_macro': 0.056390977443609026, 'recall_sampled': 0.002904220009483167, 'recall_micro': 0.004071661237785016, 'recall_macro': 0.010822510822510822, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 6:  85%|████████▌ | 1199/1406 [02:05<00:19, 10.53it/s]

-- Step: 1200


Epoch 6:  86%|████████▌ | 1203/1406 [02:12<02:31,  1.34it/s]

{'f1_sampled': 0.005168326220957799, 'f1_micro': 0.010735373054213635, 'f1_macro': 0.010733485389993826, 'f_05_sample': 0.005464944609681451, 'f_05_micro': 0.02596053997923157, 'f_05_macro': 0.023019610672223172, 'precision_sampled': 0.005867709815078236, 'precision_micro': 0.47619047619047616, 'precision_macro': 0.1744743351886209, 'recall_sampled': 0.005126837363679469, 'recall_micro': 0.0054288816503800215, 'recall_macro': 0.0057042301355240965, 'accuracy': 0.0039118065433854906, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 6: 100%|██████████| 1406/1406 [02:31<00:00,  9.26it/s]


Training loss: 0.2788659791497893
{'f1_sampled': 0.004409672830725461, 'f1_micro': 0.00967741935483871, 'f1_macro': 0.019047619047619046, 'f_05_sample': 0.005353332655964235, 'f_05_micro': 0.023510971786833857, 'f_05_macro': 0.026455026455026454, 'precision_sampled': 0.006401137980085348, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.0035265528686581316, 'recall_micro': 0.004885993485342019, 'recall_macro': 0.012987012987012988, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 7:  28%|██▊       | 400/1406 [00:38<01:38, 10.26it/s]

-- Step: 400


Epoch 7:  29%|██▊       | 402/1406 [00:45<16:52,  1.01s/it]

{'f1_sampled': 0.00605737316263632, 'f1_micro': 0.012861736334405145, 'f1_macro': 0.008181623473977297, 'f_05_sample': 0.00717799566483777, 'f_05_micro': 0.030959752321981424, 'f_05_macro': 0.016707057354777118, 'precision_sampled': 0.008534850640113799, 'precision_micro': 0.5, 'precision_macro': 0.08571428571428572, 'recall_sampled': 0.005067567567567568, 'recall_micro': 0.006514657980456026, 'recall_macro': 0.0044414188924452045, 'accuracy': 0.005334281650071123, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 7:  57%|█████▋    | 799/1406 [01:23<00:59, 10.19it/s]

-- Step: 800


Epoch 7:  57%|█████▋    | 803/1406 [01:29<07:14,  1.39it/s]

{'f1_sampled': 0.00435040303461356, 'f1_micro': 0.009695663883652034, 'f1_macro': 0.020089285714285716, 'f_05_sample': 0.005310997087312877, 'f_05_micro': 0.02368421052631579, 'f_05_macro': 0.029900332225913623, 'precision_sampled': 0.006401137980085348, 'precision_micro': 0.6206896551724138, 'precision_macro': 0.04433497536945813, 'recall_sampled': 0.0034672830725462306, 'recall_micro': 0.004885993485342019, 'recall_macro': 0.012987012987012988, 'accuracy': 0.0039118065433854906, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 7:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.45it/s]

-- Step: 1200


Epoch 7:  85%|████████▌ | 1202/1406 [02:14<03:24,  1.00s/it]

{'f1_sampled': 0.0036984352773826454, 'f1_micro': 0.008075370121130552, 'f1_macro': 0.005291005291005291, 'f_05_sample': 0.004464285714285715, 'f_05_micro': 0.019695378151260504, 'f_05_macro': 0.010757314974182445, 'precision_sampled': 0.005334281650071123, 'precision_micro': 0.4838709677419355, 'precision_macro': 0.03456221198156682, 'recall_sampled': 0.002993124703651019, 'recall_micro': 0.004071661237785016, 'recall_macro': 0.0028647822765469825, 'accuracy': 0.0039118065433854906, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 7: 100%|██████████| 1406/1406 [02:34<00:00,  9.11it/s]


Training loss: 0.2794347104866847
{'f1_sampled': 0.005452821242294926, 'f1_micro': 0.01552047096601552, 'f1_macro': 0.0225148978598094, 'f_05_sample': 0.0056485911749069645, 'f_05_micro': 0.037217659137577006, 'f_05_macro': 0.03816932585188303, 'precision_sampled': 0.005867709815078236, 'precision_micro': 0.5471698113207547, 'precision_macro': 0.07991718426501035, 'recall_sampled': 0.005393551446183025, 'recall_micro': 0.007871878393051032, 'recall_macro': 0.013496307613954674, 'accuracy': 0.004267425320056899, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 8:  28%|██▊       | 399/1406 [00:38<01:37, 10.31it/s]

-- Step: 400


Epoch 8:  29%|██▊       | 403/1406 [00:45<12:18,  1.36it/s]

{'f1_sampled': 0.0038762446657183494, 'f1_micro': 0.008602150537634409, 'f1_macro': 0.013445378151260503, 'f_05_sample': 0.004743700467384679, 'f_05_micro': 0.02089864158829676, 'f_05_macro': 0.020554984583761562, 'precision_sampled': 0.005689900426742532, 'precision_micro': 0.4444444444444444, 'precision_macro': 0.031746031746031744, 'recall_sampled': 0.0030523944997629202, 'recall_micro': 0.004343105320304018, 'recall_macro': 0.008528784648187633, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 8:  57%|█████▋    | 799/1406 [01:23<00:58, 10.45it/s]

-- Step: 800


Epoch 8:  57%|█████▋    | 803/1406 [01:29<07:15,  1.39it/s]

{'f1_sampled': 0.0041488857278330965, 'f1_micro': 0.009110396570203644, 'f1_macro': 0.01652089407191448, 'f_05_sample': 0.005071801124432704, 'f_05_micro': 0.021929824561403508, 'f_05_macro': 0.020864015709376536, 'precision_sampled': 0.00604551920341394, 'precision_micro': 0.3541666666666667, 'precision_macro': 0.025297619047619048, 'recall_sampled': 0.0032598387861545756, 'recall_micro': 0.004614549402823019, 'recall_macro': 0.012265512265512266, 'accuracy': 0.003200568990042674, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 8:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.42it/s]

-- Step: 1200


Epoch 8:  85%|████████▌ | 1202/1406 [02:14<03:23,  1.00it/s]

{'f1_sampled': 0.002513039355144618, 'f1_micro': 0.0053806833467850415, 'f1_macro': 0.0035100035100035097, 'f_05_sample': 0.00298254081148818, 'f_05_micro': 0.01310272536687631, 'f_05_macro': 0.007058159232072275, 'precision_sampled': 0.0035561877667140826, 'precision_micro': 0.30303030303030304, 'precision_macro': 0.021645021645021644, 'recall_sampled': 0.0021040777619724988, 'recall_micro': 0.0027144408251900108, 'recall_macro': 0.0019098548510313215, 'accuracy': 0.0039118065433854906, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 8: 100%|██████████| 1406/1406 [02:34<00:00,  9.11it/s]


Training loss: 0.28095622110587265
{'f1_sampled': 0.004587482219061166, 'f1_micro': 0.010669511869831956, 'f1_macro': 0.017421602787456445, 'f_05_sample': 0.00572024091760934, 'f_05_micro': 0.02535496957403651, 'f_05_macro': 0.019896538002387585, 'precision_sampled': 0.007112375533428165, 'precision_micro': 0.3076923076923077, 'precision_macro': 0.02197802197802198, 'recall_sampled': 0.0035976766239924135, 'recall_micro': 0.0054288816503800215, 'recall_macro': 0.01443001443001443, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 9:  28%|██▊       | 400/1406 [00:38<01:37, 10.32it/s]

-- Step: 400


Epoch 9:  29%|██▊       | 403/1406 [00:45<14:05,  1.19it/s]

{'f1_sampled': 0.013786154575628259, 'f1_micro': 0.02792413066385669, 'f1_macro': 0.01557907113462669, 'f_05_sample': 0.01608187134502924, 'f_05_micro': 0.06413359148112294, 'f_05_macro': 0.02302745915884602, 'precision_sampled': 0.018847795163584636, 'precision_micro': 0.4732142857142857, 'precision_macro': 0.03380102040816326, 'recall_sampled': 0.011747273589378852, 'recall_micro': 0.014386536373507057, 'recall_macro': 0.010122230710466005, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 9:  57%|█████▋    | 799/1406 [01:23<00:59, 10.28it/s]

-- Step: 800


Epoch 9:  57%|█████▋    | 803/1406 [01:29<07:22,  1.36it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 9:  85%|████████▌ | 1199/1406 [02:08<00:19, 10.46it/s]

-- Step: 1200


Epoch 9:  86%|████████▌ | 1203/1406 [02:14<02:28,  1.37it/s]

{'f1_sampled': 0.0016239924134660976, 'f1_micro': 0.0037776578521316784, 'f1_macro': 0.002525252525252525, 'f_05_sample': 0.002017289846237215, 'f_05_micro': 0.009278897136797455, 'f_05_macro': 0.005411255411255411, 'precision_sampled': 0.0024893314366998577, 'precision_micro': 0.3181818181818182, 'precision_macro': 0.022727272727272728, 'recall_sampled': 0.0012743006164058794, 'recall_micro': 0.0019001085776330076, 'recall_macro': 0.001336898395721925, 'accuracy': 0.003200568990042674, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 9: 100%|██████████| 1406/1406 [02:34<00:00,  9.11it/s]


Training loss: 0.2775161495461403
{'f1_sampled': 0.013750592697961118, 'f1_micro': 0.02916998143728454, 'f1_macro': 0.030088225766497074, 'f_05_sample': 0.016380337104021314, 'f_05_micro': 0.06820436507936507, 'f_05_macro': 0.050741149589893904, 'precision_sampled': 0.019559032716927455, 'precision_micro': 0.632183908045977, 'precision_macro': 0.09371002132196161, 'recall_sampled': 0.011421289710763392, 'recall_micro': 0.01492942453854506, 'recall_macro': 0.01793141499023852, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 10:  28%|██▊       | 399/1406 [00:38<01:37, 10.28it/s]

-- Step: 400


Epoch 10:  29%|██▊       | 403/1406 [00:45<12:06,  1.38it/s]

{'f1_sampled': 0.014995258416311044, 'f1_micro': 0.03371970495258166, 'f1_macro': 0.03133960004355243, 'f_05_sample': 0.0169153688890531, 'f_05_micro': 0.07744433688286544, 'f_05_macro': 0.050946544837774256, 'precision_sampled': 0.019203413940256046, 'precision_micro': 0.5714285714285714, 'precision_macro': 0.08804268736357021, 'recall_sampled': 0.013406827880512092, 'recall_micro': 0.01737242128121607, 'recall_macro': 0.01911976911976912, 'accuracy': 0.008534850640113799, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 10:  57%|█████▋    | 800/1406 [01:23<00:59, 10.26it/s]

-- Step: 800


Epoch 10:  57%|█████▋    | 802/1406 [01:29<10:00,  1.01it/s]

{'f1_sampled': 0.014556661925082976, 'f1_micro': 0.03013481363996828, 'f1_macro': 0.030149451435482477, 'f_05_sample': 0.017165661902504008, 'f_05_micro': 0.06985294117647059, 'f_05_macro': 0.04925577269030779, 'precision_sampled': 0.02027027027027027, 'precision_micro': 0.5757575757575758, 'precision_macro': 0.08534322820037106, 'recall_sampled': 0.01222143195827406, 'recall_micro': 0.015472312703583062, 'recall_macro': 0.018313385960444784, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 10:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.37it/s]

-- Step: 1200


Epoch 10:  86%|████████▌ | 1203/1406 [02:14<02:52,  1.18it/s]

{'f1_sampled': 0.010028449502133711, 'f1_micro': 0.021282255919127427, 'f1_macro': 0.012726694241170856, 'f_05_sample': 0.011930868838763577, 'f_05_micro': 0.050200803212851405, 'f_05_macro': 0.0211954217888936, 'precision_sampled': 0.01422475106685633, 'precision_micro': 0.5333333333333333, 'precision_macro': 0.03809523809523809, 'recall_sampled': 0.008339260312944524, 'recall_micro': 0.010857763300760043, 'recall_macro': 0.007639419404125286, 'accuracy': 0.006401137980085348, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 10: 100%|██████████| 1406/1406 [02:34<00:00,  9.11it/s]


Training loss: 0.27684737290461064
{'f1_sampled': 0.015647226173541962, 'f1_micro': 0.03619197482297404, 'f1_macro': 0.03498203923735838, 'f_05_sample': 0.017310500863132447, 'f_05_micro': 0.08214285714285714, 'f_05_macro': 0.051858714222076455, 'precision_sampled': 0.01938122332859175, 'precision_micro': 0.5348837209302325, 'precision_macro': 0.07690746753246754, 'recall_sampled': 0.014473684210526317, 'recall_micro': 0.018729641693811076, 'recall_macro': 0.022727272727272728, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 11:  28%|██▊       | 399/1406 [00:38<01:40, 10.06it/s]

-- Step: 400


Epoch 11:  29%|██▊       | 403/1406 [00:45<12:12,  1.37it/s]

{'f1_sampled': 0.012754860123281173, 'f1_micro': 0.03164556962025317, 'f1_macro': 0.033607632159970695, 'f_05_sample': 0.01399235445288077, 'f_05_micro': 0.0728862973760933, 'f_05_macro': 0.051100693851859606, 'precision_sampled': 0.015469416785206259, 'precision_micro': 0.5555555555555556, 'precision_macro': 0.08017316017316019, 'recall_sampled': 0.011895448079658607, 'recall_micro': 0.016286644951140065, 'recall_macro': 0.021538918597742127, 'accuracy': 0.00604551920341394, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 11:  57%|█████▋    | 799/1406 [01:23<00:58, 10.45it/s]

-- Step: 800


Epoch 11:  57%|█████▋    | 801/1406 [01:29<10:11,  1.01s/it]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.0024893314366998577, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 11:  85%|████████▌ | 1199/1406 [02:08<00:20, 10.26it/s]

-- Step: 1200


Epoch 11:  86%|████████▌ | 1203/1406 [02:14<02:27,  1.38it/s]

{'f1_sampled': 0.013786154575628259, 'f1_micro': 0.027946216715001318, 'f1_macro': 0.01567583555161195, 'f_05_sample': 0.01608187134502924, 'f_05_micro': 0.06432038834951456, 'f_05_macro': 0.023368606701940037, 'precision_sampled': 0.018847795163584636, 'precision_micro': 0.48623853211009177, 'precision_macro': 0.03473132372214941, 'recall_sampled': 0.011747273589378852, 'recall_micro': 0.014386536373507057, 'recall_macro': 0.010122230710466005, 'accuracy': 0.008179231863442389, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 11: 100%|██████████| 1406/1406 [02:34<00:00,  9.09it/s]


Training loss: 0.2771308685137571
{'f1_sampled': 0.010858226647700331, 'f1_micro': 0.022848034006376194, 'f1_macro': 0.013530522341095029, 'f_05_sample': 0.012879185576553999, 'f_05_micro': 0.053696303696303696, 'f_05_macro': 0.022128447920955124, 'precision_sampled': 0.015291607396870554, 'precision_micro': 0.5375, 'precision_macro': 0.03839285714285714, 'recall_sampled': 0.00905049786628734, 'recall_micro': 0.011672095548317047, 'recall_macro': 0.008212375859434682, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 12:  28%|██▊       | 400/1406 [00:38<01:37, 10.31it/s]

-- Step: 400


Epoch 12:  29%|██▊       | 402/1406 [00:45<16:34,  1.01it/s]

{'f1_sampled': 0.013193456614509245, 'f1_micro': 0.026934248745709006, 'f1_macro': 0.015274034141958671, 'f_05_sample': 0.015429903587798324, 'f_05_micro': 0.062255859375, 'f_05_macro': 0.02317339149400218, 'precision_sampled': 0.01813655761024182, 'precision_micro': 0.49514563106796117, 'precision_macro': 0.03536754507628294, 'recall_sampled': 0.01121384542437174, 'recall_micro': 0.013843648208469055, 'recall_macro': 0.00974025974025974, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 12:  57%|█████▋    | 799/1406 [01:23<00:58, 10.30it/s]

-- Step: 800


Epoch 12:  57%|█████▋    | 803/1406 [01:30<07:23,  1.36it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 12:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.38it/s]

-- Step: 1200


Epoch 12:  86%|████████▌ | 1203/1406 [02:14<02:55,  1.16it/s]

{'f1_sampled': 0.010680417259364626, 'f1_micro': 0.022352315061202766, 'f1_macro': 0.013392857142857142, 'f_05_sample': 0.01262517216464585, 'f_05_micro': 0.052763819095477386, 'f_05_macro': 0.02238805970149254, 'precision_sampled': 0.014935988620199146, 'precision_micro': 0.5675675675675675, 'precision_macro': 0.040540540540540536, 'recall_sampled': 0.008931958274063538, 'recall_micro': 0.011400651465798045, 'recall_macro': 0.008021390374331552, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 12: 100%|██████████| 1406/1406 [02:34<00:00,  9.09it/s]


Training loss: 0.2767593118641658
{'f1_sampled': 0.012660028449502132, 'f1_micro': 0.025946518400847232, 'f1_macro': 0.014989293361884369, 'f_05_sample': 0.014820271399218769, 'f_05_micro': 0.0604043392504931, 'f_05_macro': 0.023458445040214475, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5268817204301075, 'precision_macro': 0.03763440860215054, 'recall_sampled': 0.010739687055476529, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 13:  28%|██▊       | 400/1406 [00:38<01:36, 10.40it/s]

-- Step: 400


Epoch 13:  29%|██▊       | 403/1406 [00:45<13:59,  1.19it/s]

{'f1_sampled': 0.011296823138928401, 'f1_micro': 0.02335456475583864, 'f1_macro': 0.01372426699937617, 'f_05_sample': 0.01326232247284879, 'f_05_micro': 0.05472636815920398, 'f_05_macro': 0.022132796780684104, 'precision_sampled': 0.015647226173541962, 'precision_micro': 0.5238095238095238, 'precision_macro': 0.03741496598639456, 'recall_sampled': 0.009554291133238501, 'recall_micro': 0.011943539630836048, 'recall_macro': 0.008403361344537815, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 13:  57%|█████▋    | 799/1406 [01:23<00:59, 10.19it/s]

-- Step: 800


Epoch 13:  57%|█████▋    | 803/1406 [01:30<07:26,  1.35it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 13:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.37it/s]

-- Step: 1200


Epoch 13:  85%|████████▌ | 1202/1406 [02:14<03:23,  1.00it/s]

{'f1_sampled': 0.01345424371740161, 'f1_micro': 0.027505950806664903, 'f1_macro': 0.016852195423623994, 'f_05_sample': 0.01573683646052067, 'f_05_micro': 0.06385068762278978, 'f_05_macro': 0.0284513089804813, 'precision_sampled': 0.01849217638691323, 'precision_micro': 0.5360824742268041, 'precision_macro': 0.07783189033189032, 'recall_sampled': 0.011421289710763395, 'recall_micro': 0.014115092290988056, 'recall_macro': 0.010170775516044058, 'accuracy': 0.008179231863442389, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 13: 100%|██████████| 1406/1406 [02:34<00:00,  9.10it/s]


Training loss: 0.2769777487809763
{'f1_sampled': 0.01345424371740161, 'f1_micro': 0.02749867794817557, 'f1_macro': 0.016820806469063178, 'f_05_sample': 0.01573683646052067, 'f_05_micro': 0.06378802747791953, 'f_05_macro': 0.028324619563629992, 'precision_sampled': 0.01849217638691323, 'precision_micro': 0.5306122448979592, 'precision_macro': 0.07740324594257178, 'recall_sampled': 0.011421289710763395, 'recall_micro': 0.014115092290988056, 'recall_macro': 0.010170775516044058, 'accuracy': 0.008179231863442389, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 14:  28%|██▊       | 399/1406 [00:38<01:40, 10.06it/s]

-- Step: 400


Epoch 14:  29%|██▊       | 403/1406 [00:45<12:06,  1.38it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.02544394381129075, 'f1_macro': 0.014810243751928418, 'f_05_sample': 0.014566257987310621, 'f_05_micro': 0.0594059405940594, 'f_05_macro': 0.023483365949119372, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5393258426966292, 'precision_macro': 0.038523274478330656, 'recall_sampled': 0.010621147463252727, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.009167303284950344, 'accuracy': 0.007823613086770981, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 14:  57%|█████▋    | 800/1406 [01:23<00:58, 10.39it/s]

-- Step: 800


Epoch 14:  57%|█████▋    | 803/1406 [01:29<08:28,  1.19it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 14:  85%|████████▌ | 1199/1406 [02:08<00:20, 10.30it/s]

-- Step: 1200


Epoch 14:  86%|████████▌ | 1203/1406 [02:14<02:28,  1.37it/s]

{'f1_sampled': 0.01343053579895685, 'f1_micro': 0.02740447957839262, 'f1_macro': 0.015316642120765831, 'f_05_sample': 0.015726252568357833, 'f_05_micro': 0.06298449612403101, 'f_05_macro': 0.022703457911281873, 'precision_sampled': 0.01849217638691323, 'precision_micro': 0.46846846846846846, 'precision_macro': 0.03346203346203346, 'recall_sampled': 0.011391654812707444, 'recall_micro': 0.014115092290988056, 'recall_macro': 0.009931245225362872, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 14: 100%|██████████| 1406/1406 [02:34<00:00,  9.12it/s]


Training loss: 0.2765717147339459
{'f1_sampled': 0.01343053579895685, 'f1_micro': 0.027411702688455455, 'f1_macro': 0.015348288075560804, 'f_05_sample': 0.015726252568357833, 'f_05_micro': 0.06304558680892337, 'f_05_macro': 0.022815022815022814, 'precision_sampled': 0.01849217638691323, 'precision_micro': 0.4727272727272727, 'precision_macro': 0.033766233766233764, 'recall_sampled': 0.011391654812707444, 'recall_micro': 0.014115092290988056, 'recall_macro': 0.009931245225362872, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 15:  28%|██▊       | 399/1406 [00:38<01:38, 10.24it/s]

-- Step: 400


Epoch 15:  29%|██▊       | 403/1406 [00:45<12:04,  1.38it/s]

{'f1_sampled': 0.01343053579895685, 'f1_micro': 0.0273972602739726, 'f1_macro': 0.015285126396237508, 'f_05_sample': 0.015726252568357833, 'f_05_micro': 0.06292352371732816, 'f_05_macro': 0.02259297879735836, 'precision_sampled': 0.01849217638691323, 'precision_micro': 0.4642857142857143, 'precision_macro': 0.03316326530612245, 'recall_sampled': 0.011391654812707444, 'recall_micro': 0.014115092290988056, 'recall_macro': 0.009931245225362872, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 15:  57%|█████▋    | 800/1406 [01:23<00:57, 10.59it/s]

-- Step: 800


Epoch 15:  57%|█████▋    | 802/1406 [01:29<09:40,  1.04it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 15:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.43it/s]

-- Step: 1200


Epoch 15:  86%|████████▌ | 1203/1406 [02:13<02:47,  1.21it/s]

{'f1_sampled': 0.012126600284495019, 'f1_micro': 0.025396825396825397, 'f1_macro': 0.01458966565349544, 'f_05_sample': 0.014363047257784098, 'f_05_micro': 0.058997050147492625, 'f_05_macro': 0.022615906520919715, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010146989094357515, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.009167303284950344, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 15: 100%|██████████| 1406/1406 [02:33<00:00,  9.18it/s]


Training loss: 0.27702278603573105
{'f1_sampled': 0.012126600284495019, 'f1_micro': 0.025396825396825397, 'f1_macro': 0.01458966565349544, 'f_05_sample': 0.014363047257784098, 'f_05_micro': 0.058997050147492625, 'f_05_macro': 0.022615906520919715, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010146989094357515, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.009167303284950344, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 16:  28%|██▊       | 400/1406 [00:38<01:35, 10.58it/s]

-- Step: 400


Epoch 16:  29%|██▊       | 403/1406 [00:44<13:47,  1.21it/s]

{'f1_sampled': 0.012126600284495019, 'f1_micro': 0.025396825396825397, 'f1_macro': 0.01458966565349544, 'f_05_sample': 0.014363047257784098, 'f_05_micro': 0.058997050147492625, 'f_05_macro': 0.022615906520919715, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010146989094357515, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.009167303284950344, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 16:  57%|█████▋    | 800/1406 [01:23<00:57, 10.49it/s]

-- Step: 800


Epoch 16:  57%|█████▋    | 802/1406 [01:29<09:53,  1.02it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 16:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.37it/s]

-- Step: 1200


Epoch 16:  86%|████████▌ | 1203/1406 [02:13<02:47,  1.21it/s]

{'f1_sampled': 0.012126600284495019, 'f1_micro': 0.025396825396825397, 'f1_macro': 0.01458966565349544, 'f_05_sample': 0.014363047257784098, 'f_05_micro': 0.058997050147492625, 'f_05_macro': 0.022615906520919715, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010146989094357515, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.009167303284950344, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 16: 100%|██████████| 1406/1406 [02:33<00:00,  9.18it/s]


Training loss: 0.27672309152898544
{'f1_sampled': 0.012126600284495019, 'f1_micro': 0.025396825396825397, 'f1_macro': 0.01458966565349544, 'f_05_sample': 0.014363047257784098, 'f_05_micro': 0.058997050147492625, 'f_05_macro': 0.022615906520919715, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010146989094357515, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.009167303284950344, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 17:  28%|██▊       | 400/1406 [00:38<01:37, 10.34it/s]

-- Step: 400


Epoch 17:  29%|██▊       | 402/1406 [00:44<16:14,  1.03it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025912215758857746, 'f1_macro': 0.014830508474576272, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.060107948969578016, 'f_05_macro': 0.022845953002610966, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 17:  57%|█████▋    | 800/1406 [01:22<00:57, 10.49it/s]

-- Step: 800


Epoch 17:  57%|█████▋    | 802/1406 [01:29<09:58,  1.01it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 17:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.57it/s]

-- Step: 1200


Epoch 17:  85%|████████▌ | 1202/1406 [02:13<03:22,  1.01it/s]

{'f1_sampled': 0.012837837837837835, 'f1_micro': 0.02641310089804543, 'f1_macro': 0.015006002400960384, 'f_05_sample': 0.015074284811126916, 'f_05_micro': 0.06109481915933529, 'f_05_macro': 0.022835221044939714, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.49019607843137253, 'precision_macro': 0.0350140056022409, 'recall_sampled': 0.010858226647700331, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 17: 100%|██████████| 1406/1406 [02:33<00:00,  9.18it/s]


Training loss: 0.2767564372860351
{'f1_sampled': 0.012837837837837835, 'f1_micro': 0.02641310089804543, 'f1_macro': 0.015006002400960384, 'f_05_sample': 0.015074284811126916, 'f_05_micro': 0.06109481915933529, 'f_05_macro': 0.022835221044939714, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.49019607843137253, 'precision_macro': 0.0350140056022409, 'recall_sampled': 0.010858226647700331, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 18:  28%|██▊       | 400/1406 [00:38<01:38, 10.26it/s]

-- Step: 400


Epoch 18:  29%|██▊       | 402/1406 [00:44<16:25,  1.02it/s]

{'f1_sampled': 0.012837837837837835, 'f1_micro': 0.02641310089804543, 'f1_macro': 0.015006002400960384, 'f_05_sample': 0.015074284811126916, 'f_05_micro': 0.06109481915933529, 'f_05_macro': 0.022835221044939714, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.49019607843137253, 'precision_macro': 0.0350140056022409, 'recall_sampled': 0.010858226647700331, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 18:  57%|█████▋    | 799/1406 [01:23<00:58, 10.31it/s]

-- Step: 800


Epoch 18:  57%|█████▋    | 803/1406 [01:29<07:20,  1.37it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 18:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.47it/s]

-- Step: 1200


Epoch 18:  85%|████████▌ | 1202/1406 [02:13<03:19,  1.02it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025898520084566595, 'f1_macro': 0.014767932489451477, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.05999020568070519, 'f_05_macro': 0.02260981912144703, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.49, 'precision_macro': 0.034999999999999996, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 18: 100%|██████████| 1406/1406 [02:33<00:00,  9.16it/s]


Training loss: 0.2765737763933233
{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025898520084566595, 'f1_macro': 0.014767932489451477, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.05999020568070519, 'f_05_macro': 0.02260981912144703, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.49, 'precision_macro': 0.034999999999999996, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 19:  28%|██▊       | 399/1406 [00:38<01:38, 10.22it/s]

-- Step: 400


Epoch 19:  29%|██▊       | 403/1406 [00:44<12:02,  1.39it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025898520084566595, 'f1_macro': 0.014767932489451477, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.05999020568070519, 'f_05_macro': 0.02260981912144703, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.49, 'precision_macro': 0.034999999999999996, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 19:  57%|█████▋    | 799/1406 [01:23<00:58, 10.39it/s]

-- Step: 800


Epoch 19:  57%|█████▋    | 803/1406 [01:29<07:11,  1.40it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.02590536611155168, 'f1_macro': 0.014799154334038054, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06004901960784314, 'f_05_macro': 0.022727272727272728, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.494949494949495, 'precision_macro': 0.03535353535353535, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 19:  85%|████████▌ | 1199/1406 [02:07<00:19, 10.39it/s]

-- Step: 1200


Epoch 19:  86%|████████▌ | 1203/1406 [02:13<02:24,  1.40it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.02590536611155168, 'f1_macro': 0.014799154334038054, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06004901960784314, 'f_05_macro': 0.022727272727272728, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.494949494949495, 'precision_macro': 0.03535353535353535, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 19: 100%|██████████| 1406/1406 [02:33<00:00,  9.17it/s]


Training loss: 0.27660568433669347
{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.02590536611155168, 'f1_macro': 0.014799154334038054, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06004901960784314, 'f_05_macro': 0.022727272727272728, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.494949494949495, 'precision_macro': 0.03535353535353535, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 20:  28%|██▊       | 400/1406 [00:38<01:36, 10.45it/s]

-- Step: 400


Epoch 20:  29%|██▊       | 403/1406 [00:44<13:38,  1.23it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.02590536611155168, 'f1_macro': 0.014799154334038054, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06004901960784314, 'f_05_macro': 0.022727272727272728, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.494949494949495, 'precision_macro': 0.03535353535353535, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 20:  57%|█████▋    | 800/1406 [01:23<00:57, 10.49it/s]

-- Step: 800


Epoch 20:  57%|█████▋    | 802/1406 [01:29<09:45,  1.03it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 20:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.34it/s]

-- Step: 1200


Epoch 20:  85%|████████▌ | 1202/1406 [02:13<03:24,  1.00s/it]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025912215758857746, 'f1_macro': 0.014830508474576272, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.060107948969578016, 'f_05_macro': 0.022845953002610966, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 20: 100%|██████████| 1406/1406 [02:33<00:00,  9.17it/s]


Training loss: 0.27656398377623703
{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025925925925925925, 'f1_macro': 0.014893617021276595, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06022615535889872, 'f_05_macro': 0.02308707124010554, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5104166666666666, 'precision_macro': 0.03645833333333333, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 21:  28%|██▊       | 400/1406 [00:38<01:36, 10.40it/s]

-- Step: 400


Epoch 21:  29%|██▊       | 402/1406 [00:44<16:44,  1.00s/it]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025925925925925925, 'f1_macro': 0.014893617021276595, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06022615535889872, 'f_05_macro': 0.02308707124010554, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5104166666666666, 'precision_macro': 0.03645833333333333, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 21:  57%|█████▋    | 800/1406 [01:23<00:58, 10.40it/s]

-- Step: 800


Epoch 21:  57%|█████▋    | 802/1406 [01:29<09:51,  1.02it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025925925925925925, 'f1_macro': 0.014893617021276595, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06022615535889872, 'f_05_macro': 0.02308707124010554, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5104166666666666, 'precision_macro': 0.03645833333333333, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 21:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.52it/s]

-- Step: 1200


Epoch 21:  85%|████████▌ | 1202/1406 [02:13<03:19,  1.02it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025925925925925925, 'f1_macro': 0.014893617021276595, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06022615535889872, 'f_05_macro': 0.02308707124010554, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5104166666666666, 'precision_macro': 0.03645833333333333, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 21: 100%|██████████| 1406/1406 [02:33<00:00,  9.18it/s]


Training loss: 0.27652807629125387
{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025925925925925925, 'f1_macro': 0.014893617021276595, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.06022615535889872, 'f_05_macro': 0.02308707124010554, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5104166666666666, 'precision_macro': 0.03645833333333333, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 22:  28%|██▊       | 400/1406 [00:38<01:37, 10.37it/s]

-- Step: 400


Epoch 22:  29%|██▊       | 402/1406 [00:44<16:32,  1.01it/s]

{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.026427061310782242, 'f1_macro': 0.015069318866787222, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.06121449559255632, 'f_05_macro': 0.02307124400147656, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 22:  57%|█████▋    | 800/1406 [01:23<00:58, 10.33it/s]

-- Step: 800


Epoch 22:  57%|█████▋    | 803/1406 [01:29<08:21,  1.20it/s]

{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.026427061310782242, 'f1_macro': 0.015069318866787222, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.06121449559255632, 'f_05_macro': 0.02307124400147656, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 22:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.51it/s]

-- Step: 1200


Epoch 22:  85%|████████▌ | 1202/1406 [02:13<03:19,  1.02it/s]

{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.026427061310782242, 'f1_macro': 0.015069318866787222, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.06121449559255632, 'f_05_macro': 0.02307124400147656, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 22: 100%|██████████| 1406/1406 [02:33<00:00,  9.17it/s]


Training loss: 0.27647205627659815
{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.026427061310782242, 'f1_macro': 0.015069318866787222, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.06121449559255632, 'f_05_macro': 0.02307124400147656, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 23:  28%|██▊       | 400/1406 [00:38<01:35, 10.51it/s]

-- Step: 400


Epoch 23:  29%|██▊       | 402/1406 [00:44<16:26,  1.02it/s]

{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.026427061310782242, 'f1_macro': 0.015069318866787222, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.06121449559255632, 'f_05_macro': 0.02307124400147656, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 23:  57%|█████▋    | 800/1406 [01:23<00:57, 10.49it/s]

-- Step: 800


Epoch 23:  57%|█████▋    | 803/1406 [01:29<08:35,  1.17it/s]

{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.026427061310782242, 'f1_macro': 0.015069318866787222, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.06121449559255632, 'f_05_macro': 0.02307124400147656, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 23:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.53it/s]

-- Step: 1200


Epoch 23:  85%|████████▌ | 1202/1406 [02:13<03:23,  1.00it/s]

{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.026392187912377935, 'f1_macro': 0.014912019087384432, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.060916179337231965, 'f_05_macro': 0.0224901043540842, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.47619047619047616, 'precision_macro': 0.03401360544217687, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 23: 100%|██████████| 1406/1406 [02:33<00:00,  9.15it/s]


Training loss: 0.2763599011395259
{'f1_sampled': 0.012719298245614035, 'f1_micro': 0.02637826431020839, 'f1_macro': 0.01485001485001485, 'f_05_sample': 0.015015015015015017, 'f_05_micro': 0.060797665369649805, 'f_05_macro': 0.02226576416102601, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.4672897196261682, 'precision_macro': 0.03337783711615487, 'recall_sampled': 0.010680417259364626, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.006401137980085348, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 24:  28%|██▊       | 400/1406 [00:38<01:36, 10.45it/s]

-- Step: 400


Epoch 24:  29%|██▊       | 402/1406 [00:44<16:20,  1.02it/s]

{'f1_sampled': 0.009554291133238501, 'f1_micro': 0.019722814498933903, 'f1_macro': 0.011958629605688428, 'f_05_sample': 0.011185762830499674, 'f_05_micro': 0.0467644084934277, 'f_05_macro': 0.020455550641309155, 'precision_sampled': 0.013157894736842105, 'precision_micro': 0.5441176470588235, 'precision_macro': 0.03886554621848739, 'recall_sampled': 0.008102181128496918, 'recall_micro': 0.01004343105320304, 'recall_macro': 0.00706646294881589, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 24:  57%|█████▋    | 799/1406 [01:22<00:58, 10.46it/s]

-- Step: 800


Epoch 24:  57%|█████▋    | 803/1406 [01:29<07:33,  1.33it/s]

{'f1_sampled': 0.009791370317686107, 'f1_micro': 0.020250466293631763, 'f1_macro': 0.012254111576910674, 'f_05_sample': 0.011482111811059181, 'f_05_micro': 0.047979797979797977, 'f_05_macro': 0.02087912087912088, 'precision_sampled': 0.013513513513513514, 'precision_micro': 0.5507246376811594, 'precision_macro': 0.039337474120082816, 'recall_sampled': 0.008279990516832621, 'recall_micro': 0.01031487513572204, 'recall_macro': 0.007257448433919023, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 24:  85%|████████▌ | 1199/1406 [02:07<00:19, 10.47it/s]

-- Step: 1200


Epoch 24:  86%|████████▌ | 1203/1406 [02:13<02:24,  1.41it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 24: 100%|██████████| 1406/1406 [02:33<00:00,  9.16it/s]


Training loss: 0.27779164267001755
{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 25:  28%|██▊       | 400/1406 [00:38<01:36, 10.45it/s]

-- Step: 400


Epoch 25:  29%|██▊       | 402/1406 [00:44<16:27,  1.02it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 25:  57%|█████▋    | 799/1406 [01:22<00:59, 10.28it/s]

-- Step: 800


Epoch 25:  57%|█████▋    | 803/1406 [01:29<07:11,  1.40it/s]

{'f1_sampled': 0.0018966334755808438, 'f1_micro': 0.004327833378414931, 'f1_macro': 0.002953119232189, 'f_05_sample': 0.0023058773058773057, 'f_05_micro': 0.010706638115631691, 'f_05_macro': 0.006706908115358819, 'precision_sampled': 0.002844950213371266, 'precision_micro': 0.6153846153846154, 'precision_macro': 0.04395604395604396, 'recall_sampled': 0.001552868658131816, 'recall_micro': 0.002171552660152009, 'recall_macro': 0.0015278838808250573, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 25:  85%|████████▌ | 1200/1406 [02:07<00:19, 10.44it/s]

-- Step: 1200


Epoch 25:  85%|████████▌ | 1202/1406 [02:13<03:19,  1.02it/s]

{'f1_sampled': 0.002892366050260787, 'f1_micro': 0.007015650296815974, 'f1_macro': 0.00468975468975469, 'f_05_sample': 0.003639447718395087, 'f_05_micro': 0.0172322375397667, 'f_05_macro': 0.01004947433518862, 'precision_sampled': 0.004623044096728307, 'precision_micro': 0.5909090909090909, 'precision_macro': 0.04220779220779221, 'recall_sampled': 0.002264106211474632, 'recall_micro': 0.003528773072747014, 'recall_macro': 0.002482811306340718, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 25: 100%|██████████| 1406/1406 [02:33<00:00,  9.17it/s]


Training loss: 0.277712665710222
{'f1_sampled': 0.012660028449502132, 'f1_micro': 0.025755584756898816, 'f1_macro': 0.01414141414141414, 'f_05_sample': 0.014820271399218769, 'f_05_micro': 0.05878119001919386, 'f_05_macro': 0.020396270396270396, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.4049586776859504, 'precision_macro': 0.02892561983471074, 'recall_sampled': 0.010739687055476529, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 26:  28%|██▊       | 399/1406 [00:38<01:35, 10.54it/s]

-- Step: 400


Epoch 26:  29%|██▊       | 403/1406 [00:44<10:58,  1.52it/s]

{'f1_sampled': 0.004018492176386913, 'f1_micro': 0.00967741935483871, 'f1_macro': 0.0062717770034843206, 'f_05_sample': 0.005078857052541263, 'f_05_micro': 0.023510971786833857, 'f_05_macro': 0.012410369553226696, 'precision_sampled': 0.006401137980085348, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.0030938833570412516, 'recall_micro': 0.004885993485342019, 'recall_macro': 0.003437738731856379, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 26:  57%|█████▋    | 799/1406 [01:21<00:57, 10.56it/s]

-- Step: 800


Epoch 26:  57%|█████▋    | 803/1406 [01:27<06:35,  1.52it/s]

{'f1_sampled': 0.008546704599336178, 'f1_micro': 0.018133333333333335, 'f1_macro': 0.01103896103896104, 'f_05_sample': 0.010135840727945992, 'f_05_micro': 0.04305977710233029, 'f_05_macro': 0.01903269144648455, 'precision_sampled': 0.01209103840682788, 'precision_micro': 0.5151515151515151, 'precision_macro': 0.03679653679653679, 'recall_sampled': 0.007153864390706495, 'recall_micro': 0.009229098805646038, 'recall_macro': 0.006493506493506494, 'accuracy': 0.00604551920341394, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 26:  85%|████████▌ | 1199/1406 [02:05<00:20, 10.16it/s]

-- Step: 1200


Epoch 26:  86%|████████▌ | 1203/1406 [02:11<02:27,  1.38it/s]

{'f1_sampled': 0.00647226173541963, 'f1_micro': 0.014465577283686044, 'f1_macro': 0.00911854103343465, 'f_05_sample': 0.007866654248233195, 'f_05_micro': 0.03479381443298969, 'f_05_macro': 0.016917293233082706, 'precision_sampled': 0.009601706970128023, 'precision_micro': 0.5510204081632653, 'precision_macro': 0.039358600583090375, 'recall_sampled': 0.005257230915125651, 'recall_micro': 0.007328990228013029, 'recall_macro': 0.005156608097784568, 'accuracy': 0.004978662873399715, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 26: 100%|██████████| 1406/1406 [02:31<00:00,  9.28it/s]


Training loss: 0.2769992264202275
{'f1_sampled': 0.005049786628733997, 'f1_micro': 0.011815252416756176, 'f1_macro': 0.00759144237405107, 'f_05_sample': 0.006305882950619793, 'f_05_micro': 0.02861602497398543, 'f_05_macro': 0.01471375066880685, 'precision_sampled': 0.007823613086770981, 'precision_micro': 0.55, 'precision_macro': 0.03928571428571429, 'recall_sampled': 0.003941441441441441, 'recall_micro': 0.005971769815418024, 'recall_macro': 0.004201680672268907, 'accuracy': 0.0039118065433854906, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 27:  28%|██▊       | 400/1406 [00:38<01:38, 10.18it/s]

-- Step: 400


Epoch 27:  29%|██▊       | 402/1406 [00:45<16:46,  1.00s/it]

{'f1_sampled': 0.005523944997629208, 'f1_micro': 0.012878991145693587, 'f1_macro': 0.008221993833504625, 'f_05_sample': 0.006859067714330873, 'f_05_micro': 0.03112033195020747, 'f_05_macro': 0.015698587127158554, 'precision_sampled': 0.008534850640113799, 'precision_micro': 0.5581395348837209, 'precision_macro': 0.03986710963455149, 'recall_sampled': 0.0043681839734471305, 'recall_micro': 0.006514657980456026, 'recall_macro': 0.004583651642475172, 'accuracy': 0.004267425320056899, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 27:  57%|█████▋    | 799/1406 [01:23<00:59, 10.25it/s]

-- Step: 800


Epoch 27:  57%|█████▋    | 803/1406 [01:30<07:19,  1.37it/s]

{'f1_sampled': 0.0014461830251303934, 'f1_micro': 0.003246753246753247, 'f1_macro': 0.0022205773501110288, 'f_05_sample': 0.001737875093138251, 'f_05_micro': 0.008038585209003215, 'f_05_macro': 0.0050778605280974946, 'precision_sampled': 0.0021337126600284497, 'precision_micro': 0.5, 'precision_macro': 0.03571428571428571, 'recall_sampled': 0.0012150308202939781, 'recall_micro': 0.0016286644951140066, 'recall_macro': 0.001145912910618793, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 27:  85%|████████▌ | 1199/1406 [02:08<00:20, 10.34it/s]

-- Step: 1200


Epoch 27:  86%|████████▌ | 1203/1406 [02:14<02:27,  1.38it/s]

{'f1_sampled': 0.0034850640113798003, 'f1_micro': 0.008632317237658484, 'f1_macro': 0.0057574667146455556, 'f_05_sample': 0.004443823522770891, 'f_05_micro': 0.0211864406779661, 'f_05_macro': 0.012262415695892091, 'precision_sampled': 0.005689900426742532, 'precision_micro': 0.6956521739130435, 'precision_macro': 0.049689440993788817, 'recall_sampled': 0.0026789947842579417, 'recall_micro': 0.004343105320304018, 'recall_macro': 0.0030557677616501145, 'accuracy': 0.0035561877667140826, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 27: 100%|██████████| 1406/1406 [02:34<00:00,  9.10it/s]


Training loss: 0.2772352300043805
{'f1_sampled': 0.005002370791844476, 'f1_micro': 0.01129032258064516, 'f1_macro': 0.007317073170731707, 'f_05_sample': 0.006118195262932105, 'f_05_micro': 0.0274294670846395, 'f_05_macro': 0.01447876447876448, 'precision_sampled': 0.007467994310099573, 'precision_micro': 0.5833333333333334, 'precision_macro': 0.04166666666666667, 'recall_sampled': 0.0040125651967757225, 'recall_micro': 0.005700325732899023, 'recall_macro': 0.004010695187165776, 'accuracy': 0.004267425320056899, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 28:  28%|██▊       | 399/1406 [00:38<01:38, 10.24it/s]

-- Step: 400


Epoch 28:  29%|██▊       | 401/1406 [00:45<16:53,  1.01s/it]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 28:  57%|█████▋    | 799/1406 [01:23<00:57, 10.50it/s]

-- Step: 800


Epoch 28:  57%|█████▋    | 803/1406 [01:29<07:19,  1.37it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 28:  85%|████████▌ | 1199/1406 [02:08<00:20, 10.33it/s]

-- Step: 1200


Epoch 28:  86%|████████▌ | 1203/1406 [02:14<02:27,  1.38it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 28: 100%|██████████| 1406/1406 [02:34<00:00,  9.12it/s]


Training loss: 0.2776644923863336
{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 29:  28%|██▊       | 400/1406 [00:39<01:42,  9.83it/s]

-- Step: 400


Epoch 29:  29%|██▊       | 403/1406 [00:44<12:21,  1.35it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 29:  57%|█████▋    | 799/1406 [01:23<00:58, 10.37it/s]

-- Step: 800


Epoch 29:  57%|█████▋    | 803/1406 [01:29<07:15,  1.38it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 29:  85%|████████▌ | 1199/1406 [02:07<00:19, 10.41it/s]

-- Step: 1200


Epoch 29:  86%|████████▌ | 1203/1406 [02:14<02:28,  1.37it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 29: 100%|██████████| 1406/1406 [02:33<00:00,  9.13it/s]


Training loss: 0.27671795413639944
{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 30:  28%|██▊       | 399/1406 [00:38<01:34, 10.63it/s]

-- Step: 400


Epoch 30:  29%|██▊       | 403/1406 [00:45<12:24,  1.35it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 30:  57%|█████▋    | 800/1406 [01:23<00:58, 10.35it/s]

-- Step: 800


Epoch 30:  57%|█████▋    | 803/1406 [01:30<08:25,  1.19it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 30:  85%|████████▌ | 1199/1406 [02:08<00:19, 10.49it/s]

-- Step: 1200


Epoch 30:  86%|████████▌ | 1203/1406 [02:14<02:29,  1.36it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 30: 100%|██████████| 1406/1406 [02:34<00:00,  9.10it/s]


Training loss: 0.2765017298512574
{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 31:  28%|██▊       | 400/1406 [00:38<01:37, 10.33it/s]

-- Step: 400


Epoch 31:  29%|██▊       | 402/1406 [00:45<16:48,  1.00s/it]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 31:  57%|█████▋    | 799/1406 [01:23<01:00, 10.03it/s]

-- Step: 800


Epoch 31:  57%|█████▋    | 803/1406 [01:29<07:23,  1.36it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 31:  85%|████████▌ | 1200/1406 [02:08<00:19, 10.38it/s]

-- Step: 1200


Epoch 31:  85%|████████▌ | 1202/1406 [02:14<03:25,  1.01s/it]

{'f1_sampled': 0.0005926979611190137, 'f1_micro': 0.0010843046896177825, 'f1_macro': 0.0007538635506973238, 'f_05_sample': 0.0006519677572309152, 'f_05_micro': 0.0026997840172786176, 'f_05_macro': 0.0018129079042784625, 'precision_sampled': 0.0007112375533428165, 'precision_micro': 0.4, 'precision_macro': 0.028571428571428574, 'recall_sampled': 0.0005334281650071124, 'recall_micro': 0.0005428881650380022, 'recall_macro': 0.0003819709702062643, 'accuracy': 0.003200568990042674, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 31: 100%|██████████| 1406/1406 [02:34<00:00,  9.11it/s]


Training loss: 0.2761887641117549
{'f1_sampled': 0.006009957325746799, 'f1_micro': 0.013412017167381975, 'f1_macro': 0.008544087491455913, 'f_05_sample': 0.007320525412630677, 'f_05_micro': 0.03238341968911917, 'f_05_macro': 0.016233766233766232, 'precision_sampled': 0.008890469416785207, 'precision_micro': 0.5681818181818182, 'precision_macro': 0.04058441558441559, 'recall_sampled': 0.004842342342342342, 'recall_micro': 0.0067861020629750276, 'recall_macro': 0.004774637127578305, 'accuracy': 0.004623044096728307, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 32:  28%|██▊       | 399/1406 [00:38<01:38, 10.23it/s]

-- Step: 400


Epoch 32:  29%|██▊       | 403/1406 [00:45<12:12,  1.37it/s]

{'f1_sampled': 0.008487434803224276, 'f1_micro': 0.018691588785046728, 'f1_macro': 0.011494252873563218, 'f_05_sample': 0.010271314547630339, 'f_05_micro': 0.04455193482688391, 'f_05_macro': 0.02022653721682848, 'precision_sampled': 0.012446657183499289, 'precision_micro': 0.5737704918032787, 'precision_macro': 0.040983606557377046, 'recall_sampled': 0.006916785206258889, 'recall_micro': 0.009500542888165039, 'recall_macro': 0.0066844919786096255, 'accuracy': 0.004978662873399715, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 32:  57%|█████▋    | 799/1406 [01:23<00:57, 10.56it/s]

-- Step: 800


Epoch 32:  57%|█████▋    | 801/1406 [01:29<10:08,  1.01s/it]

{'f1_sampled': 0.007041251778093882, 'f1_micro': 0.01606425702811245, 'f1_macro': 0.010084033613445379, 'f_05_sample': 0.008685847501636977, 'f_05_micro': 0.038580246913580245, 'f_05_macro': 0.018536826495304002, 'precision_sampled': 0.010668563300142247, 'precision_micro': 0.5882352941176471, 'precision_macro': 0.04201680672268908, 'recall_sampled': 0.00558321479374111, 'recall_micro': 0.008143322475570033, 'recall_macro': 0.005729564553093965, 'accuracy': 0.004623044096728307, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 32:  85%|████████▌ | 1200/1406 [02:08<00:20, 10.25it/s]

-- Step: 1200


Epoch 32:  85%|████████▌ | 1202/1406 [02:14<03:23,  1.00it/s]

{'f1_sampled': 0.012245139876718823, 'f1_micro': 0.025416997617156472, 'f1_macro': 0.014683389415723462, 'f_05_sample': 0.014422317053896001, 'f_05_micro': 0.05917159763313609, 'f_05_macro': 0.02297970126388357, 'precision_sampled': 0.017069701280227598, 'precision_micro': 0.5161290322580645, 'precision_macro': 0.03686635944700461, 'recall_sampled': 0.010324798482693218, 'recall_micro': 0.013029315960912053, 'recall_macro': 0.009167303284950344, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 32: 100%|██████████| 1406/1406 [02:34<00:00,  9.11it/s]


Training loss: 0.27607130603719066
{'f1_sampled': 0.012837837837837835, 'f1_micro': 0.026385224274406333, 'f1_macro': 0.014880952380952382, 'f_05_sample': 0.015074284811126916, 'f_05_micro': 0.06085686465433301, 'f_05_macro': 0.02237737200143215, 'precision_sampled': 0.017780938833570414, 'precision_micro': 0.4716981132075472, 'precision_macro': 0.03369272237196765, 'recall_sampled': 0.010858226647700331, 'recall_micro': 0.013572204125950055, 'recall_macro': 0.00954927425515661, 'accuracy': 0.007112375533428165, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 33:  28%|██▊       | 400/1406 [00:38<01:37, 10.36it/s]

-- Step: 400


Epoch 33:  29%|██▊       | 402/1406 [00:45<16:40,  1.00it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 33:  57%|█████▋    | 800/1406 [01:23<00:58, 10.41it/s]

-- Step: 800


Epoch 33:  57%|█████▋    | 802/1406 [01:29<10:08,  1.01s/it]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 33:  85%|████████▌ | 1200/1406 [02:08<00:20, 10.29it/s]

-- Step: 1200


Epoch 33:  85%|████████▌ | 1202/1406 [02:14<03:22,  1.01it/s]

{'f1_sampled': 0.010004741583688952, 'f1_micro': 0.02082777036048064, 'f1_macro': 0.012807881773399015, 'f_05_sample': 0.01176787689945585, 'f_05_micro': 0.049643584521384926, 'f_05_macro': 0.022538141470180306, 'precision_sampled': 0.013869132290184922, 'precision_micro': 0.639344262295082, 'precision_macro': 0.04566744730679157, 'recall_sampled': 0.008428165007112375, 'recall_micro': 0.010586319218241042, 'recall_macro': 0.007448433919022154, 'accuracy': 0.006756756756756757, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 33: 100%|██████████| 1406/1406 [02:34<00:00,  9.11it/s]


Training loss: 0.27645706714980123
{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 34:  28%|██▊       | 399/1406 [00:38<01:37, 10.30it/s]

-- Step: 400


Epoch 34:  29%|██▊       | 403/1406 [00:45<12:08,  1.38it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 34:  57%|█████▋    | 799/1406 [01:23<00:58, 10.40it/s]

-- Step: 800


Epoch 34:  57%|█████▋    | 803/1406 [01:29<07:16,  1.38it/s]

{'f1_sampled': 0.012482219061166427, 'f1_micro': 0.025946518400847232, 'f1_macro': 0.014989293361884369, 'f_05_sample': 0.014718666034455506, 'f_05_micro': 0.0604043392504931, 'f_05_macro': 0.023458445040214475, 'precision_sampled': 0.017425320056899004, 'precision_micro': 0.5268817204301075, 'precision_macro': 0.03763440860215054, 'recall_sampled': 0.010502607871028923, 'recall_micro': 0.013300760043431054, 'recall_macro': 0.009358288770053475, 'accuracy': 0.007467994310099573, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 34:  85%|████████▌ | 1199/1406 [02:08<00:19, 10.47it/s]

-- Step: 1200


Epoch 34:  86%|████████▌ | 1203/1406 [02:14<02:27,  1.38it/s]

{'f1_sampled': 0.0, 'f1_micro': 0.0, 'f1_macro': 0.0, 'f_05_sample': 0.0, 'f_05_micro': 0.0, 'f_05_macro': 0.0, 'precision_sampled': 0.0, 'precision_micro': 0.0, 'precision_macro': 0.0, 'recall_sampled': 0.0, 'recall_micro': 0.0, 'recall_macro': 0.0, 'accuracy': 0.002844950213371266, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 34: 100%|██████████| 1406/1406 [02:33<00:00,  9.14it/s]


Training loss: 0.2760580286468415
{'f1_sampled': 0.006389284020862968, 'f1_micro': 0.013978494623655914, 'f1_macro': 0.009059233449477353, 'f_05_sample': 0.0076867280814649245, 'f_05_micro': 0.03396029258098224, 'f_05_macro': 0.017926089354660783, 'precision_sampled': 0.009246088193456615, 'precision_micro': 0.7222222222222222, 'precision_macro': 0.051587301587301584, 'recall_sampled': 0.005227596017069701, 'recall_micro': 0.007057546145494028, 'recall_macro': 0.004965622612681436, 'accuracy': 0.004978662873399715, 'coverage_error': np.float64(14.0), 'label_ranking_loss': np.float64(1.0), 'lrap': np.float64(0.07142857142857141)}


Epoch 35:   4%|▍         | 55/1406 [00:05<02:12, 10.17it/s]


KeyboardInterrupt: 

In [47]:
def test(model, val_dataloader, criterion, is_final_test=False):
    global device

    val_loss = 0
    true, pred = [], []

    model.eval()

    results = {}

    for step, batch in enumerate(val_dataloader):
        b_input_ids = batch['input_ids'].to(device)
        b_attention_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)

        with torch.no_grad():
            logits = model(input_ids=b_input_ids, attention_mask=b_attention_mask)

            loss = criterion(logits, b_labels)
            val_loss += loss.item()

            logits = torch.sigmoid(logits)
            logits = np.round(logits.cpu().numpy())
            labels = b_labels.cpu().numpy()

            pred.extend(logits)
            true.extend(labels)

    # avg_val_loss = val_loss / len(val_dataloader)
    # print('Val loss:', avg_val_loss)
    # print('Val accuracy:', accuracy_score(true, pred))

    # print('Val precision:', precision_score(true, pred, average='weighted'))
    # print('Val recall:', recall_score(true, pred, average='weighted'))

    # val_micro_f1_score = f1_score(true, pred, average='micro')
    # print('Val micro f1 score:', val_micro_f1_score)

    # val_macro_f1_score = f1_score(true, pred, average='macro')
    # print('Val macro f1 score:', val_macro_f1_score)

    val_weighted_f1_score = f1_score(true, pred, average='weighted')
    # print('Val weighted f1 score:', val_weighted_f1_score)

    results.update({
        "f1_sampled": f1_score(true, pred, average='samples', zero_division=0),
        "f1_micro": f1_score(true, pred, average='micro', zero_division=0),
        "f1_macro": f1_score(true, pred, average='macro', zero_division=0),
        "f_05_sample": fbeta_score(true, pred, average='samples', beta=0.5, zero_division=0),
        "f_05_micro": fbeta_score(true, pred, average='micro', beta=0.5, zero_division=0),
        "f_05_macro": fbeta_score(true, pred, average='macro', beta=0.5, zero_division=0),
        "precision_sampled": precision_score(true, pred, average='samples', zero_division=0),
        "precision_micro": precision_score(true, pred, average='micro', zero_division=0),
        "precision_macro": precision_score(true, pred, average='macro', zero_division=0),
        "recall_sampled": recall_score(true, pred, average='samples', zero_division=0),
        "recall_micro": recall_score(true, pred, average='micro', zero_division=0),
        "recall_macro": recall_score(true, pred, average='macro', zero_division=0),
        "accuracy": accuracy_score(true, pred),
        })

    labels_float = labels.astype(float)
    results.update({
        "coverage_error": coverage_error(labels_float, logits),
        "label_ranking_loss": label_ranking_loss(labels_float, logits),
        "lrap": label_ranking_average_precision_score(labels_float, logits),
    })

    metrics_log.append(results.copy())

    print(results)

    return results


In [50]:
criterion = nn.BCEWithLogitsLoss()
best_model_ta = model.load_state_dict(torch.load('scibert_best_model.pt'))
results = test(best_model_ta, test_dataloader, criterion, is_final_test=True)

AttributeError: '_IncompatibleKeys' object has no attribute 'eval'

In [None]:
for k, v in results.items(): 
    print(f"{k}: {v:.4f}")

In [None]:
criterion = nn.BCEWithLogitsLoss()
results_val = test(best_model_ta, val_dataloader, criterion, is_final_test=True)

In [None]:
for k, v in results_val.items(): 
    print(f"{k}: {v:.4f}")

# Metrics log

In [None]:
metrics_data = {
    'f1_sampled': [],
    'f1_micro': [],
    'f1_macro': [],
    'f_05_sample': [],
    'f_05_micro': [],
    'f_05_macro': [],
    'precision_sampled': [],
    'precision_micro': [],
    'precision_macro': [],
    'recall_sampled': [],
    'recall_micro': [],
    'recall_macro': [],
    'accuracy': [],
    'coverage_error': [],
    'label_ranking_loss': [],
    'lrap': []
}

# 遍歷你的日誌數據，提取數值
for log_entry in metrics_log:
    for metric_name, value in log_entry.items():
        metrics_data[metric_name].append(float(value)) # 確保轉換為 float

In [None]:
print(metrics_data)

In [None]:
plt.figure(figsize=(12, 7))
epochs = range(1, len(metrics_log) + 1)
for metric_name, values in metrics_data.items():
    print(f"{metric_name}: {len(values)}")
    if metric_name != 'lrap' and metric_name != 'coverage_error':
        plt.plot(epochs, values, label=metric_name)

plt.xlabel('Epoch')
plt.ylabel('Metric Value')
plt.title('Tactic - All Metrics Over Epochs')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
plt.figure(figsize=(12, 7))
epochs = range(1, len(metrics_log) + 1)
for metric_name, values in metrics_data.items():
    # print(f"{metric_name}: {len(values)}")
    if metric_name == 'coverage_error':
        plt.plot(epochs, values, label=metric_name)

plt.xlabel('Epoch')
plt.ylabel('Metric Value')
plt.title('Tactic -All Metrics Over Epochs')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
plt.figure(figsize=(12, 7))
epochs = range(1, len(metrics_log) + 1)
for metric_name, values in metrics_data.items():
    # print(f"{metric_name}: {len(values)}")
    if metric_name == 'lrap':
        plt.plot(epochs, values, label=metric_name)

plt.xlabel('Epoch')
plt.ylabel('Metric Value')
plt.title('Tactic -All Metrics Over Epochs')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
def print_F1_based_on_distribution(y_true, y_pred, Y, columns):
  fig,ax = plt.subplots()

  results = pd.DataFrame(columns = columns)
  results.loc[len(results)] = f1_score(y_true, y_pred, average=None)


  Y_count = Y.apply(np.sum, axis=0)

  # print(Y_count[0:14])

  Y_count_sorted = Y_count[0:14].sort_values(ascending=False)

  ax.bar(Y_count_sorted.index, Y_count_sorted.values)
  ax.set_xlabel("Tactics")
  ax.set_ylabel("Number of CVEs")
  plt.xticks(rotation=90)

  ax2=ax.twinx()
  ax2.plot(Y_count_sorted.index, results[Y_count_sorted.index].iloc[0], color='red')
  ax2.set_ylabel("F1 Score")

  ax = plt.gca()
  plt.show()

In [None]:
def print_precision_based_on_distribution(y_true, y_pred, Y, columns):
  fig,ax = plt.subplots()

  results = pd.DataFrame(columns = columns)
  results.loc[len(results)] = precision_score(y_true, y_pred, average=None)


  Y_count = Y.apply(np.sum, axis=0)

  # print(Y_count[0:14])

  Y_count_sorted = Y_count[0:14].sort_values(ascending=False)

  ax.bar(Y_count_sorted.index, Y_count_sorted.values)
  ax.set_xlabel("Tactics")
  ax.set_ylabel("Number of CVEs")
  plt.xticks(rotation=90)

  ax2=ax.twinx()
  ax2.plot(Y_count_sorted.index, results[Y_count_sorted.index].iloc[0], color='red')
  ax2.set_ylabel("Precision Score")

  ax = plt.gca()
  plt.show()

In [None]:
def compute_metrics(predicted_y, true_y, metric_function, columns, limit):
  results = pd.DataFrame(columns = columns)
  if (metric_function == accuracy_score):
    results.loc[len(results)] = metric_function(true_y, predicted_y)
  else:
    results.loc[len(results)] = metric_function(true_y, predicted_y, average=None)

  sorted_results = results.sort_values(by=0, axis=1, ascending=False)

  for col in sorted_results.columns[:limit]:
        print(f"{col}: {sorted_results[col].values[0]}")

  return sorted_results.iloc[:, :limit]

In [None]:
# df = pd.read_csv('merged_all_columns.csv', encoding='utf-8')
# # df_attack['tactic_label'] = df_attack.apply(lambda x: list(x[TACTIC]), axis=1)
# # df_attack['technique_label'] = df_attack.apply(lambda x: list(x[TECHNIQUE]), axis=1)

# df_tactic_labels = df[TACTIC]
# df_technique_labels = df[TECHNIQUE]

# df['tactic_label'] = df.apply(lambda x: list(x[TACTIC]), axis=1)
# df['technique_label'] = df.apply(lambda x: list(x[TECHNIQUE]), axis=1)

# df['text_clean'] = df['Text'].map(lambda t: preprocess(t))


# train, test = train_test_split(df, test_size=0.2, random_state=2222)
# validation, test = train_test_split(test, test_size=0.5, random_state=2222)


In [None]:
df = pd.read_csv('merged_all_columns.csv', encoding='utf-8')
df_tram = pd.read_csv(r"C:\Users\user\OneDrive\桌面\研究所\課程內容\遞迴神經網路與變形器\Final_project\dataset\tram_with_all_labels.csv", encoding='utf-8')
df_attack = pd.read_csv(r"C:\Users\user\OneDrive\桌面\研究所\課程內容\遞迴神經網路與變形器\Final_project\dataset\attack_with_all_labels.csv", encoding='utf-8')



# df_attack['tactic_label'] = df_attack.apply(lambda x: list(x[TACTIC]), axis=1)
# df_attack['technique_label'] = df_attack.apply(lambda x: list(x[TECHNIQUE]), axis=1)

df_tactic_labels = df[TACTIC]
df_technique_labels = df[TECHNIQUE]

In [None]:
df['tactic_label'] = df.apply(lambda x: list(x[TACTIC]), axis=1)
df['technique_label'] = df.apply(lambda x: list(x[TECHNIQUE]), axis=1)
df_tram['tactic_label'] = df_tram.apply(lambda x: list(x[TACTIC]), axis=1)
df_tram['technique_label'] = df_tram.apply(lambda x: list(x[TECHNIQUE]), axis=1)
df_attack['tactic_label'] = df_attack.apply(lambda x: list(x[TACTIC]), axis=1)
df_attack['technique_label'] = df_attack.apply(lambda x: list(x[TECHNIQUE]), axis=1)

df = pd.concat([df,df_tram, df_attack], ignore_index=True)

train, test = train_test_split(df, test_size=0.2, random_state=2222)
validation, test = train_test_split(test, test_size=0.5, random_state=2222)


In [None]:
print("------Validation results --------")
print(BEST_F1)

print("F1 scores per class")
y_train_df = train.drop(train.columns[0:2], axis=1)

f1_best_metrics = compute_metrics(BEST_PREDICTED, BEST_TRUE, f1_score, test.columns[2:16], 14)
print("----------------------------------")

print("Recall scores per class")
recall_best_metrics = compute_metrics(BEST_PREDICTED, BEST_TRUE, recall_score, test.columns[2:16], 14)
print("----------------------------------")

print("Precision scores per class")
precision_best_metrics = compute_metrics(BEST_PREDICTED, BEST_TRUE, precision_score, test.columns[2:16], 14)
print("----------------------------------")

print("Accuracy scores per class")
acc_best_metrics = compute_metrics(BEST_PREDICTED, BEST_TRUE, accuracy_score, test.columns[2:16], 14)

In [None]:
print_F1_based_on_distribution(BEST_PREDICTED, BEST_TRUE, y_train_df,  test.columns[2:16])

In [None]:
print_precision_based_on_distribution(BEST_PREDICTED, BEST_TRUE, y_train_df,  test.columns[2:16])

In [None]:
y_train_df = train.drop(train.columns[0:2], axis=1)
Y_count = y_train_df.apply(np.sum, axis=0)
Y_count_sorted = Y_count[0:14].sort_values(ascending=False)

print(Y_count_sorted)

TA0002    514
TA0001    350
TA0040    276
TA0004    202
TA0009    187
TA0006    129
TA0005     78
TA0007     57
TA0003     43
TA0008     38
TA0042     32
TA0043     20
TA0010      7
TA0011      6
dtype: object


In [None]:
y_validation_df = validation.drop(validation.columns[0:2], axis=1)
Y_count_val = y_validation_df.apply(np.sum, axis=0)
Y_count_val_sorted = Y_count_val[0:14].sort_values(ascending=False)
print(Y_count_val_sorted)

TA0002    53
TA0001    46
TA0040    32
TA0004    29
TA0009    28
TA0006    18
TA0005    16
TA0003     8
TA0043     4
TA0007     3
TA0008     3
TA0010     3
TA0042     1
TA0011     0
dtype: object


In [None]:
y_test_df = test.drop(test.columns[0:2], axis=1)
Y_count_test = y_test_df.apply(np.sum, axis=0)
Y_count_test_sorted = Y_count_test[0:14].sort_values(ascending=False)
print(Y_count_test_sorted)

TA0002    70
TA0001    47
TA0040    31
TA0009    22
TA0004    20
TA0006    17
TA0003     8
TA0005     8
TA0008     7
TA0007     5
TA0042     4
TA0043     2
TA0010     0
TA0011     0
dtype: object
