In [1]:
! pip install LightGBM

[0m

In [2]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import glob
import shutil
import string
import pickle
import random
import joblib
import itertools
import collections
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgbm
from xgboost import XGBRegressor
import fasttext as ft

import torch
print(f"torch.__version__: {torch.__version__}")
from torch.cuda import amp
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

import tokenizers
import transformers

from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
%env TOKENIZERS_PARALLELISM=true

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

torch.__version__: 1.11.0
env: TOKENIZERS_PARALLELISM=true


In [3]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Namespace of inference settings shared by the dataset, models and loop.

    Further attributes (``tokenizer``, and a per-model ``max_len``) are attached
    to this class at run time by the inference cell.
    """

    # --- data loading ---
    num_workers = 2
    batch_size = 32
    max_len = 512          # default token length; overwritten per model later

    # --- model head ---
    fc_dropout = 0.2
    target_size = 1

    # --- cross-validation layout ---
    seed = 2022
    n_fold = 5
    trn_fold = list(range(n_fold))   # folds whose checkpoints are evaluated

In [4]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    """Tokenize one text into fixed-length ``torch.long`` tensors.

    Args:
        cfg: object exposing ``tokenizer`` (a callable tokenizer) and
            ``max_len`` (target sequence length).
        text: the string to encode.

    Returns:
        The mapping returned by the tokenizer, with every value replaced by a
        ``torch.long`` tensor.
    """
    # truncation=True is required alongside padding="max_length": padding only
    # extends short sequences, so without truncation a text longer than
    # cfg.max_len would produce a longer tensor and batches could no longer be
    # stacked by the DataLoader.
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           truncation=True,
                           return_offsets_mapping=False)
    # Values are replaced in place (keys are unchanged, so iterating is safe).
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TestDataset(Dataset):
    """Map-style dataset that tokenizes the ``text`` column of a frame lazily.

    Each item is whatever ``prepare_input`` returns for the corresponding row.
    """

    def __init__(self, cfg, df):
        # Keep only what __getitem__ needs: the config and the raw strings.
        self.texts = df['text'].values
        self.cfg = cfg

    def __len__(self):
        """Number of rows / samples."""
        return len(self.texts)

    def __getitem__(self, item):
        """Tokenize and return the sample at position ``item``."""
        return prepare_input(self.cfg, self.texts[item])

In [5]:
def get_cpc_texts(scheme_dir='../input/cpc-data/CPCSchemeXML202105',
                  title_dir='../input/cpc-data/CPCTitleList202202'):
    """Build a mapping from CPC context code (e.g. ``'B60'``) to a title text.

    Context codes are collected from the file names in ``scheme_dir`` (every
    match of one upper-case letter followed by digits). For each section
    letter the file ``cpc-section-<letter>_20220201.txt`` in ``title_dir`` is
    read, and each code maps to ``"<section title>. <code title>"``.

    Args:
        scheme_dir: directory whose file names contain the context codes.
        title_dir: directory holding the per-section title list files.

    Returns:
        dict mapping context code -> combined title string.

    Raises:
        IndexError: if a section or context code has no title line in its file.

    NOTE(review): titles are extracted with ``str.lstrip(pattern)``, which
    strips a *set of characters* rather than a prefix, so a title whose first
    letters also occur in the code (e.g. a title starting with ``A`` under
    ``A01``) loses those letters. This is kept unchanged on purpose: the
    resulting text feeds the fine-tuned models, which were presumably trained
    on text built the same way -- confirm before "fixing" it.
    """
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    code_pattern = re.compile(r'[A-Z]\d+')
    found = set()
    for file_name in os.listdir(scheme_dir):
        found.update(code_pattern.findall(file_name))
    contexts = sorted(found)

    results = {}
    for cpc in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'Y']:
        with open(os.path.join(title_dir, f'cpc-section-{cpc}_20220201.txt')) as f:
            s = f.read()
        # Section title line: "<letter>\t\t<title>"
        pattern = f'{cpc}\t\t.+'
        result = re.findall(pattern, s)
        cpc_result = result[0].lstrip(pattern)  # character-set strip, see NOTE
        for context in [c for c in contexts if c[0] == cpc]:
            # Class title line: "<code>\t\t<title>"
            pattern = f'{context}\t\t.+'
            result = re.findall(pattern, s)
            results[context] = cpc_result + ". " + result[0].lstrip(pattern)
    return results

In [6]:
class CustomModel1(nn.Module):
    """Transformer backbone with attention pooling and one linear head.

    The backbone's token states are pooled by a small learned attention block
    (Linear -> Tanh -> Linear -> Softmax over the sequence axis); the pooled
    vector goes through dropout and a linear layer with ``cfg.target_size``
    outputs.

    Args:
        cfg: settings object; ``fc_dropout`` and ``target_size`` are always
            read, ``model`` only when no ``config_path`` is given or when
            ``pretrained`` is true.
        config_path: path of a saved config; when ``None`` the config is
            fetched from ``cfg.model``.
        pretrained: load pretrained backbone weights instead of building the
            backbone from the config alone.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg

        # Backbone configuration: explicit file wins over the model name.
        self.config = (
            AutoConfig.from_pretrained(config_path)
            if config_path is not None
            else AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        )

        # Backbone itself: pretrained weights or an architecture-only shell.
        self.model = (
            AutoModel.from_pretrained(cfg.model, config=self.config)
            if pretrained
            else AutoModel.from_config(self.config)
        )

        width = self.config.hidden_size

        # Regression head.
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(width, self.cfg.target_size)
        self._init_weights(self.fc)

        # Attention pooling block; Softmax(dim=1) normalizes across tokens.
        self.attention = nn.Sequential(
            nn.Linear(width, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        )
        # An nn.Sequential matches none of the types handled below, so this
        # call leaves the attention layers at their default initialization.
        self._init_weights(self.attention)

    def _init_weights(self, module):
        """Re-initialize a single Linear / Embedding / LayerNorm module in place.

        Any other module type is left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
            return
        if isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
            return
        if isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _resize_token_embeddings(self, length):
        """Resize the backbone's token embedding table to ``length`` entries."""
        self.model.resize_token_embeddings(length)

    def feature(self, inputs):
        """Return the attention-pooled representation of ``inputs``.

        Every position takes part in the softmax; no attention mask is applied
        here. (Plain mean pooling over the sequence axis was the alternative.)
        """
        token_states = self.model(**inputs)[0]
        scores = self.attention(token_states)
        return (scores * token_states).sum(dim=1)

    def forward(self, inputs):
        """Pooled feature -> dropout -> linear head."""
        pooled = self.feature(inputs)
        return self.fc(self.fc_dropout(pooled))

In [7]:
class CustomModel2(nn.Module):
    """Transformer backbone with attention pooling and a multi-sample dropout head.

    The pooled feature is passed through five dropout layers with rates
    0, 0.1, 0.2, 0.3 and 0.4; the same linear layer is applied to each and the
    five results are averaged.

    Args:
        cfg: settings object; ``target_size`` is always read, ``model`` only
            when no ``config_path`` is given or when ``pretrained`` is true.
        config_path: path of a saved config; when ``None`` the config is
            fetched from ``cfg.model``.
        pretrained: load pretrained backbone weights instead of building the
            backbone from the config alone.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        else:
            self.config = AutoConfig.from_pretrained(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel.from_config(self.config)

        hidden_dropout_prob: float = 0.1
        layer_norm_eps: float = 1e-7

        # NOTE(review): this update runs after the backbone has been built, so
        # it presumably does not alter layers that were already constructed
        # from the config. The order is kept as is because the published
        # checkpoints were produced with it -- confirm before moving it.
        self.config.update(
            {
                "output_hidden_states": True,
                "hidden_dropout_prob": hidden_dropout_prob,
                "layer_norm_eps": layer_norm_eps,
                "add_pooling_layer": False,
                # Read from the cfg argument rather than the global CFG so the
                # class does not depend on notebook-level state.
                "num_labels": self.cfg.target_size,
            }
        )
        self.fc_dropout1 = nn.Dropout(0)
        self.fc_dropout2 = nn.Dropout(0.1)
        self.fc_dropout3 = nn.Dropout(0.2)
        self.fc_dropout4 = nn.Dropout(0.3)
        self.fc_dropout5 = nn.Dropout(0.4)
        self.fc = nn.Linear(self.config.hidden_size, self.cfg.target_size)
        self._init_weights(self.fc)
        # Attention pooling block; Softmax(dim=1) normalizes across tokens.
        self.attention = nn.Sequential(
            nn.Linear(self.config.hidden_size, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1)
        )
        # An nn.Sequential matches none of the types handled by _init_weights,
        # so the attention layers keep their default initialization.
        self._init_weights(self.attention)

    def _init_weights(self, module):
        """Re-initialize a single Linear / Embedding / LayerNorm module in place.

        Any other module type is left untouched.
        """
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _resize_token_embeddings(self, length):
        """Resize the backbone's token embedding table to ``length`` entries."""
        self.model.resize_token_embeddings(length)

    def feature(self, inputs):
        """Return the attention-pooled representation of ``inputs``.

        Every position takes part in the softmax; no attention mask is applied
        here.
        """
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        weights = self.attention(last_hidden_states)
        feature = torch.sum(weights * last_hidden_states, dim=1)
        return feature

    def forward(self, inputs):
        """Return ``(mean_output, output1, ..., output5)``.

        ``output1`` .. ``output5`` are the head outputs under the five dropout
        rates; ``mean_output`` is their arithmetic mean.
        """
        feature = self.feature(inputs)
        output1 = self.fc(self.fc_dropout1(feature))
        output2 = self.fc(self.fc_dropout2(feature))
        output3 = self.fc(self.fc_dropout3(feature))
        output4 = self.fc(self.fc_dropout4(feature))
        output5 = self.fc(self.fc_dropout5(feature))
        output = (output1 + output2 + output3 + output4 + output5) / 5
        return output, output1, output2, output3, output4, output5

In [8]:
class CustomModel21(nn.Module):
    """Transformer backbone with attention pooling and a 5-way linear head.

    Same layout as the single-output model, except that the final linear layer
    always emits five values per sample regardless of ``cfg.target_size``.

    Args:
        cfg: settings object; ``fc_dropout`` is always read, ``model`` only
            when no ``config_path`` is given or when ``pretrained`` is true.
        config_path: path of a saved config; when ``None`` the config is
            fetched from ``cfg.model``.
        pretrained: load pretrained backbone weights instead of building the
            backbone from the config alone.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg

        # 1) configuration
        if config_path is not None:
            backbone_config = AutoConfig.from_pretrained(config_path)
        else:
            backbone_config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        self.config = backbone_config

        # 2) backbone
        if not pretrained:
            self.model = AutoModel.from_config(self.config)
        else:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)

        # 3) head: dropout followed by a five-output linear layer
        dim = self.config.hidden_size
        self.fc_dropout = nn.Dropout(cfg.fc_dropout)
        self.fc = nn.Linear(dim, 5)
        self._init_weights(self.fc)

        # 4) attention pooling; Softmax(dim=1) normalizes across tokens
        pooling_layers = [
            nn.Linear(dim, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1),
        ]
        self.attention = nn.Sequential(*pooling_layers)
        # A Sequential container is none of the handled module types, so this
        # call does not modify the attention layers.
        self._init_weights(self.attention)

    def _init_weights(self, module):
        """Re-initialize one Linear / Embedding / LayerNorm module in place.

        Modules of any other type are ignored.
        """
        std = self.config.initializer_range if isinstance(module, (nn.Linear, nn.Embedding)) else None
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _resize_token_embeddings(self, length):
        """Resize the backbone's token embedding table to ``length`` entries."""
        self.model.resize_token_embeddings(length)

    def feature(self, inputs):
        """Attention-weighted sum of the backbone's token states.

        No attention mask is applied; every position enters the softmax.
        """
        backbone_out = self.model(**inputs)
        hidden = backbone_out[0]
        attn = self.attention(hidden)
        pooled = torch.sum(attn * hidden, dim=1)
        return pooled

    def forward(self, inputs):
        """Return the five head outputs for each sample."""
        dropped = self.fc_dropout(self.feature(inputs))
        return self.fc(dropped)

In [9]:
def inference_fn(test_loader, model_type, config_path, model_path, device):
    """Run one checkpoint over ``test_loader`` and return its predictions.

    Args:
        test_loader: iterable of batches; each batch is a mapping of tensors
            that is passed to the model as a single argument.
        model_type: ``1`` builds ``CustomModel1``, ``2`` builds
            ``CustomModel2``; any other value builds ``CustomModel21``. Only
            the value ``2.1`` gets the softmax / expected-score treatment; all
            other values are passed through a sigmoid.
        config_path: backbone config path handed to the model constructor.
        model_path: path of the saved ``state_dict``.
        device: device on which inference runs.

    Returns:
        ``numpy`` array of predictions. For ``model_type == 2.1`` this is the
        expectation of the scores ``[0, 0.25, 0.5, 0.75, 1]`` under the
        predicted 5-way distribution; otherwise the sigmoid of the model
        output, concatenated over batches.
    """
    if model_type == 1:
        model = CustomModel1(CFG, config_path=config_path, pretrained=False)
    elif model_type == 2:
        model = CustomModel2(CFG, config_path=config_path, pretrained=False)
    else:
        model = CustomModel21(CFG, config_path=config_path, pretrained=False)
    model._resize_token_embeddings(len(CFG.tokenizer))

    # Deserialize onto the CPU first: this works on hosts without the device
    # the checkpoint was saved from, and avoids holding a second copy of the
    # weights on the accelerator before the model itself is moved there.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)

    is_distribution = model_type == 2.1
    preds = []
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            if model_type == 2:
                # CustomModel2 returns (mean, head1..head5); keep the mean.
                y_preds, _, _, _, _, _ = model(inputs)
            else:
                y_preds = model(inputs)
            if is_distribution:
                # Explicit class axis instead of the deprecated implicit dim.
                y_preds = torch.softmax(y_preds, dim=1)
            else:
                y_preds = y_preds.sigmoid()
        preds.append(y_preds.to('cpu').numpy())
    predictions = np.concatenate(preds)
    if is_distribution:
        # Expected score under the predicted class distribution.
        predictions = np.matmul(predictions, np.array([0.0, 0.25, 0.50, 0.75, 1.0]))
    del model
    gc.collect()
    return predictions

In [10]:
# Registry of the fine-tuned checkpoints that make up the first-stage ensemble.
# Each entry is keyed by run name and holds:
#   'config'        - backbone config file passed to the model constructor
#   'tokenizer'     - directory of the tokenizer saved with the run
#   'model_weights' - one state_dict path per fold
#   'fold_weights'  - per-fold weights used to average the fold predictions
#   'max_len'       - token length used when tokenizing for this model
#   'version'       - model_type handed to inference_fn (1, 2 or 2.1)
#   'directory'     - not read by any code in this notebook; presumably the
#                     index of the uspppm-weights-N dataset (TODO confirm)
# The number and order of the active entries determine the preds_<i> feature
# columns built in the inference cell.
model_dict = {
    "USPPM-deberta-v3-large-BCC-MSE-2-133-tags" : {
        'config' : '../input/huggingface-models/deberta-v3-large/config.json',
        'tokenizer' : '../input/uspppm-weights-1/USPPM-deberta-v3-large-BCC-MSE-2-133-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-1/USPPM-deberta-v3-large-BCC-MSE-2-133-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.86554,0.85818,0.85701,0.85384,0.86284],
        'max_len' : 133,
        'version' : 2,
        'directory': 1
    },
    "USPPM-deberta-v3-large-XE-NA-2.1-133-tags" : {
        'config' : '../input/huggingface-models/deberta-v3-large/config.json',
        'tokenizer' : '../input/uspppm-weights-1/USPPM-deberta-v3-large-XE-NA-2.1-133-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-1/USPPM-deberta-v3-large-XE-NA-2.1-133-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.86811,0.85894,0.86076,0.85937,0.86524],
        'max_len' : 133,
        'version' : 2.1,
        'directory': 1
    },
    "USPPM-bert-for-patents-BCC-MSE-2-117-tags" : {
        'config' : '../input/huggingface-models/bert-for-patents/config.json',
        'tokenizer' : '../input/uspppm-weights-1/USPPM-bert-for-patents-BCC-MSE-2-117-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-1/USPPM-bert-for-patents-BCC-MSE-2-117-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.85571,0.84781,0.85376,0.84724,0.85582],
        'max_len' : 117,
        'version' : 2,
        'directory': 1
    },
    "USPPM-bert-for-patents-XE-NA-2.1-117-tags" : {
        'config' : '../input/huggingface-models/bert-for-patents/config.json',
        'tokenizer' : '../input/uspppm-weights-1/USPPM-bert-for-patents-XE-NA-2.1-117-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-1/USPPM-bert-for-patents-XE-NA-2.1-117-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.86499,0.85723,0.86173,0.86141,0.85525],
        'max_len' : 117,
        'version' : 2.1,
        'directory': 1
    },
    # --- disabled entries (kept for reference) ---
#     "USPPM-PatentSBERTa-XE-NA-2.1-125-tags": {
#         'config' : '../input/huggingface-models/PatentSBERTa/config.json',
#         'tokenizer' : '../input/uspppm-weights-1/USPPM-PatentSBERTa-XE-NA-2.1-125-tags/models/tokenizer/',
#         'model_weights' : [f'../input/uspppm-weights-1/USPPM-PatentSBERTa-XE-NA-2.1-125-tags/models/model_{fold}.pth' for fold in range(5)],
#         'fold_weights' : [0.83789,0.8342,0.82875,0.82359,0.82535],
#         'max_len' : 125,
#         'version' : 2.1,
#         'directory': 1
#     },
#     "USPPM-deberta-v3-base-BCC-MSE-1-133-tags": {
#         'config' : '../input/huggingface-models/deberta-v3-base/config.json',
#         'tokenizer' : '../input/uspppm-weights-1/USPPM-deberta-v3-base-BCC-MSE-1-133-tags/models/tokenizer/',
#         'model_weights' : [f'../input/uspppm-weights-1/USPPM-deberta-v3-base-BCC-MSE-1-133-tags/models/fold_{fold}/model.pth' for fold in range(5)],
#         'fold_weights' : [0.83789,0.8342,0.82875,0.82359,0.82535],
#         'max_len' : 133,
#         'version' : 1,
#         'directory': 1
#     },
    "USPPM-deberta-v3-base-XE-NA-2.1-133-tags": {
        'config' : '../input/huggingface-models/deberta-v3-base/config.json',
        'tokenizer' : '../input/uspppm-weights-1/USPPM-deberta-v3-base-XE-NA-2.1-133-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-1/USPPM-deberta-v3-base-XE-NA-2.1-133-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.85803,0.845,0.84902,0.84866,0.85141],
        'max_len' : 133,
        'version' : 2.1,
        'directory': 1
    },
    # --- disabled entries (kept for reference) ---
#     "USPPM-simcse-bert-for-patent-XE-NA-2.1-175-tags": {
#         'config' : '../input/huggingface-models/simcse-bert-for-patents/config.json',
#         'tokenizer' : '../input/uspppm-weights-1/USPPM-simcse-bert-for-patent-XE-NA-2.1-175-tags/models/tokenizer/',
#         'model_weights' : [f'../input/uspppm-weights-1/USPPM-simcse-bert-for-patent-XE-NA-2.1-175-tags/models/model_{fold}.pth' for fold in range(5)],
#         'fold_weights' : [0.84609,0.85297,0.8382,0.84458,0.8458],
#         'max_len' : 175,
#         'version' : 2.1,
#         'directory': 1
#     },
#     "USPPM-deberta-v3-small-BCC-MSE-2-133-tags": {
#         'config' : '../input/huggingface-models/deberta-v3-small/config.json',
#         'tokenizer' : '../input/uspppm-weights-2/USPPM-deberta-v3-small-BCC-MSE-2-133-tags/models/tokenizer/',
#         'model_weights' : [f'../input/uspppm-weights-2/USPPM-deberta-v3-small-BCC-MSE-2-133-tags/models/model_{fold}.pth' for fold in range(5)],
#         'fold_weights' : [0.83849,0.83638,0.82693,0.8288,0.8329],
#         'max_len' : 133,
#         'version' : 2,
#         'directory' : 2
#     },
#     "USPPM-deberta-v3-small-XE-NA-2.1-133-tags": {
#         'config' : '../input/huggingface-models/deberta-v3-small/config.json',
#         'tokenizer' : '../input/uspppm-weights-2/USPPM-deberta-v3-small-XE-NA-2.1-133-tags/models/tokenizer/',
#         'model_weights' : [f'../input/uspppm-weights-2/USPPM-deberta-v3-small-XE-NA-2.1-133-tags/models/model_{fold}.pth' for fold in range(5)],
#         'fold_weights' : [0.85025,0.84183,0.84166,0.8377,0.84328],
#         'max_len' : 133,
#         'version' : 2.1,
#         'directory' : 2
#     },
    "USPPM-deberta-large-mnli-XE-NA-2.1-175-tags": {
        'config' : '../input/huggingface-models/deberta-large-mnli/config.json',
        'tokenizer' : '../input/uspppm-weights-2/USPPM-deberta-large-mnli-XE-NA-2.1-175-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-2/USPPM-deberta-large-mnli-XE-NA-2.1-175-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.85338,0.85016,0.85061,0.85099,0.84766],
        'max_len' : 175,
        'version' : 2.1,
        'directory' : 2
    },
    # NOTE(review): the fold_weights below are identical to those of the
    # deberta-large-mnli XE-NA entry above -- looks copied; confirm the values.
    "USPPM-deberta-large-mnli-BCC-MSE-1-175-tags": {
        'config' : '../input/huggingface-models/deberta-large-mnli/config.json',
        'tokenizer' : '../input/uspppm-weights-2/USPPM-deberta-large-mnli-BCC-MSE-1-175-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-2/USPPM-deberta-large-mnli-BCC-MSE-1-175-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.85338,0.85016,0.85061,0.85099,0.84766],
        'max_len' : 175,
        'version' : 1,
        'directory' : 2
    },
    "USPPM-deberta-v3-large-BCC-MSE-1-133-tags": {
        'config' : '../input/huggingface-models/deberta-v3-large/config.json',
        'tokenizer' : '../input/uspppm-weights-2/USPPM-deberta-v3-large-BCC-MSE-1-133-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-2/USPPM-deberta-v3-large-BCC-MSE-1-133-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.86796,0.86079,0.86377,0.86062,0.86571],
        'max_len' : 133,
        'version' : 1,
        'directory' : 2
    },
    "USPPM-bert-for-patents-BCC-MSE-1-117-tags": {
        'config' : '../input/huggingface-models/bert-for-patents/config.json',
        'tokenizer' : '../input/uspppm-weights-2/USPPM-bert-for-patents-BCC-MSE-1-117-tags/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-2/USPPM-bert-for-patents-BCC-MSE-1-117-tags/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.86013,0.84985,0.850340,0.85228,0.85214],
        'max_len' : 117,
        'version' : 1,
        'directory' : 2
    },   
    # NOTE(review): these fold_weights equal the ones in the disabled
    # PatentSBERTa / deberta-v3-base entries, and 'directory' is 2 although the
    # paths point at uspppm-weights-3 -- looks like placeholders; confirm.
    "USPPM-deberta-v3-large-PT-BCC-MSE-1-133-tags": {
        'config' : '../input/huggingface-models/deberta-v3-large/config.json',
        'tokenizer' : '../input/uspppm-weights-3/models/tokenizer/',
        'model_weights' : [f'../input/uspppm-weights-3/models/model_{fold}.pth' for fold in range(5)],
        'fold_weights' : [0.83789,0.8342,0.82875,0.82359,0.82535],
        'max_len' : 133,
        'version' : 1,
        'directory' : 2
    },
}

In [11]:
# Hyper-parameters of the second-stage XGBoost regressors.
# Not referenced by the inference cell in this notebook, which restores saved
# boosters from disk.
xgb_params = dict(
    max_depth=12,
    tree_method="gpu_hist",
    eta=0.07,
    subsample=1.0,
    grow_policy="depthwise",
)

In [12]:
if __name__ == "__main__":
    # ------------------------------------------------------------------
    # 1) Load the test set and build the text inputs
    # ------------------------------------------------------------------
    test = pd.read_csv("../input/us-patent-phrase-to-phrase-matching/test.csv")
    submission = pd.read_csv("../input/us-patent-phrase-to-phrase-matching/sample_submission.csv")
    cpc_texts = get_cpc_texts()
    # NOTE(review): the encoder is fitted on the test contexts only, so the
    # integer assigned to a context depends on which contexts occur in the test
    # file. If the boosters were trained with a different context set the
    # codes will not line up -- confirm against the training notebook.
    encoder = LabelEncoder()
    test['context_num'] = encoder.fit_transform(test['context'])
    test['context_text'] = test['context'].map(cpc_texts)
    test['context_tags'] = '<' + test['context'] + '>'
    # Transformer input: context tag, anchor, target and context title wrapped in markers.
    test['text'] = test['context_tags'] + '<anchor>' + test['anchor'] + '</anchor><target>' + test['target'] + '</target><context>'  + test['context_text'] + '</context>'
    # Lower-cased "anchor target" string used for the fastText sentence vector.
    test['text_'] = test['anchor'] + ' ' + test['target']
    test['text_'] = test['text_'].map(lambda x: x.lower())

    # ------------------------------------------------------------------
    # 2) First stage: every transformer in model_dict, all folds
    # ------------------------------------------------------------------
    predictions_models = []
    for key, value in model_dict.items():
        predictions_fold = []
        print(key)
        # The tokenizer and sequence length are model specific and are handed
        # to the dataset / inference code through the shared CFG class.
        CFG.tokenizer = AutoTokenizer.from_pretrained(value["tokenizer"])
        CFG.max_len = value["max_len"]
        test_dataset = TestDataset(CFG, test)
        test_loader = DataLoader(test_dataset,
                                 batch_size=CFG.batch_size,
                                 shuffle=False,
                                 num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

        for fold in CFG.trn_fold:
            model_path = value["model_weights"][fold]
            prediction = inference_fn(test_loader, value["version"], value["config"], model_path, device)
            predictions_fold.append(prediction)
            del prediction
            gc.collect()
        # Weighted average of the fold predictions for this model.
        fold_preds = [x.reshape(-1,1) for x in predictions_fold]
        fold_preds = np.concatenate(fold_preds, axis=-1)
        predictions_models.append(np.matmul(fold_preds, np.array(value['fold_weights']))/np.sum(value['fold_weights']))
    del test_loader, test_dataset
    gc.collect()
    predictions_models = [np.reshape(x,(-1,1)) for x in predictions_models]
    predictions_models= np.concatenate(predictions_models, axis=-1)

    # ------------------------------------------------------------------
    # 3) Second-stage features: context id, model predictions, fastText vector
    # ------------------------------------------------------------------
    pred_cols = [f'preds_{i}' for i in range(len(model_dict))]
    test[pred_cols] = predictions_models
    ft_model = ft.load_model("../input/fasttext-word-embeddings/crawl-300d-2M-subword.bin")
    test_x = np.array([ft_model.get_sentence_vector(text) for text in list(test.text_)])
    test_x = np.concatenate([test[['context_num'] + pred_cols].values, test_x], axis = -1)

    # ------------------------------------------------------------------
    # 4) Second stage: saved XGBoost boosters, weighted by the value encoded
    #    in each file name
    # ------------------------------------------------------------------
    # Sorted so that the order of boosters (and thus the floating-point
    # summation order) is the same on every run; glob order is arbitrary.
    xgb_models = sorted(glob.glob("../input/uspppm-utils/Top 9  PT XGB/Top 9 + PT XGB/*.txt"))
    if not xgb_models:
        raise FileNotFoundError("No XGBoost model files found under "
                                "'../input/uspppm-utils/Top 9  PT XGB/Top 9 + PT XGB/'")
    weights = []
    predictions = []
    for xgb_model in xgb_models:
        model = XGBRegressor()
        model.load_model(xgb_model)
        # The last '_'-separated part of the file name is read as
        # "<int>.<digits>.txt"; the digits divided by 1e5 give the weight.
        # Parsed from the base name so directory names cannot interfere.
        xgb_w = float(os.path.basename(xgb_model).split('_')[-1].split(".")[1])/1e5
        weights.append(xgb_w)
        prediction = model.predict(test_x)
        predictions.append(prediction)
#     lgbm_models = glob.glob("../input/uspppm-utils/Top 9PT LGBM/Top 9+PT LGBM/*.txt")   
#     for lgbm_model in lgbm_models:
#         model = lgbm.Booster(model_file = lgbm_model)
#         lgbm_w = float(lgbm_model.split('_')[-1].split(".")[1])/1e5
#         weights.extend([lgbm_w])
#         prediction = model.predict(test_x)
#         predictions.append(prediction)
    weights = np.array(weights)
    model_preds = [x.reshape(-1,1) for x in predictions]
    model_preds = np.concatenate(model_preds, axis=-1)
    pred_final = np.matmul(model_preds, weights)/np.sum(weights)

    # ------------------------------------------------------------------
    # 5) Write the submission file
    # ------------------------------------------------------------------
    sub_df  = pd.DataFrame(columns = ['id', 'score'])
    sub_df['id'] = test['id']
    sub_df['score'] = pred_final
    sub_df.to_csv("submission.csv", index = None)
sub_df.head()

USPPM-deberta-v3-large-BCC-MSE-2-133-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-deberta-v3-large-XE-NA-2.1-133-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-bert-for-patents-BCC-MSE-2-117-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-bert-for-patents-XE-NA-2.1-117-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-deberta-v3-base-XE-NA-2.1-133-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-deberta-large-mnli-XE-NA-2.1-175-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-deberta-large-mnli-BCC-MSE-1-175-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-deberta-v3-large-BCC-MSE-1-133-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-bert-for-patents-BCC-MSE-1-117-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

USPPM-deberta-v3-large-PT-BCC-MSE-1-133-tags


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]





Unnamed: 0,id,score
0,4112d61851461f60,0.483754
1,09e418c93a776564,0.616262
2,36baf228038e314b,0.4561
3,1f37ead645e7f0c8,0.300241
4,71a5b6ad068d531f,0.078439
