In [1]:
# import os
# import re
# import random
import warnings
# import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
import torch
from typing import Literal
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# from transformers import TrainingArguments, Trainer
# from transformers import DataCollatorWithPadding
# from datasets import Dataset
# from sklearn.model_selection import train_test_split
# from sklearn.model_selection import StratifiedKFold
# from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# from sklearn.metrics import cohen_kappa_score

# Install a blanket "ignore" filter so Python warnings are not shown in cell outputs.
# NOTE(review): this hides every warning category, including deprecation notices
# raised by the libraries imported above -- consider narrowing the filter.
warnings.simplefilter("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
class VotingModel(torch.nn.Module):
    """Ensemble of sequence-classification models combined by voting.

    Every base model is loaded with ``num_labels=6``. Its six logits are
    projected to one value by a shared ``tmp_linear`` layer, and the per-model
    values are then combined either by averaging ("mean") or by a small stack
    of linear layers ("linear").

    Args:
        model_paths: Non-empty list of paths/identifiers accepted by
            ``AutoModelForSequenceClassification.from_pretrained``.
        tokenizer_path: Optional path to a tokenizer object saved with
            ``torch.save``. When falsy, the ``microsoft/deberta-base``
            tokenizer is loaded instead.
        voting_mode: ``"mean"`` or ``"linear"``. An explicit ``"linear"`` is
            honoured even when ``linear_layer_num`` is 0 (a single linear
            layer is used in that case).
        linear_layer_num: Number of layers in the linear voting head. For
            backward compatibility any value greater than 0 selects the
            linear head, whatever ``voting_mode`` says.
        max_length: Optional maximum token length forwarded to the tokenizer.
            ``None`` (the default) keeps the tokenizer's own limit; without
            any limit ``truncation=True`` has no effect.

    Raises:
        AssertionError: If ``linear_layer_num`` is negative.
        ValueError: If ``voting_mode`` is not "mean"/"linear" or
            ``model_paths`` is empty.
    """

    def __init__(
        self,
        model_paths: list,
        tokenizer_path: str = None,
        voting_mode: Literal["mean", "linear"] = "mean",
        linear_layer_num: int = 0,  # if voting_mode is "linear"
        max_length: int = None,
    ) -> None:
        super().__init__()

        # Validate before any checkpoint is loaded so bad arguments fail fast.
        assert (
            linear_layer_num >= 0
        ), "linear_layer_num must be greater than or equal to 0"
        if voting_mode not in ("mean", "linear"):
            raise ValueError(
                f'voting_mode must be "mean" or "linear", got {voting_mode!r}'
            )
        if len(model_paths) == 0:
            raise ValueError("model_paths must contain at least one model path")

        # Resolve the target device once and reuse it everywhere.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.max_length = max_length

        # ModuleList (instead of a plain list) registers the base models as
        # submodules, so .to(), .state_dict() and .parameters() can see them.
        self.models = torch.nn.ModuleList(
            [
                AutoModelForSequenceClassification.from_pretrained(path, num_labels=6)
                for path in model_paths
            ]
        )
        # Shared projection from the 6 class logits of a base model to one value.
        # NOTE(review): this layer is randomly initialised and only ever applied
        # under torch.no_grad() in forward(), so it is never trained here.
        self.tmp_linear = torch.nn.Linear(6, 1).to(self.device)

        # NOTE(review): torch.load unpickles the file and can execute arbitrary
        # code -- only pass tokenizer paths from a trusted source.
        # NOTE(review): the fallback tokenizer is "microsoft/deberta-base";
        # confirm its vocabulary matches the checkpoints in model_paths.
        self.tokenizer = (
            torch.load(tokenizer_path)
            if tokenizer_path
            else AutoTokenizer.from_pretrained("microsoft/deberta-base")
        )

        # Honour an explicit "linear" request; a positive layer count also
        # selects the linear head, as it always did.
        use_linear = voting_mode == "linear" or linear_layer_num > 0
        self.voting_mode = "linear" if use_linear else "mean"

        num_of_models = len(self.models)
        if self.voting_mode == "linear":
            # "linear" with linear_layer_num == 0 still needs an output layer.
            layer_count = max(linear_layer_num, 1)
            self.linear_list = [
                torch.nn.Sequential(
                    torch.nn.Linear(num_of_models, num_of_models), torch.nn.ReLU()
                )
                for _ in range(layer_count - 1)
            ] + [torch.nn.Linear(num_of_models, 1)]
            self.linears = torch.nn.Sequential(*self.linear_list)
            print(self.linears)

        # Base models are used as frozen feature extractors: inference mode,
        # no gradients (forward() also runs them under torch.no_grad()).
        for model in self.models:
            model.to(self.device)
            model.eval()
            model.requires_grad_(False)

        if self.voting_mode == "linear":
            self.linears.to(self.device)

    def train(self, mode: bool = True):
        """Switch train/eval mode while keeping the frozen base models in eval mode."""
        super().train(mode)
        # Registered submodules would otherwise follow the ensemble into train
        # mode and re-enable dropout inside the frozen base models.
        for model in self.models:
            model.eval()
        return self

    def forward(self, X):
        """Tokenize a batch of texts and return one combined score per text.

        Args:
            X: Text input accepted by the tokenizer (the notebook passes a
                list of strings).

        Returns:
            Tensor of shape ``(batch, 1)`` on ``self.device``.
        """
        X = self.tokenizer(
            X,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=self.max_length,
        )
        X = {k: v.to(self.device) for k, v in X.items()}

        # Per-model scores stacked to shape (num_models, batch, 1).
        with torch.no_grad():
            y = torch.stack(
                [self.tmp_linear(model(**X).logits) for model in self.models]
            )

        if self.voting_mode == "mean":
            y = y.mean(dim=0)
        elif self.voting_mode == "linear":
            # (num_models, batch, 1) -> (batch, num_models) for the linear head.
            y = y.squeeze(2).transpose(0, 1)
            y = self.linears(y)

        return y

In [3]:
# Local snapshot of microsoft/deberta-v3-base inside the Hugging Face cache.
# Forward slashes replace the original backslash literal, whose "\c", "\m",
# "\s" and "\8" are invalid escape sequences in a normal string, and match
# the path style used for the dataset below.
DEBERTA_V3_BASE_PATH = "./cache/models--microsoft--deberta-v3-base/snapshots/8ccc9b6f36199bec6961081d44eb72fb3f7353f3"

# Two ensembles over two copies of the same checkpoint: the first uses the
# default voting settings, the second a 5-layer linear voting head.
voting_model1 = VotingModel([DEBERTA_V3_BASE_PATH, DEBERTA_V3_BASE_PATH])
voting_model2 = VotingModel(
    [DEBERTA_V3_BASE_PATH, DEBERTA_V3_BASE_PATH],
    voting_mode="linear",
    linear_layer_num=5,
)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at .\cache\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36199bec6961081d44eb72fb3f7353f3 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at .\cache\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36199bec6961081d44eb72fb3f7353f3 and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at .\cache\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36

Sequential(
  (0): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): ReLU()
  )
  (1): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): ReLU()
  )
  (2): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): ReLU()
  )
  (3): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): ReLU()
  )
  (4): Linear(in_features=2, out_features=1, bias=True)
)


In [4]:
# Read the Kaggle test split and preview its essay text column.
sample_test = pd.read_csv(filepath_or_buffer="./dataset/kaggle/test.csv")
sample_test.loc[:, "full_text"]

0    Many people have car where they live. The thin...
1    I am a scientist at NASA that is discussing th...
2    People always wish they had the same technolog...
Name: full_text, dtype: object

In [5]:
# Score the same batch of essays with both ensembles and show the raw outputs.
essays = sample_test["full_text"].to_list()

res1 = voting_model1(essays)
print(res1)

res2 = voting_model2(essays)
print(res2)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


tensor([[0.3549],
        [0.3550],
        [0.3537]], device='cuda:0')
tensor([[0.1478],
        [0.1478],
        [0.1479]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [None]:
# Look up the id that the tokenizer's vocabulary assigns to the "[CLS]" token.
vocab = voting_model1.tokenizer.get_vocab()
print(vocab["[CLS]"])

In [None]:
# Drop the notebook's references to both ensembles, then ask PyTorch to
# release its cached, currently unused CUDA memory.
del voting_model1
del voting_model2
torch.cuda.empty_cache()