In [1]:
import torch
from torch import nn
#from aux import ensembler, json_to_Dataset_ensemble
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer, AutoTokenizer, DataCollatorForTokenClassification

  from .autonotebook import tqdm as notebook_tqdm


In [50]:
def ensembler(output1, output2, word_ids1, word_ids2):
    word_ids1 = word_ids1[1:-1]
    word_ids2 = word_ids2[1:-1]
    output1 = output1[1:-1]
    output2 = output2[1:-1]

    stacked_tensors1 = torch.stack([torch.tensor(i) for i in output1])
    placeholder1 = torch.mean(stacked_tensors1, dim=0)

    stacked_tensors2 = torch.stack([torch.tensor(i) for i in output2])
    placeholder2 = torch.mean(stacked_tensors2, dim=0)

    new_output1 = []
    new_output2 = []

    current_word = []
    prev_word_id = 0
    for ind, word_id in enumerate(word_ids1):
        if word_id != prev_word_id:
            if word_id > prev_word_id + 1:
                new_output1.append(placeholder1)
            prev_word_id = word_id
            stacked_tensors = torch.stack(current_word)
            averaged_tensor = torch.mean(stacked_tensors, dim=0)
            new_output1.append(averaged_tensor.tolist())
            current_word = []
        current_word.append(output1[ind])

    current_word = []
    prev_word_id = 0
    for ind, word_id in enumerate(word_ids2):
        if word_id != prev_word_id:
            if word_id > prev_word_id + 1:
                new_output2.append(placeholder2)
            prev_word_id = word_id
            stacked_tensors = torch.stack(current_word)
            averaged_tensor = torch.mean(stacked_tensors, dim=0)
            new_output2.append(averaged_tensor.tolist())
            current_word = []
        current_word.append(output2[ind])

    return torch.tensor(new_output1), torch.tensor(new_output2)

In [51]:
class KingBert(nn.Module):
    def __init__(self, distilbert_tuned, albert_tuned):
        super().__init__()
        self.distilbert = distilbert_tuned
        self.albert = albert_tuned

        for distilbert_param in self.distilbert.parameters():
            distilbert_param.requires_grad = False

        for albert_param in self.albert.parameters():
            albert_param.requires_grad = False 
        
        #Here we have an alpha for each label
        self.alpha = nn.Parameter(0.5 * torch.ones(47), requires_grad = True)
        self.softmax = nn.Softmax(dim = 1)

    def forward(self, distilbert_input_ids, albert_input_ids, distil_attention_mask, alb_attention_mask, distilbert_word_ids, albert_word_ids):
        distilbert_output = self.distilbert(input_ids=torch.tensor([distilbert_input_ids]), attention_mask=torch.tensor([distil_attention_mask]))
        albert_output = self.albert(input_ids=torch.tensor([albert_input_ids]), attention_mask=torch.tensor([alb_attention_mask]))
        distilbert_fixed, albert_fixed = ensembler(distilbert_output['logits'].squeeze(), albert_output['logits'].squeeze(), distilbert_word_ids, albert_word_ids)

        final_output = distilbert_fixed * self.alpha + albert_fixed * (torch.ones(47) - self.alpha)

        return self.softmax(final_output)

In [3]:
distilbert = AutoModelForTokenClassification.from_pretrained('distilbert_finetuned')
tokenizer = AutoTokenizer.from_pretrained('distilbert_finetuned')
albert = AutoModelForTokenClassification.from_pretrained('albert_finetuned')

In [52]:
kingbert = KingBert(distilbert_tuned=distilbert, albert_tuned=albert)

In [59]:
data = json_to_Dataset_ensemble("data/ensemble_train.json")

In [60]:
data

Dataset({
    features: ['spacy_labels', 'albert_inputids', 'distilbert_inputids', 'albert_wordids', 'distilbert_wordids', 'albert_attention_masks', 'distilbert_attention_masks'],
    num_rows: 18244
})

In [53]:
res = kingbert(data[0]['distilbert_inputids'], data[0]['albert_inputids'], data[0]['distilbert_attention_masks'], data[0]['albert_attention_masks'], data[0]['distilbert_wordids'], data[0]['albert_wordids'])

torch.Size([86, 47]) torch.Size([86, 47])


  stacked_tensors1 = torch.stack([torch.tensor(i) for i in output1])
  stacked_tensors2 = torch.stack([torch.tensor(i) for i in output2])


In [54]:
res.shape

torch.Size([86, 47])

In [55]:
predictions = torch.argmax(res, dim=1)