In [1]:
import os
import gc
import cv2
import copy
import time
import random
import joblib

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# For Transformer Models
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import DataCollatorWithPadding
from transformers import Trainer, TrainingArguments
from transformers.modeling_outputs import SequenceClassifierOutput
from datasets import load_dataset

# Utils
from tqdm import tqdm
from feedback_custom_funtions import FeedBackDataset
from model_building import AttentionPooling, FeedBackModel

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
# Checkpoints to run at inference time. Only the DeBERTa-v3-base checkpoint is
# active; the xsmall fold checkpoints below are kept for reference but disabled.
model_paths = [
    "/kaggle/input/fb3-debertav3base-attention-quant/outputs-3/checkpoint-1468/pytorch_model.bin",
    # Disabled:
    #   '../input/fb3-debertav3xsmall-attentionlayer/outputs-0/pytorch_model.bin'
    #   '../input/fb3-debertav3xsmall-attentionlayer/outputs-1/pytorch_model.bin'
    #   '../input/fb3-debertav3xsmall-attentionlayer/outputs-2/pytorch_model.bin'
    #   '../input/fb3-debertav3xsmall-attentionlayer/outputs-3/pytorch_model.bin'
]

In [3]:
# Inference settings shared by the cells below.
config = {
    "seed": 42,
    "model_name": '/kaggle/input/debertav3base',
    "test_batch_size": 64,
    "max_length": 512,
    "num_classes": 6,
    # Use the first GPU when one is visible, otherwise fall back to CPU.
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    # NOTE(review): 768 is presumably the hidden size of the backbone at
    # `model_name` -- confirm against the AttentionPooling definition.
    "PoolingLayer": AttentionPooling(768),
}

# The tokenizer is loaded from the same path as the backbone.
config["tokenizer"] = AutoTokenizer.from_pretrained(config['model_name'])

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
  "The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option"
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# Load the test CSV as a pandas DataFrame.
# NOTE(review): `test_df` is rebound in the next cell to the result of
# `load_dataset(...)`, and this DataFrame is not read anywhere else in the
# visible notebook.
test_df = pd.read_csv("../input/feedback-prize-english-language-learning/test.csv")

In [5]:
def preprocess_function(examples):
    """Tokenize the ``full_text`` field of ``examples``.

    The tokenizer and the maximum length are read from the notebook-level
    ``config`` dict. Inputs longer than ``config["max_length"]`` are truncated
    and special tokens are added.

    Args:
        examples: Mapping with a ``"full_text"`` entry to tokenize.

    Returns:
        Whatever the configured tokenizer returns for that input.
    """
    tokenizer = config["tokenizer"]
    return tokenizer(
        examples["full_text"],
        truncation=True,
        add_special_tokens=True,
        max_length=config["max_length"],
    )

# Load the test CSV through `datasets` under a single "test" split.
data_files = dict(test="../input/feedback-prize-english-language-learning/test.csv")
test_df = load_dataset("csv", data_files=data_files)

# Tokenize in batches with two worker processes, then drop the raw columns.
test_dataset = (
    test_df
    .map(preprocess_function, batched=True, num_proc=2)
    .remove_columns(['text_id', 'full_text'])
)

# Collator built from the configured tokenizer; passed to Trainer at inference.
collate_fn = DataCollatorWithPadding(tokenizer=config['tokenizer'])

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-a296fd51d9f1685d/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-a296fd51d9f1685d/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

    

#0:   0%|          | 0/1 [00:00<?, ?ba/s]

#1:   0%|          | 0/1 [00:00<?, ?ba/s]

In [6]:
class FeedBackModel_2(nn.Module):
    """Transformer backbone + attention pooling + linear head.

    The backbone is loaded from ``model_name`` with hidden-state and attention
    dropout set to 0. Its last hidden state is pooled by ``AttentionPooling``,
    passed through dropout (p=0.2) and projected by a single linear layer to
    ``config['num_classes']`` outputs, where ``config`` is the notebook-level
    settings dict.

    Args:
        model_name: Identifier or path accepted by ``AutoConfig.from_pretrained``
            and ``AutoModel.from_pretrained``.
    """

    def __init__(self, model_name):
        # Zero-argument super(): the previous `super(FeedBackModel, self)` named
        # the imported FeedBackModel class, which this class does not inherit
        # from, so construction raised TypeError.
        super().__init__()
        self.config = AutoConfig.from_pretrained(model_name)
        self.config.hidden_dropout_prob = 0
        self.config.attention_probs_dropout_prob = 0
        self.model = AutoModel.from_pretrained(model_name, config=self.config)
        self.drop = nn.Dropout(p=0.2)
        # Size the pooler from the backbone instead of a fixed 384 so it matches
        # the `hidden_size` input expected by the heads below.
        # NOTE(review): assumes AttentionPooling's argument is the input feature
        # size -- confirm against model_building.AttentionPooling.
        self.pooler = AttentionPooling(self.config.hidden_size)
        # Not used by forward(); kept so the module's parameter names (and
        # therefore its state_dict keys) are unchanged.
        self.regressor = nn.Sequential(
            nn.Linear(self.config.hidden_size, int(self.config.hidden_size/2)),
            nn.ReLU(),
            nn.Linear(int(self.config.hidden_size/2), config['num_classes'])
        )
        self.linear_regressor = nn.Linear(self.config.hidden_size, config['num_classes'])

    def forward(self, input_ids, attention_mask):
        """Return a ``SequenceClassifierOutput`` whose ``logits`` are the head outputs.

        Args:
            input_ids: Token ids forwarded to the backbone.
            attention_mask: Mask forwarded to both the backbone and the pooler.
        """
        out = self.model(input_ids=input_ids,
                         attention_mask=attention_mask,
                         output_hidden_states=False)
        out = self.pooler(out.last_hidden_state, attention_mask)
        out = self.drop(out)
        outputs = self.linear_regressor(out)
        return SequenceClassifierOutput(logits=outputs)

## Predictions

In [7]:
def round_to_nearest_half_int(num):
    """Snap ``num`` to the nearest multiple of 0.5.

    The value is doubled, rounded with ``np.round`` and halved again, so exact
    ties follow ``np.round``'s behaviour.

    Args:
        num: Scalar or NumPy array.

    Returns:
        ``num`` rounded to a 0.5 grid, with the same shape as the input.
    """
    doubled = num * 2
    return np.round(doubled) / 2

def inference(model_paths, test_dataset, device):
    """Run every checkpoint over ``test_dataset`` and average the predictions.

    For each path a ``FeedBackModel`` is built from the notebook-level ``config``
    dict, its weights are loaded from the checkpoint, and predictions are taken
    with ``Trainer.predict`` using the notebook-level ``collate_fn``.

    Args:
        model_paths: Iterable of checkpoint paths readable by ``torch.load``.
        test_dataset: Dataset passed to ``Trainer.predict``.
        device: Device the checkpoint is mapped to and the model is moved to.

    Returns:
        Element-wise mean of the per-checkpoint prediction arrays. With a single
        checkpoint this equals that checkpoint's predictions.

    Raises:
        ValueError: If ``model_paths`` is empty.
    """
    model_paths = list(model_paths)
    if not model_paths:
        raise ValueError("model_paths must contain at least one checkpoint path")

    # Loop-invariant, so build it once instead of once per checkpoint.
    training_args = TrainingArguments(
        output_dir=".",
        per_device_eval_batch_size=config['test_batch_size'],
        label_names=["target"]
    )

    final_preds = []
    for i, path in enumerate(model_paths):
        # Honour the `device` argument (previously ignored in favour of
        # config['device']).
        model = FeedBackModel(config['model_name'],
                              config["num_classes"],
                              PoolingLayer = config["PoolingLayer"]).to(device)
        model.load_state_dict(torch.load(path, map_location=device))

        print(f"Getting predictions for model {i+1}")
        trainer = Trainer(model=model,
                          args=training_args,
                          data_collator=collate_fn)
        predictions = trainer.predict(test_dataset)
        final_preds.append(predictions.predictions)

        # Release this checkpoint before loading the next one.
        del trainer, model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Average across checkpoints; previously only final_preds[0] was returned,
    # so every checkpoint after the first was computed and then discarded.
    return np.mean(final_preds, axis=0)

In [8]:
# Run inference on the "test" split with the configured checkpoints and device.
preds = inference(model_paths, test_dataset["test"], config['device'])

Some weights of the model checkpoint at /kaggle/input/debertav3base were not used when initializing DebertaV2Model: ['mask_predictions.dense.bias', 'mask_predictions.LayerNorm.weight', 'mask_predictions.classifier.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'mask_predictions.classifier.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
The following colum

Getting predictions for model 1


In [9]:
# Load the sample submission; a later cell overwrites its score columns with
# the model predictions and writes it out.
sample = pd.read_csv("../input/feedback-prize-english-language-learning/sample_submission.csv")
sample.head()

Unnamed: 0,text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0000C359D63E,3.0,3.0,3.0,3.0,3.0,3.0
1,000BAD50D026,3.0,3.0,3.0,3.0,3.0,3.0
2,00367BB2546B,3.0,3.0,3.0,3.0,3.0,3.0


In [10]:
# Overwrite every column from "cohesion" through "conventions" (label slice,
# both ends inclusive) with the predictions. This mutates `sample` in place.
# NOTE(review): assumes the rows of `preds` are in the same order as the rows
# of `sample`, and that its columns follow this column order -- confirm.
sample.loc[:,"cohesion":"conventions"] = preds
sample.head()

Unnamed: 0,text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0000C359D63E,2.988701,2.605177,3.085942,2.935252,2.767663,2.67787
1,000BAD50D026,2.501329,2.374877,2.734487,2.389486,2.182767,2.564789
2,00367BB2546B,3.642371,3.475777,3.761467,3.759206,3.561251,3.426465


In [11]:
# Write the filled-in frame to submission.csv, omitting the index column.
sample.to_csv('submission.csv', index=False)