In [1]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True

In [2]:
class Dataset:
    def __init__(self, excerpt, tokenizer, max_len):
        self.excerpt = excerpt
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.excerpt)

    def __getitem__(self, item):
        text = str(self.excerpt[item])
        inputs = self.tokenizer(
            text, 
            max_length=self.max_len, 
            padding="max_length", 
            truncation=True
        )

        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]

        return {
            "input_ids": torch.tensor(ids, dtype=torch.long),
            "attention_mask": torch.tensor(mask, dtype=torch.long),
        }

In [3]:
def generate_predictions(model_path,weight_path, max_len):
    model = AutoModelForSequenceClassification.from_pretrained(model_path,num_labels=1)
    model.load_state_dict(torch.load(weight_path,map_location=device))
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    model = model.to(device)
    model.eval()
    df = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
    dataset = Dataset(excerpt=df.excerpt.values, tokenizer=tokenizer, max_len=max_len)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=16, num_workers=2, pin_memory=True, shuffle=False, drop_last=False
    )
    final_output = []

    for b_idx, data in enumerate(data_loader):
        with torch.no_grad():
            for key, value in data.items():
                data[key] = value.to(device)
            output = model(**data)
            output = output.logits.squeeze(-1).detach().cpu().numpy()
            final_output = np.append(final_output,output)
    
    torch.cuda.empty_cache()
    return final_output

In [4]:
preds1 = generate_predictions(model_path="../input/roberta-transformers-pytorch/distilroberta-base/",
                              weight_path="../input/commonlitreadability-weight/distilroberta-base_fold1_acc0.491.pt",
                              max_len=256)
preds2 = generate_predictions(model_path="../input/roberta-transformers-pytorch/distilroberta-base/",
                              weight_path="../input/commonlitreadability-weight/distilroberta-base_fold2_0.519.pt", 
                              max_len=256)
preds3 = generate_predictions(model_path="../input/roberta-transformers-pytorch/distilroberta-base/",
                              weight_path="../input/commonlitreadability-weight/distilroberta-base_fold3_0.50.pt", 
                              max_len=256)
preds4 = generate_predictions(model_path="../input/roberta-transformers-pytorch/distilroberta-base/",
                              weight_path="../input/commonlitreadability-weight/distilroberta-base_fold4_acc0.51.pt",
                              max_len=256)
preds5 = generate_predictions(model_path="../input/roberta-transformers-pytorch/distilroberta-base/",
                              weight_path="../input/commonlitreadability-weight/distilroberta-base_fold5_0.498.pt",
                              max_len=256)
"""
preds6 = generate_predictions(model_path="../input/roberta-transformers-pytorch/distilroberta-base/",
                              weight_path="../input/commonlitreadability-weight/distilroberta-base_fold2_acc0.516_len280.pt",
                              max_len=280)
"""

Some weights of the model checkpoint at ../input/roberta-transformers-pytorch/distilroberta-base/ were not used when initializing RobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ../input/roberta-transformers-pytorch/distilroberta-base/ and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias

'\npreds6 = generate_predictions(model_path="../input/roberta-transformers-pytorch/distilroberta-base/",\n                              weight_path="../input/commonlitreadability-weight/distilroberta-base_fold2_acc0.516_len280.pt",\n                              max_len=280)\n'

In [5]:
preds = (preds5 + preds4 + preds3 + preds2 + preds1)/5
#preds = (preds5 + preds3)/2

In [6]:
test_df = pd.read_csv('/kaggle/input/commonlitreadabilityprize/test.csv')
predictions = pd.DataFrame()
predictions['id'] = test_df['id']
predictions['target'] = preds
predictions.to_csv("/kaggle/working/submission.csv", index=False)
predictions

Unnamed: 0,id,target
0,c0f722661,-0.538886
1,f0953f0a5,-0.366744
2,0df072751,-0.34269
3,04caf4e0c,-2.632129
4,0e63f8bea,-1.911068
5,12537fe78,-1.272275
6,965e592c0,0.322191
