In [None]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from abc import ABC, abstractmethod
import json
import pandas as pd

from tqdm.notebook import tqdm

In [None]:
class ReaderABC(ABC):
    """Interface every dataset reader must implement.

    Concrete readers load a file into a pandas DataFrame via ``read`` and
    persist their working frame via ``save``.
    """

    @abstractmethod
    def read(self, filename: str, **kwargs) -> pd.DataFrame:
        """Load ``filename`` and return the resulting DataFrame.

        Args:
            filename: Path of the file to load.
            **kwargs: Loader-specific options chosen by the implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def save(self, path: str, **kwargs) -> None:
        """Write the reader's data to ``path``.

        Args:
            path: Destination file path.
            **kwargs: Writer-specific options chosen by the implementation.
        """
        raise NotImplementedError

class Reader(ReaderABC):
    """Base reader holding a loaded frame and a working subset of it.

    ``loaded_df`` is the frame produced by ``read`` (implemented by
    subclasses); ``df`` is the column subset built by ``create_df`` and the
    frame that ``add_column`` and ``save`` operate on. Both start as ``None``.
    """

    def __init__(self):
        self.loaded_df = None  # set by a subclass's read()
        self.df = None  # set by create_df()

    def read(self, filename: str, **kwargs) -> pd.DataFrame:
        """Load ``filename``; must be overridden by a concrete reader."""
        raise NotImplementedError

    def save(self, path: str, **kwargs) -> None:
        """Write the working frame to ``path``, by default as JSON Lines.

        Args:
            path: Destination file path.
            **kwargs: Forwarded to ``DataFrame.to_json``. ``orient`` defaults
                to ``"records"``; ``lines`` defaults to ``True`` whenever the
                orient is ``"records"`` so each row becomes one line.

        Raises:
            ValueError: If ``create_df`` has not been called yet.
        """
        if self.df is None:
            raise ValueError("No DataFrame to save; call create_df() first.")
        # Let pandas write the file itself: handing a pre-serialised JSON
        # string to a JSON-lines writer would emit one doubly-encoded line.
        kwargs.setdefault("orient", "records")
        if kwargs["orient"] == "records":
            # pandas only accepts lines=True together with orient="records".
            kwargs.setdefault("lines", True)
        self.df.to_json(path, **kwargs)

    def create_df(
            self,
            columns=('pairID', 'gold_label', 'sentence1', 'sentence2')
    ) -> pd.DataFrame:
        """Build the working frame from a subset of the loaded columns.

        Args:
            columns: Names of the columns to keep, in order.

        Returns:
            The new working frame (also stored on ``self.df``).

        Raises:
            ValueError: If nothing has been loaded with ``read`` yet.
        """
        if self.loaded_df is None:
            raise ValueError("No data loaded; call read() first.")
        # Explicit copy so later column additions modify an independent frame
        # and do not trigger pandas' SettingWithCopyWarning.
        self.df = self.loaded_df[list(columns)].copy()
        return self.df

    def add_column(self, col_name: str, func) -> pd.DataFrame:
        """Append a column computed from the two sentence columns.

        Args:
            col_name: Name of the column to append.
            func: Callable invoked as
                ``func(df['sentence1'], df['sentence2'])``; its result is
                used as the new column's values.

        Returns:
            The working frame with the new column appended as the last column.
        """
        new_values = func(self.df['sentence1'], self.df['sentence2'])
        self.df.insert(
            len(self.df.columns),  # insert after the current last column
            col_name,
            new_values,
            allow_duplicates=True)
        return self.df

class JsonlReader(Reader):
    """Reader that loads JSON data through ``pandas.read_json``."""

    def read(self, filename: str, **kwargs) -> pd.DataFrame:
        """Load ``filename`` with ``pandas.read_json`` and remember the result.

        Args:
            filename: Path of the JSON file to load.
            **kwargs: Forwarded to ``pandas.read_json`` (for example
                ``lines=True`` for a JSON Lines file).

        Returns:
            The loaded frame, also stored on ``self.loaded_df``.
        """
        self.loaded_df = pd.read_json(filename, **kwargs)
        return self.loaded_df

class TextReader(Reader):
    """Reader that loads delimited text through ``pandas.read_csv``."""

    def read(self, filename: str, **kwargs) -> pd.DataFrame:
        """Load ``filename`` with ``pandas.read_csv`` and remember the result.

        Args:
            filename: Path of the text file to load.
            **kwargs: Forwarded to ``pandas.read_csv``.

        Returns:
            The loaded frame, also stored on ``self.loaded_df``.
        """
        self.loaded_df = pd.read_csv(filename, **kwargs)
        return self.loaded_df

In [None]:
# Use the first CUDA GPU when one is visible to torch; otherwise stay on CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

In [None]:
# Tokenizer and classifier are loaded from the same checkpoint identifier.
checkpoint = "Transformers-model-name-or-path"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# num_labels=3 requests a three-way classification head; the model is then
# moved to the device selected earlier in the notebook.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint, num_labels=3
).to(device)

In [None]:
# Load the JSON Lines dataset (e.g. multinli_1.0_dev_matched.jsonl) and keep
# only the columns used by the rest of the notebook.
selected_columns = ('pairID', 'gold_label', 'sentence1', 'sentence2')
reader = JsonlReader()
reader.read("your-jsonl-file-path", lines=True)
reader.create_df(columns=selected_columns)

In [None]:
# Run the classifier on every sentence pair, collecting raw logits (as plain
# lists) and class probabilities (as CPU tensors, converted to lists later).
logits = []
probs = []
# Only the two sentence columns are needed, so iterate them directly instead
# of materialising a Series per row with iterrows().
sentence_pairs = zip(reader.df["sentence1"], reader.df["sentence2"])
# total= gives tqdm a known length so it can render a real progress bar.
for sentence1, sentence2 in tqdm(sentence_pairs, total=len(reader.df)):
    with torch.no_grad():  # inference only: no autograd graph is needed
        inputs = tokenizer(
            sentence1,
            sentence2,
            add_special_tokens=True,
            # Clip over-long pairs to the tokenizer's maximum length rather
            # than feeding the model more tokens than it supports.
            truncation=True,
            return_tensors="pt",
        ).to(device)
        pair_logits = model(**inputs).logits
    # Normalise over the class dimension explicitly; calling softmax without
    # dim relies on a deprecated implicit choice and emits a warning.
    probs.append(F.softmax(pair_logits, dim=-1)[0].to("cpu"))
    logits.append(pair_logits[0].tolist())

# Release cached GPU memory once at the end; doing it on every iteration only
# slows the loop down.
torch.cuda.empty_cache()

In [None]:
# Attach the model outputs to the working frame. The probability tensors are
# converted to plain lists in place before being stored as a column.
reader.df["logits"] = logits
probs[:] = [prob.tolist() for prob in probs]
reader.df["probs"] = probs

In [None]:
# Preview the first five rows, now including the added prediction columns.
reader.df.head(n=5)

In [None]:
# Serialise the frame as JSON Lines (one record per line) and write it out.
output_path = "your-output-path"
jsonl_text = reader.df.to_json(orient="records", lines=True)
with open(output_path, "w") as out_file:
    out_file.write(jsonl_text)