In [None]:
import os
import pandas as pd

from datasets import load_dataset
from pathlib import Path
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from functools import lru_cache

@lru_cache(maxsize=1)
def load_nli_classifier(model_name="microsoft/deberta-v2-xlarge-mnli"):
    """Return a zero-shot-classification pipeline backed by ``model_name``.

    The result is memoized with ``lru_cache(maxsize=1)``, so repeated calls
    with the same model name reuse the already constructed pipeline, and
    only the most recently requested model is kept in the cache.

    Args:
        model_name: Model identifier passed to ``pipeline`` as ``model``.

    Returns:
        The object produced by ``pipeline("zero-shot-classification", ...)``.
    """
    task = "zero-shot-classification"
    return pipeline(task, model=model_name)
    
@lru_cache(maxsize=1)
def load_dataset():
    """Return the MS MARCO v2.1 ``train`` split opened in streaming mode.

    The result is memoized with ``lru_cache(maxsize=1)``, so every call after
    the first returns the same dataset object.

    Returns:
        Whatever ``datasets.load_dataset("ms_marco", "v2.1", split="train",
        streaming=True)`` returns.
    """
    # This function has the same name as ``datasets.load_dataset`` and
    # therefore shadows it at module level. Calling the bare name here would
    # invoke this zero-argument function again and fail with a TypeError, so
    # the library function is imported under a distinct local alias.
    from datasets import load_dataset as hf_load_dataset

    return hf_load_dataset("ms_marco", "v2.1", split="train", streaming=True)

def test_entailment():
    """Print the classifier output for one premise and three hypotheses.

    The premise answers the question "What was Apple's revenue in Q2 2025?".
    The three hypotheses are written to be, in order, an entailment, a
    contradiction, and an unrelated (neutral) statement.

    Returns:
        None. The raw classifier result is printed to stdout.
    """
    premise = "Apple reported $119.6 billion in revenue for Q2 2025."

    hypotheses = [
        # expected: entailment
        "In the second quarter of 2025, Apple posted revenue of $119.6 billion, beating analyst expectations.",
        # expected: contradiction
        "Apple's Q2 2025 revenue was only $90 billion, which was below expectations.",
        # expected: neutral
        "Apple launched the Vision Pro headset in 2024 as part of its expansion into spatial computing.",
    ]

    classifier = load_nli_classifier()
    # The zero-shot pipeline formats every candidate into its
    # ``hypothesis_template`` (default "This example is {}."). The candidates
    # here are already complete sentences, so use an identity template to
    # feed them to the NLI model verbatim.
    result = classifier(premise, hypotheses, hypothesis_template="{}")
    print(result)

def test_msmarco():
    """Score MS MARCO passages against each example's first answer.

    For every streamed example that has a non-empty first answer and at least
    one passage flagged ``is_selected == 1``, the first answer is used as the
    premise, all of the example's passages are used as hypotheses, and the
    classifier result is printed.

    Returns:
        None. One classifier result is printed per qualifying example.
    """
    # Obtain the dataset through this file's cached loader; the original code
    # iterated a ``msmarco_ds`` name that was never assigned.
    msmarco_ds = load_dataset()
    # Loop-invariant: fetch the (cached) classifier once rather than per item.
    classifier = load_nli_classifier()

    for e in msmarco_ds:
        answers = e['answers']
        # Skip examples with no answers or an empty first answer.
        if not answers or not answers[0]:
            continue
        premise = answers[0]

        # Sort (is_selected, text) pairs in descending order so that a
        # selected passage, if any, ends up first.
        passages = sorted(
            zip(e['passages']['is_selected'], e['passages']['passage_text']),
            reverse=True,
        )

        # Skip examples with no passages or without a selected passage.
        if not passages or passages[0][0] != 1:
            continue

        # Passages are full text, so bypass the pipeline's default
        # "This example is {}." wrapper and pass them through verbatim.
        result = classifier(
            premise,
            [text for _, text in passages],
            hypothesis_template="{}",
        )
        print(result)
        
# Run both checks, in order, as soon as this code executes: first the
# hand-written premise/hypothesis example, then the MS MARCO loop.
test_entailment()
test_msmarco()