In [2]:
!pip install transformers torch  bitsandbytes peft accelerate &> /dev/null

import os
import torch
from google.colab import drive
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
import numpy as np

def mount_drive():
    """Mount Google Drive at ``/content/drive``.

    Returns:
        True when the mount call completes without raising, False otherwise
        (the error text is printed instead of being propagated).
    """
    mounted = False
    try:
        drive.mount('/content/drive')
    except Exception as err:
        print(f"Error mounting drive: {str(err)}")
    else:
        mounted = True
    return mounted

class NLIPredictor:
    def __init__(self, model_path):

        if not os.path.exists(model_path):
            raise ValueError(f"Model path does not exist: {model_path}")

        try:
            # Set device
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            print(f"Using device: {self.device}")

            # Load PEFT config
            self.config = PeftConfig.from_pretrained(model_path)
            print(f"Base model: {self.config.base_model_name_or_path}")

            # Initialize tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.config.base_model_name_or_path,
                trust_remote_code=True
            )
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

            # Configure quantization (matching training settings)
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True
            )

            # Load base model with quantization
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.config.base_model_name_or_path,
                num_labels=3,
                quantization_config=bnb_config,
                trust_remote_code=True,
                device_map="auto"
            )
            self.model.config.pad_token_id = self.tokenizer.pad_token_id

            # Load LoRA weights
            self.model = PeftModel.from_pretrained(
                self.model,
                model_path,
                device_map="auto"
            )

            # Set evaluation mode
            self.model.eval()
            print("Model loaded successfully and set to evaluation mode")

            # Label mapping
            self.label_map = {0: "entailment", 1: "neutral", 2: "contradiction"}

        except Exception as e:
            raise RuntimeError(f"Error initializing model: {str(e)}")

    def _validate_inputs(self, premise, hypothesis):
        if not isinstance(premise, str) or not isinstance(hypothesis, str):
            raise ValueError("Premise and hypothesis must be strings")
        if not premise.strip() or not hypothesis.strip():
            raise ValueError("Premise and hypothesis cannot be empty")

    def predict(self, premise, hypothesis, return_probabilities=False):

        try:
            self._validate_inputs(premise, hypothesis)

            # Tokenize input
            inputs = self.tokenizer(
                premise,
                hypothesis,
                padding=True,
                truncation=True,
                max_length=128,
                return_tensors="pt"
            )
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Get prediction
            with torch.no_grad(), torch.autocast(device_type=self.device, dtype=torch.float16):
                outputs = self.model(**inputs)
                logits = outputs.logits
                probabilities = torch.nn.functional.softmax(logits, dim=-1)
                predicted_class = torch.argmax(probabilities, dim=-1).item()
                confidence = probabilities[0][predicted_class].item()

            result = {
                "prediction": self.label_map[predicted_class],
                "confidence": confidence,
                "premise": premise,
                "hypothesis": hypothesis
            }

            if return_probabilities:
                result["probabilities"] = {
                    self.label_map[i]: prob.item()
                    for i, prob in enumerate(probabilities[0])
                }

            return result

        except Exception as e:
            raise RuntimeError(f"Error during prediction: {str(e)}")

    def predict_batch(self, premises, hypotheses, batch_size=8):

        # Validate inputs
        if not premises or not hypotheses:
            raise ValueError("Premises and hypotheses lists cannot be empty")
        if len(premises) != len(hypotheses):
            raise ValueError("Number of premises must match number of hypotheses")
        if not isinstance(batch_size, int) or batch_size < 1:
            raise ValueError("Batch size must be a positive integer")

        try:
            results = []

            for i in range(0, len(premises), batch_size):
                batch_premises = premises[i:i + batch_size]
                batch_hypotheses = hypotheses[i:i + batch_size]

                # Validate each pair in batch
                for p, h in zip(batch_premises, batch_hypotheses):
                    self._validate_inputs(p, h)

                # Tokenize batch
                inputs = self.tokenizer(
                    batch_premises,
                    batch_hypotheses,
                    padding=True,
                    truncation=True,
                    max_length=128,
                    return_tensors="pt"
                )
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

                # Get predictions
                with torch.no_grad(), torch.autocast(device_type=self.device, dtype=torch.float16):
                    outputs = self.model(**inputs)
                    logits = outputs.logits
                    probabilities = torch.nn.functional.softmax(logits, dim=-1)
                    predicted_classes = torch.argmax(probabilities, dim=-1)

                    for j in range(len(batch_premises)):
                        pred_class = predicted_classes[j].item()
                        confidence = probabilities[j][pred_class].item()

                        results.append({
                            "prediction": self.label_map[pred_class],
                            "confidence": confidence,
                            "premise": batch_premises[j],
                            "hypothesis": batch_hypotheses[j]
                        })

            return results

        except Exception as e:
            raise RuntimeError(f"Error during batch prediction: {str(e)}")

def main():
    """Demo entry point: mount Drive, load the predictor, run sample predictions.

    Runs one single-pair prediction and one batched prediction. Failures
    after the mount step are printed rather than raised.
    """
    # Without Drive the adapter directory is unreachable, so stop early
    drive_ready = mount_drive()
    if not drive_ready:
        print("Error: Could not mount Google Drive")
        return

    try:
        # Load the fine-tuned adapter from Drive
        adapter_dir = "/content/drive/My Drive/Sem 7/LLM/Assignment 3/A3_Outputs/phi2_qlora_nli_final"
        predictor = NLIPredictor(adapter_dir)

        # --- one pair, with the full probability breakdown ---
        print("\nSingle Prediction Example:")
        try:
            single = predictor.predict(
                premise="A person on a horse jumps over a broken down wall.",
                hypothesis="A person is outdoors, on a horse.",
                return_probabilities=True
            )
            print(f"Premise: {single['premise']}")
            print(f"Hypothesis: {single['hypothesis']}")
            print(f"Prediction: {single['prediction']}")
            print(f"Confidence: {single['confidence']:.4f}")
            print("Class Probabilities:", single['probabilities'])
        except Exception as err:
            print(f"Error in single prediction: {str(err)}")

        # --- several pairs in one call ---
        print("\nBatch Prediction Example:")
        try:
            example_pairs = [
                (
                    "A person on a horse jumps over a broken down wall.",
                    "A person is outdoors, on a horse.",
                ),
                (
                    "Two women are embracing while holding to go packages.",
                    "The women are moving.",
                ),
                (
                    "A soccer game with multiple males playing.",
                    "Some men are playing a sport.",
                ),
            ]
            premises = [pair[0] for pair in example_pairs]
            hypotheses = [pair[1] for pair in example_pairs]

            for item in predictor.predict_batch(premises, hypotheses):
                print(f"\nPremise: {item['premise']}")
                print(f"Hypothesis: {item['hypothesis']}")
                print(f"Prediction: {item['prediction']}")
                print(f"Confidence: {item['confidence']:.4f}")
        except Exception as err:
            print(f"Error in batch prediction: {str(err)}")

    except Exception as err:
        print(f"Error in main execution: {str(err)}")

# Run the demo only when this module is executed directly, not when imported.
if __name__ == "__main__":
    main()

Mounted at /content/drive
Using device: cuda
Base model: microsoft/phi-2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of PhiForSequenceClassification were not initialized from the model checkpoint at microsoft/phi-2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


Model loaded successfully and set to evaluation mode

Single Prediction Example:
Premise: A person on a horse jumps over a broken down wall.
Hypothesis: A person is outdoors, on a horse.
Prediction: entailment
Confidence: 0.9309
Class Probabilities: {'entailment': 0.9308837652206421, 'neutral': 0.06789553910493851, 'contradiction': 0.0012206893879920244}

Batch Prediction Example:

Premise: A person on a horse jumps over a broken down wall.
Hypothesis: A person is outdoors, on a horse.
Prediction: entailment
Confidence: 0.9276

Premise: Two women are embracing while holding to go packages.
Hypothesis: The women are moving.
Prediction: neutral
Confidence: 0.7205

Premise: A soccer game with multiple males playing.
Hypothesis: Some men are playing a sport.
Prediction: entailment
Confidence: 0.9968
