<a href="https://colab.research.google.com/github/divyabala-ai/nli-multilingual-classifier/blob/main/nli_multilingual_ollama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Sanity check: confirm the ollama client package is importable and show which installation was loaded
import ollama
print(ollama)

<module 'ollama' from 'C:\\Users\\krish\\anaconda3\\Lib\\site-packages\\ollama\\__init__.py'>


In [None]:
# ✅ Importing necessary libraries
import concurrent.futures
import os
import time
#import math
#import numpy as np

import ollama
import pandas as pd
from datasets import Dataset
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Load raw train and test data.
# The CSV folder defaults to the original local path; set the NLI_DATA_DIR
# environment variable to point at another folder (e.g. on Colab) without editing this cell.
DATA_DIR = os.environ.get(
    'NLI_DATA_DIR',
    r'C:\Users\krish\Documents\Divya\Minnodi\AI_course\Projects\NLI Watson\nli_watson',
)
raw_train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))  # raw_train_df - untouched reference
raw_test_df = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))    # raw_test_df - untouched reference

# Make working copies (preserve raw versions)
train_df = raw_train_df.copy()
test_df = raw_test_df.copy()

# Cleaning function
def clean_nli_dataframe(df):
    """Return a cleaned copy of an NLI frame restricted to the modelling columns.

    The frame is reduced to 'premise' and 'hypothesis' (plus 'label' when that
    column is present), rows with a missing value in any of those columns are
    dropped, repeated premise/hypothesis pairs are collapsed to their first
    occurrence, and the index is reset to 0..n-1.

    Missing values are checked only in the kept columns, so a NaN in a column
    that is discarded anyway does not remove an otherwise usable row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'premise' and 'hypothesis'. A frame without 'label' is
        treated as the unlabeled test set.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ['premise', 'hypothesis', 'label'] or
        ['premise', 'hypothesis'].

    Raises
    ------
    KeyError
        If 'premise' or 'hypothesis' is missing from ``df``.
    """
    text_cols = ['premise', 'hypothesis']
    # A 'label' column marks labelled data; without it the frame is handled as the test set.
    keep_cols = (text_cols + ['label']) if 'label' in df.columns else text_cols
    return (
        df[keep_cols]                          # select first so dropna only sees relevant columns
        .dropna()                              # drop rows missing premise / hypothesis / label
        .drop_duplicates(subset=text_cols)     # keep the first of each repeated pair
        .reset_index(drop=True)
    )

# Run the same cleaning step over both working copies to get the frames used for modeling
clean_train_df, clean_test_df = map(clean_nli_dataframe, (train_df, test_df))

In [None]:
# Preview the first five rows of the cleaned training frame
clean_train_df.head(5)

Unnamed: 0,premise,hypothesis,label
0,and these comments were considered in formulat...,The rules developed in the interim were put to...,0
1,These are issues that we wrestle with in pract...,Practice groups are not permitted to work on t...,2
2,Des petites choses comme celles-là font une di...,J'essayais d'accomplir quelque chose.,0
3,you know they can't really defend themselves l...,They can't defend themselves because of their ...,0
4,ในการเล่นบทบาทสมมุติก็เช่นกัน โอกาสที่จะได้แสด...,เด็กสามารถเห็นได้ว่าชาติพันธุ์แตกต่างกันอย่างไร,1


In [None]:
# Preview the first five rows of the cleaned test frame
clean_test_df.head(5)

Unnamed: 0,premise,hypothesis
0,بکس، کیسی، راہیل، یسعیاہ، کیلی، کیلی، اور کولم...,"کیسی کے لئے کوئی یادگار نہیں ہوگا, کولمین ہائی..."
1,هذا هو ما تم نصحنا به.,عندما يتم إخبارهم بما يجب عليهم فعله ، فشلت ال...
2,et cela est en grande partie dû au fait que le...,Les mères se droguent.
3,与城市及其他公民及社区组织代表就IMA的艺术发展进行对话&amp,IMA与其他组织合作，因为它们都依靠共享资金。
4,Она все еще была там.,"Мы думали, что она ушла, однако, она осталась."


In [None]:
# ✅ Turn every premise/hypothesis pair into a text prompt

# Training prompts end with the gold answer: the integer label (0/1/2) indexes the word list
train_prompts = [
    (
        f"Premise: \"{premise}\"\n"
        f"Hypothesis: \"{hypothesis}\"\n"
        "What is the relationship? Options: entailment, neutral, contradiction.\n"
        f"Answer: {['entailment', 'neutral', 'contradiction'][label]}"
    )
    for premise, hypothesis, label in zip(
        clean_train_df['premise'], clean_train_df['hypothesis'], clean_train_df['label']
    )
]

# Test prompts stop after "Answer:" so the model has to supply the label
test_prompts = [
    (
        f"Premise: \"{premise}\"\n"
        f"Hypothesis: \"{hypothesis}\"\n"
        "What is the relationship? Options: entailment, neutral, contradiction.\n"
        "Answer:"
    )
    for premise, hypothesis in zip(clean_test_df['premise'], clean_test_df['hypothesis'])
]

# ✅ Show the first prompt of each kind as a quick visual check
print("Train Prompt Example:\n", train_prompts[0])
print("\nTest Prompt Example:\n", test_prompts[0])


Train Prompt Example:
 Premise: "and these comments were considered in formulating the interim rules."
Hypothesis: "The rules developed in the interim were put together with these comments in mind."
What is the relationship? Options: entailment, neutral, contradiction.
Answer: entailment

Test Prompt Example:
 Premise: "بکس، کیسی، راہیل، یسعیاہ، کیلی، کیلی، اور کولمبین ہائی اسکول کے دوسرے طلبا کے نام سے بکسوں کو نشان زد کیا جائے گا جس نے اس سال پہلے اپنی زندگی کھو دی"
Hypothesis: "کیسی کے لئے کوئی یادگار نہیں ہوگا, کولمین ہائی اسکول کے طالب علموں میں سے ایک جو مر گیا."
What is the relationship? Options: entailment, neutral, contradiction.
Answer:


In [None]:
# ✅ Function to send one prompt and get a cleaned prediction
def get_prediction(prompt, model="llama3:latest"):
    """Send one NLI prompt to an Ollama chat model and return a normalized label.

    A one-word-answer instruction is appended to ``prompt`` before it is sent
    as a single user message. The reply is lower-cased and stripped of
    surrounding whitespace and punctuation, so answers such as "Neutral." or
    '"entailment"' are still recognised.

    Parameters
    ----------
    prompt : str
        The fully formatted premise/hypothesis prompt.
    model : str, optional
        Name of the Ollama model to query. Defaults to "llama3:latest".

    Returns
    -------
    str
        'entailment', 'neutral' or 'contradiction' when the cleaned reply is
        exactly one of those words, otherwise 'unknown'.

    Notes
    -----
    Errors raised by ``ollama.chat`` are not caught here and propagate to the caller.
    """
    import string  # function-scope import keeps this cell independent of the import cell

    valid_labels = ('entailment', 'neutral', 'contradiction')
    concise_prompt = f"{prompt}\nPlease respond with just one word: entailment, neutral, or contradiction."
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": concise_prompt}]
    )
    # Guard against an empty/None message body before normalizing.
    raw_answer = response.message.content or ""
    # Strip quotes, periods, markdown asterisks, etc. around the word, not just whitespace.
    label = raw_answer.lower().strip(string.whitespace + string.punctuation)
    return label if label in valid_labels else "unknown"

# ✅ Function to run predictions in parallel
def get_predictions_parallel(prompts, max_workers):
    """Classify many prompts concurrently using a thread pool.

    Each prompt is passed to ``get_prediction`` on a pool of at most
    ``max_workers`` threads.

    Parameters
    ----------
    prompts : iterable of str
        Prompts to classify.
    max_workers : int
        Upper bound on the number of worker threads.

    Returns
    -------
    list
        One prediction per prompt, in the same order as ``prompts``.
    """
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)
    with pool:
        # Executor.map yields results in input order, regardless of completion order.
        return list(pool.map(get_prediction, prompts))

# ✅ Hold out 10% of the cleaned training data for validation (random_state fixed so the split is repeatable)
train_data, val_data = train_test_split(clean_train_df, test_size=0.1, random_state=42)

# ✅ One-shot validation prompts: fixed instructions and a worked example, then the pair to classify
val_prompts = [
    (
        "You are a Natural Language Inference expert.\n"
        "Classify the relationship between the premise and hypothesis as one of: entailment, contradiction, or neutral.\n"
        "Respond with only one of these words.\n\n"
        "Example:\n"
        "Premise: \"A man and a woman are walking through a brightly lit shopping mall.\"\n"
        "Hypothesis: \"They are shopping for groceries.\"\n"
        "Answer: neutral\n\n"
        "Now classify the following:\n"
        f"Premise: \"{premise}\"\n"
        f"Hypothesis: \"{hypothesis}\"\n"
        "Answer:"
    )
    for premise, hypothesis in zip(val_data['premise'], val_data['hypothesis'])
]

# ✅ Gold labels as words: the integer label (0/1/2) indexes the word list
true_labels_val = [
    ['entailment', 'neutral', 'contradiction'][label]
    for label in val_data['label']
]

# ✅ Time the predictions (wall-clock time for the whole parallel batch)
start_time = time.time()
val_predictions = get_predictions_parallel(val_prompts, max_workers=25)  # You can adjust max_workers
end_time = time.time()
elapsed_time = end_time - start_time

# ✅ Evaluate
print(f"\n⏱️ Time taken for validation predictions: {elapsed_time:.2f} seconds for {len(val_prompts)} prompts")
print(f"\nTotal predictions received: {len(val_predictions)}")
print(f"Total true labels: {len(true_labels_val)}")

# Only score when every prompt produced exactly one prediction.
if len(val_predictions) == len(true_labels_val):
    print("\nClassification Report on Validation Data:\n", classification_report(true_labels_val, val_predictions, zero_division=0))
    print("\nSample Validation Predictions vs Actuals:")
    # Slice rather than index with range(5) so a validation set with fewer than 5 rows does not raise IndexError.
    for predicted, actual in zip(val_predictions[:5], true_labels_val[:5]):
        print(f"Predicted: {predicted} | Actual: {actual}")
else:
    print("\n⚠️ Mismatch between number of predictions and true labels. Skipping classification report.")



⏱️ Time taken for validation predictions: 10360.89 seconds for 1212 prompts

Total predictions received: 1212
Total true labels: 1212

Classification Report on Validation Data:
                precision    recall  f1-score   support

contradiction       0.94      0.45      0.61       387
   entailment       0.52      0.84      0.64       436
      neutral       0.39      0.32      0.35       389

     accuracy                           0.55      1212
    macro avg       0.62      0.54      0.53      1212
 weighted avg       0.61      0.55      0.54      1212


Sample Validation Predictions vs Actuals:
Predicted: neutral | Actual: neutral
Predicted: entailment | Actual: entailment
Predicted: contradiction | Actual: contradiction
Predicted: entailment | Actual: entailment
Predicted: entailment | Actual: neutral
