### Llama 3.2 Vision 

In [1]:
import ollama
import pprint
import pandas as pd
import os
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import re

In [2]:
# Test split and the folder holding the images referenced by its `pairID` column.
test_data = pd.read_csv('esnlive_test_final.csv')
image_folder = "flickr30k_images"

# Split the test set into consecutive chunks of at most CHUNK_SIZE rows so each
# chunk can be run separately; the last chunk is clipped to len(test_data).
# The size is named once so the step and the upper bound cannot drift apart.
CHUNK_SIZE = 1000
ranges = [
    (start, min(start + CHUNK_SIZE, len(test_data)))
    for start in range(0, len(test_data), CHUNK_SIZE)
]
# Chunk number -> (start, end) row positions of that chunk.
ranges_dict = dict(enumerate(ranges))
ranges_dict

{0: (0, 1000),
 1: (1000, 2000),
 2: (2000, 3000),
 3: (3000, 4000),
 4: (4000, 5000),
 5: (5000, 6000),
 6: (6000, 7000),
 7: (7000, 8000),
 8: (8000, 9000),
 9: (9000, 10000),
 10: (10000, 11000),
 11: (11000, 12000),
 12: (12000, 13000),
 13: (13000, 14000),
 14: (14000, 14740)}

In [None]:
def chunk_test_data(from_index, to_index, model_name="llama3.2-vision", output_dir="results"):
    """Run zero-shot image/text entailment on one slice of ``test_data`` and save it as CSV.

    For every row of ``test_data.iloc[from_index:to_index]`` the image file name
    is taken from the part of ``pairID`` before ``#``, resolved inside
    ``image_folder``, and sent together with the row's ``hypothesis`` to the
    Ollama chat model. One record per row is written to
    ``<output_dir>/results_from_<from_index>_to_<to_index>.csv`` with the
    columns ``premise``, ``hypothesis``, ``label`` and ``prediction``.

    The ``prediction`` column is lower-cased and holds either the first of
    entailment / neutral / contradiction found in the reply, or one of the
    sentinels ``"no match found"``, ``"no content in response"`` or ``"error"``.

    Args:
        from_index: Positional start of the slice (inclusive).
        to_index: Positional end of the slice (exclusive).
        model_name: Ollama model tag passed to ``ollama.chat``.
        output_dir: Directory that receives the CSV; created when missing.

    Returns:
        None. The result is the CSV file; raw replies are printed as they arrive.
    """
    test_data_sub = test_data.iloc[from_index:to_index]
    # Compiled once per call instead of once per sample.
    label_pattern = re.compile(r'\b(Entailment|Neutral|Contradiction)\b', re.IGNORECASE)

    def zero_shot(model, premise_path, hypothesis):
        """Classify ``hypothesis`` against the image at ``premise_path``.

        Returns the matched label as written by the model, or a sentinel string
        when the reply has no content or names none of the three labels.
        Raises FileNotFoundError when the image does not exist.
        """
        if not os.path.exists(premise_path):
            raise FileNotFoundError(f"Image file not found: {premise_path}")

        # The continuation lines' leading whitespace is part of the prompt text.
        # It is kept exactly as in earlier runs so predictions stay comparable.
        prompt = f"""Given the text hypothesis '{hypothesis}' classify it into one of the following three categories: [Entailment, Neutral, Contradiction].
                                    Entailment holds if there is enough evidence in image Premise to conclude that text Hypothesis is true.
                                    Contradiction holds if text Hypothesis contradicts image Premise.
                                    Neutral holds if the evidence in image Premise is insufficient to draw a conclusion from text Hypothesis
                                    In your answer don't give any explanation, just the category."""
        response = ollama.chat(
            model=model,
            messages=[{"role": "user", "content": prompt, "images": [premise_path]}],
        )

        # Only the attribute access can raise AttributeError, so only it is guarded.
        try:
            full_prediction = response.message.content
        except AttributeError:
            return "No content in response"
        # A reply without text would otherwise make the regex search raise
        # TypeError and be recorded as a generic "Error".
        if full_prediction is None:
            return "No content in response"
        print("Prediction:", full_prediction)

        match = label_pattern.search(full_prediction)
        return match.group(0) if match else "No match found"

    result_columns = ['premise', 'hypothesis', 'label', 'prediction']
    results = []

    for _, sample in test_data_sub.iterrows():
        image_name = sample['pairID'].split('#')[0]
        premise_path = os.path.join(image_folder, image_name)
        hypothesis = sample['hypothesis']
        label = sample['gold_label']
        prediction = "Error"

        # Best effort: a failing sample is logged and recorded as "error" so one
        # bad image or model hiccup does not abort the whole chunk.
        try:
            prediction = zero_shot(model_name, premise_path, hypothesis)
        except Exception as e:
            print(f"Error processing sample: {sample}. Error: {e}")

        results.append({
            'premise': premise_path,
            'hypothesis': hypothesis,
            'label': label,
            'prediction': prediction.lower(),
        })

    # Explicit columns keep the header intact when the slice is empty.
    results_df = pd.DataFrame(results, columns=result_columns)

    # Create the target directory up front so the run does not fail at the very
    # end, after all model calls, just because the folder is missing.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"results_from_{from_index}_to_{to_index}.csv")
    results_df.to_csv(output_path, index=False)

In [4]:
# (start, end) row bounds of chunk 0 as stored in ranges_dict; only the
# disabled full-chunk call below uses them.
from_, to_ = ranges_dict[0]

# Full run over chunk 0 (disabled; it makes one model call per row):
# chunk_test_data(from_, to_)
# Smoke test on rows 0-2 only; writes results/results_from_0_to_3.csv.
chunk_test_data(0, 3)

Prediction: Entailment
Prediction: Entailment.
Prediction: Contradiction.
