In [50]:
from ollama import Client, Options
from typing import Dict, Any, List
import yaml
import re
import os
from torch import Tensor
import pandas as pd

# Prompt template sent to the LLM for every input sentence. The `{claim}`
# placeholder is filled in with `str.format(claim=...)` by the prediction loop.
# The template asks the model to begin its reply with "My response is: <yes|no>",
# which is the prefix the downstream answer parsing depends on.
# NOTE(review): the text contains typos ("i wont accept you prediction",
# "I'm and AI language model"). They are left untouched here because the string
# is runtime input to the model and editing it may change the predictions.
CHECKWORTHY_PROMPT = """Given a sentence from a political debate or tweet, your task is to predict if it is fact check-worthy statement or not.
 Your response must always start with "My response is: (yes or no based on your prediction)"
 your prediction must be yes or no, and i wont accept you prediction to be "I cannot determine"
 Never write "I'm and AI language model..."
Sentence: {claim}"""


In [54]:
class Ollama:
    """Thin wrapper around the Ollama client, configured from a YAML file.

    Recognised top-level config keys:
        model: Name of the model to query (default ``"gemma:7b"``).
        stream: Whether to request a streamed response (default ``False``).
        options: Mapping of generation options forwarded to the client.
    """

    def __init__(self, config_path: str = "gemma7b.yaml"):
        """Initializes the Ollama client and loads necessary configurations.

        Args:
            config_path: Path to the YAML configuration file.

        Raises:
            FileNotFoundError: If the config file is not found.
            ValueError: If the config file does not hold a top-level mapping.
        """
        self._ollama_client = Client(timeout=1000)
        self._config_path = config_path
        self._config = self._load_config()
        self._stream = self._config.get("stream", False)
        self._model_name = self._config.get("model", "gemma:7b")
        self._llm_options = self._get_llm_config()

    def generate(self, prompt: str) -> str:
        """Generate text using Ollama LLM for the given prompt.

        Args:
            prompt: Prompt for the LLM.

        Returns:
            Response text from an Ollama LLM, stripped of surrounding
            whitespace. When streaming is enabled the partial responses are
            concatenated before being returned.
        """
        response = self._ollama_client.generate(
            model=self._model_name,
            prompt=prompt,
            options=self._llm_options,
            stream=self._stream,
        )
        if self._stream:
            # A streamed call yields chunks instead of a single mapping, so
            # calling `.get` on the return value directly would fail.
            text = "".join(
                chunk.get("response", "") or "" for chunk in response
            )
        else:
            text = response.get("response", "") or ""  # type: ignore
        return text.strip()

    def _load_config(self) -> Dict[str, Any]:
        """Loads configuration from a YAML file.

        Raises:
            FileNotFoundError: If the config file is not found.
            ValueError: If the file's top-level YAML node is not a mapping.

        Returns:
            A dictionary with configuration values. An empty file yields an
            empty dictionary so that the defaults in ``__init__`` apply.
        """
        if not os.path.isfile(self._config_path):
            raise FileNotFoundError(
                f"Config file {self._config_path} not found."
            )
        with open(self._config_path, "r", encoding="utf-8") as file:
            yaml_data = yaml.safe_load(file)
        if yaml_data is None:
            # safe_load returns None for an empty document.
            return {}
        if not isinstance(yaml_data, dict):
            raise ValueError(
                f"Config file {self._config_path} must contain a mapping "
                "at the top level."
            )
        return yaml_data

    def _get_llm_config(self) -> Options:
        """Extracts and returns the LLM (large language model) configuration.

        Returns:
            An Options object built from the ``options`` section of the
            config; empty when that section is missing or has no value.
        """
        # `or {}` also covers an `options:` key that is present but empty.
        llm_options = self._config.get("options") or {}
        # Keyword expansion works whether Options is a TypedDict or a model
        # class; passing the dict positionally only works for the former.
        return Options(**llm_options)

In [55]:
# Build the LLM wrapper with its default config path ("gemma7b.yaml", resolved
# against the current working directory); raises FileNotFoundError if that
# file is missing.
ollama = Ollama()

In [56]:
def _parse_prediction(response: str) -> str:
    """Extract the yes/no verdict from a raw model reply.

    Looks for the "My response is: <yes|no>" prefix requested by the prompt,
    tolerating surrounding whitespace, quotes and markdown emphasis. If the
    prefix is absent, a reply that itself starts with yes/no is accepted.

    Args:
        response: Raw text returned by the model.

    Returns:
        "yes" or "no"; "no" when no verdict can be recognised (this includes
        empty or very short replies, which must not raise).
    """
    verdict = re.search(
        r"my response is:?[\s*_\"']*(yes|no)\b", response, re.IGNORECASE
    )
    if verdict is None:
        verdict = re.match(r"[\s*_\"']*(yes|no)\b", response, re.IGNORECASE)
    return verdict.group(1).lower() if verdict else "no"


listofpredicts = []
# Counter used by the evaluation cell; initialised alongside the predictions.
similarity = 0
df = pd.read_csv('../data/processed/processed_CT24_checkworthy_english/processed_dev_test.tsv', sep='\t')
for claim in df['text']:
    response = ollama.generate(CHECKWORTHY_PROMPT.format(claim=claim))
    print("response: ", response)
    listofpredicts.append(_parse_prediction(response))

response:  My response is: No.
response:  My response is: No
response:  My response is: No, this sentence cannot be fact checked. It is a subjective statement that makes an opinion about the fairness of charging women more for a procedure than men.
response:  My response is: No
response:  My response is: No
response:  My response is: No
response:  My response is: No
response:  My response is: No
response:  My response is: No
response:  My response is: No
response:  My response is: No, it is not fact check-worthy. The sentence does not provide any factual information and is therefore not a fact check-worthy statement.
response:  My response is: No.
response:  My response is: No
response:  My response is: No.
response:  My response is: No.
response:  My response is: No.
response:  My response is: No.
response:  My response is: No
response:  My response is: No.
response:  My response is: No.
response:  My response is: No.
response:  My response is: No.
response:  My response is: No.
respo

In [57]:
# Predictions are paired with gold labels by position, so both sequences must
# have the same length.
assert len(listofpredicts) == len(df), (
    "number of predictions does not match number of rows in df"
)
# Recompute the match count from scratch so that re-running this cell does not
# keep adding to a counter left over from a previous run.
similarity = sum(
    str(gold_label).lower() == predicted
    for gold_label, predicted in zip(df['class_label'], listofpredicts)
)
print(listofpredicts)
# Percentage of predictions equal to the gold label; 0.0 when there is nothing
# to score (avoids a ZeroDivisionError).
similarity_score = (similarity / len(listofpredicts)) * 100 if listofpredicts else 0.0
# NOTE(review): the label says "mistral" while the wrapper's default model is
# "gemma:7b" — confirm which model produced these predictions.
print("Similarity mistral/dataset: ", similarity_score)

['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'yes', 'yes', 'no', 'no', 'no', 'no', 'no', 'yes', 'no', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no