In [23]:
from ollama import Client, Options
from typing import Dict, Any, List
import yaml
import re
import os
from torch import Tensor
import pandas as pd

In [24]:
# Prompt template sent to the LLM once per input sentence. The `{claim}`
# placeholder is filled with `str.format(claim=...)` by the prediction loop.
# The reply is parsed downstream on the assumption that it begins with
# "My response is: <yes|no>", so keep that instruction and the parser in sync.
CHECKWORTHY_PROMPT = """Given a sentence from a political debate or tweet, your task is to predict if it is fact check-worthy statement or not.
 Your response must always start with "My response is: (yes or no based on your prediction)"
 your prediction must be yes or no, and i wont accept you prediction to be "I cannot determine"
 Never write "I'm and AI language model..."

 Sentence: {claim}"""

In [25]:
class Ollama:
    """Thin wrapper around the Ollama client, configured from a YAML file.

    The YAML file may define ``model`` (model name), ``stream`` (whether to
    request a streamed response) and ``options`` (a mapping of generation
    options forwarded to the model).
    """

    def __init__(self, config_path: str = "mistral.yaml", timeout: float = 20):
        """Initializes the Ollama client and loads necessary configurations.

        Args:
            config_path: Path to the YAML configuration file.
            timeout: Timeout (in seconds) forwarded to the Ollama client.

        Raises:
            FileNotFoundError: If the config file is not found.
            ValueError: If the config file is not a YAML mapping.
        """
        self._ollama_client = Client(timeout=timeout)
        self._config_path = config_path
        self._config = self._load_config()
        self._stream = self._config.get("stream", False)
        self._model_name = self._config.get("model", "mistral")
        self._llm_options = self._get_llm_config()

    def generate(self, prompt: str) -> str:
        """Generate text using Ollama LLM for the given prompt.

        Args:
            prompt: Prompt for the LLM.

        Returns:
            Response text from an Ollama LLM, stripped of surrounding
            whitespace. An empty string is returned if the response carries
            no text.
        """
        response = self._ollama_client.generate(
            model=self._model_name,
            prompt=prompt,
            options=self._llm_options,
            stream=self._stream,
        )
        if self._stream:
            # A streamed call yields partial responses instead of a single
            # mapping; concatenate their text fragments in arrival order.
            text = "".join(
                (chunk.get("response", "") or "") for chunk in response
            )
        else:
            text = response.get("response", "") or ""  # type: ignore
        return text.strip()

    def _load_config(self) -> Dict[str, Any]:
        """Loads configuration from a YAML file.

        Raises:
            FileNotFoundError: If the config file is not found.
            ValueError: If the top-level YAML value is not a mapping.

        Returns:
            A dictionary with configuration values (empty if the file is
            empty).
        """
        if not os.path.isfile(self._config_path):
            raise FileNotFoundError(
                f"Config file {self._config_path} not found."
            )
        with open(self._config_path, "r", encoding="utf-8") as file:
            yaml_data = yaml.safe_load(file)
        if yaml_data is None:
            # safe_load returns None for an empty document; fall back to an
            # empty config so the defaults in __init__ apply.
            return {}
        if not isinstance(yaml_data, dict):
            raise ValueError(
                f"Config file {self._config_path} must contain a YAML "
                f"mapping, got {type(yaml_data).__name__}."
            )
        return yaml_data

    def _get_llm_config(self) -> "Options":
        """Extracts and returns the LLM (large language model) options.

        Returns:
            An Options object built from the ``options`` section of the
            config (empty if the section is missing or null).
        """
        options = self._config.get("options") or {}
        # Unpack as keyword arguments: this works whether Options is a
        # TypedDict or a keyword-only model class.
        return Options(**options)

In [26]:
# Build the LLM wrapper with its default config path ("mistral.yaml"); the
# constructor raises FileNotFoundError if that file does not exist.
ollama = Ollama()

In [27]:
# Captures the yes/no verdict that follows the mandated "My response is:"
# prefix, ignoring case and any punctuation around the verdict.
_ANSWER_RE = re.compile(r"my response is\W*(yes|no)\b", re.IGNORECASE)


def _extract_answer(response: str, default: str = "no") -> str:
    """Return the model's verdict ("yes" or "no") parsed from its reply.

    Args:
        response: Raw text returned by the LLM.
        default: Label to use when no verdict can be found (e.g. an empty or
            off-format reply).

    Returns:
        The lower-cased verdict, or ``default`` if the reply is unparseable.
    """
    match = _ANSWER_RE.search(response)
    return match.group(1).lower() if match else default


listofpredicts = []
similarity = 0  # correct-prediction counter, kept for the scoring cell that follows
df = pd.read_csv('../data/processed/processed_dev_test.tsv', sep='\t')
for text in df['text']:
    response = ollama.generate(CHECKWORTHY_PROMPT.format(claim=text))
    # Short or off-format replies fall back to "no" instead of raising.
    listofpredicts.append(_extract_answer(response))

My response is: No. This sentence does not contain a fact-check worthy statement. It is grammatically incorrect and does not make a clear claim that can be fact-checked.
['My', 'response', 'is:', 'No.', 'This', 'sentence', 'does', 'not', 'contain', 'a', 'fact-check', 'worthy', 'statement.', 'It', 'is', 'grammatically', 'incorrect', 'and', 'does', 'not', 'make', 'a', 'clear', 'claim', 'that', 'can', 'be', 'fact-checked.']
My response is: No. This sentence does not contain a factual claim that can be checked.
['My', 'response', 'is:', 'No.', 'This', 'sentence', 'does', 'not', 'contain', 'a', 'factual', 'claim', 'that', 'can', 'be', 'checked.']
My response is: yes. This statement implies a potential gender-based price discrimination, which is fact check-worthy.
['My', 'response', 'is:', 'yes.', 'This', 'statement', 'implies', 'a', 'potential', 'gender-based', 'price', 'discrimination,', 'which', 'is', 'fact', 'check-worthy.']
My response is: no. This statement does not contain any factual

In [28]:
# Percentage of rows whose predicted label equals the gold `class_label`.
# Every prediction must pair with exactly one row; fail loudly otherwise.
assert len(listofpredicts) == len(df), "predictions and dataset rows are out of sync"

# Recompute the counter from scratch so re-running this cell cannot inflate
# the score, and pair rows with predictions positionally rather than through
# the DataFrame index labels. A missing label counts as a mismatch.
similarity = sum(
    gold == predicted
    for gold, predicted in zip(df['class_label'].str.lower(), listofpredicts)
)
# Guard against an empty evaluation set.
similarity_score = (similarity / len(listofpredicts)) * 100 if listofpredicts else 0.0
print("Similarity mistral/dataset: ", similarity_score)

Similarity mistral/dataset:  59.43396226415094
