## **Performance Metrics to evaluate text generating LLMs:**

- **Fact checking**

In this notebook, we focus on the fact-checking performance metric (see the README.md file for more details).


### **Steps to run this Notebook:**

- **Step 1:** Generating the queries + saving the results
- **Step 2:** Prompt the text generative LLM - using the prompt given below
- **Step 3:** Compute the scores and export the results
- **Step 4:** Combine all the steps into a single function

### **Step 1: Generating specific queries + saving results**

In [11]:
# import libraries
import pandas as pd
import requests
import numpy as np
from numpy.linalg import norm

In [72]:
import numpy as np

def similarity_metric(array1, array2):
    """Return the cosine similarity between two numeric vectors.

    NOTE: a later cell of this notebook defines another function with the
    same name; once that cell has run, it replaces this one.

    Parameters
    ----------
    array1, array2 : array-like of numbers
        Sequences of equal length. Both are coerced to float NumPy arrays,
        so plain lists, tuples and integer arrays are accepted.

    Returns
    -------
    float
        ``dot(array1, array2) / (||array1|| * ||array2||)``, or ``0.0`` when
        either input has zero norm (avoids a division by zero).
    """
    # Coerce BOTH inputs; previously only array2 was converted, and it had to
    # already be an ndarray because `.astype` was called on it directly.
    vec1 = np.asarray(array1, dtype=float)
    vec2 = np.asarray(array2, dtype=float)

    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)

    if norm_vec1 == 0 or norm_vec2 == 0:
        return 0.0
    return np.dot(vec1, vec2) / (norm_vec1 * norm_vec2)


In [13]:
def execute_sparql_query(query, timeout=30):
    """Run a SPARQL query against the Wikidata Query Service.

    Parameters
    ----------
    query : str
        The SPARQL query text.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout, ``requests.get`` can block indefinitely.

    Returns
    -------
    dict or None
        The decoded JSON response when the server answers with HTTP 200.
        ``None`` when the request fails (network error, timeout) or the
        server returns any other status; the error is printed in both cases.
    """
    endpoint_url = "https://query.wikidata.org/sparql"
    headers = {
        'User-Agent': 'Example/1.0 (contact@example.com)',
        'Accept': 'application/sparql-results+json'
    }
    params = {
        'query': query,
        'format': 'json'
    }
    try:
        response = requests.get(
            endpoint_url, params=params, headers=headers, timeout=timeout
        )
    except requests.RequestException as exc:
        # Keep the function's "print and return None" error contract for
        # transport-level failures as well as for bad HTTP statuses.
        print("Error executing SPARQL query:")
        print(exc)
        return None

    if response.status_code == 200:
        return response.json()

    print("Error executing SPARQL query:")
    print(response.text)
    return None

In [54]:
def _first_value(results, variable, cast=str):
    """Return `variable` from the first row of a SPARQL JSON response.

    `results` is the value returned by `execute_sparql_query`. A failed query
    (`results` is None) or an empty result set raises a RuntimeError with a
    readable message instead of an opaque TypeError/IndexError at the
    indexing site. `cast` converts the raw string value (e.g. `int`).
    """
    if results is None:
        raise RuntimeError(
            f"SPARQL query for '{variable}' failed; see the error printed above."
        )
    bindings = results['results']['bindings']
    if not bindings:
        raise RuntimeError(f"SPARQL query for '{variable}' returned no rows.")
    # NOTE(review): none of the queries below has an ORDER BY, so when a query
    # matches several rows the "first" one is whatever the endpoint returns
    # first - confirm this is acceptable for the ground truth.
    return cast(bindings[0][variable]['value'])


# 1. Age of Barack Obama (whole years, computed server-side from the birth date)
query_1 = """
SELECT DISTINCT ?age WHERE {
  wd:Q76 p:P569 ?birthdateStatement.
  ?birthdateStatement ps:P569 ?birthdate.
  BIND((YEAR(NOW()) - YEAR(?birthdate)) - IF(MONTH(NOW()) < MONTH(?birthdate) || (MONTH(NOW()) = MONTH(?birthdate) && DAY(NOW()) < DAY(?birthdate)), 1, 0) AS ?age)
}
"""
results = execute_sparql_query(query_1)
answer_1 = _first_value(results, 'age', int)
print("Answer_1:", answer_1)
prompt_1 = "How old is Barack Obama, please give just the decimal number"

# 2. Height of Eiffel Tower
query_2 = """
SELECT DISTINCT ?height WHERE {
  wd:Q243 p:P2048 ?heightStatement.
  ?heightStatement ps:P2048 ?height.
}
"""
results = execute_sparql_query(query_2)
answer_2 = _first_value(results, 'height', int)
print("Answer_2:", answer_2)
prompt_2 = "What is the height of the Eiffel Tower, please give just the decimal number"

# 3. Capital of France
query_3 = """
SELECT DISTINCT ?capitalLabel WHERE {
  wd:Q142 wdt:P36 ?capital.
  ?capital rdfs:label ?capitalLabel.
  FILTER(LANG(?capitalLabel) = "en")
}
"""
results = execute_sparql_query(query_3)
answer_3 = _first_value(results, 'capitalLabel')
print("Answer_3:", answer_3)
prompt_3 = "What is the capital of France?"

# 4. Population of China
query_4 = """
SELECT DISTINCT ?population WHERE {
  wd:Q148 wdt:P1082 ?population.
}
"""
results = execute_sparql_query(query_4)
answer_4 = _first_value(results, 'population', int)
print("Answer_4:", answer_4)
prompt_4 = "What is the population of China?"

# 5. Capital of Australia
query_5 = """
SELECT DISTINCT ?capitalLabel WHERE {
  wd:Q408 wdt:P36 ?capital.
  ?capital rdfs:label ?capitalLabel.
  FILTER(LANG(?capitalLabel) = "en")
}
"""
results = execute_sparql_query(query_5)
answer_5 = _first_value(results, 'capitalLabel')
print("Answer_5:", answer_5)
prompt_5 = "What is the capital of Australia?"

# 6. Capital of Italy
query_6 = """
SELECT DISTINCT ?capitalLabel WHERE {
  wd:Q38 wdt:P36 ?capital.
  ?capital rdfs:label ?capitalLabel.
  FILTER(LANG(?capitalLabel) = "en")
}
"""
results = execute_sparql_query(query_6)
answer_6 = _first_value(results, 'capitalLabel')
print("Answer_6:", answer_6)
prompt_6 = "What is the capital of Italy?"

# 7. Birthplace of Albert Einstein
query_7 = """
SELECT DISTINCT ?birthplaceLabel WHERE {
  wd:Q937 wdt:P19 ?birthplace.
  ?birthplace rdfs:label ?birthplaceLabel.
  FILTER(LANG(?birthplaceLabel) = "en")
}
"""
results = execute_sparql_query(query_7)
answer_7 = _first_value(results, 'birthplaceLabel')
print("Answer_7:", answer_7)
prompt_7 = "Where was Albert Einstein born?"

# 8. Number of Films Directed by Steven Spielberg
query_8 = """
SELECT (COUNT(?film) AS ?count) WHERE {
  ?film wdt:P31 wd:Q11424.
  ?film wdt:P57 wd:Q8877.
}
"""
results = execute_sparql_query(query_8)
answer_8 = _first_value(results, 'count', int)
print("Answer_8:", answer_8)
prompt_8 = "How many films have been directed by Steven Spielberg?"

# 9. Population of India
query_population_india = """
SELECT DISTINCT ?population WHERE {
  wd:Q668 wdt:P1082 ?population.
}
"""
results = execute_sparql_query(query_population_india)
answer_9 = _first_value(results, 'population', int)
print("Answer_9:", answer_9)
prompt_9 = "What is the population of India?"

# 10. Capital of Japan
query_capital_japan = """
SELECT DISTINCT ?capitalLabel WHERE {
  wd:Q17 wdt:P36 ?capital.
  ?capital rdfs:label ?capitalLabel.
  FILTER(LANG(?capitalLabel) = "en")
}
"""
results = execute_sparql_query(query_capital_japan)
answer_10 = _first_value(results, 'capitalLabel')
print("Answer_10:", answer_10)
prompt_10 = "What is the capital of Japan?"

Answer_1: 62
Answer_2: 300
Answer_3: Paris
Answer_4: 1443497378
Answer_5: Canberra
Answer_6: Rome
Answer_7: Ulm
Answer_8: 36
Answer_9: 1326093247
Answer_10: Tokyo


In [91]:
import pandas as pd

# Pair each prompt with its Wikidata answer; both lists follow question order 1-10.
data = dict(
    Prompt=[
        prompt_1, prompt_2, prompt_3, prompt_4, prompt_5,
        prompt_6, prompt_7, prompt_8, prompt_9, prompt_10,
    ],
    Answer=[
        answer_1, answer_2, answer_3, answer_4, answer_5,
        answer_6, answer_7, answer_8, answer_9, answer_10,
    ],
)

# Tabulate the benchmark: one row per question.
df = pd.DataFrame(data)

# Print the table as plain text.
print(df)


                                              Prompt      Answer
0  How old is Barack Obama, please give just the ...          62
1  What is the height of the Eiffel Tower, please...         300
2                     What is the capital of France?       Paris
3                   What is the population of China?  1443497378
4                  What is the capital of Australia?    Canberra
5                      What is the capital of Italy?        Rome
6                    Where was Albert Einstein born?         Ulm
7  How many films have been directed by Steven Sp...          36
8                   What is the population of India?  1326093247
9                      What is the capital of Japan?       Tokyo


In [92]:
# Persist the benchmark (prompts + ground-truth answers) for the later steps.
# index=False keeps the positional index out of the file, so reading it back
# does not produce an extra unnamed column.
df.to_csv("/content/dataset_sample_fact_checking.csv", index=False)

### **Step 2:** Prompt the text generative LLM - using the prompt given below


In [57]:
# Show only the questions; these are the prompts to send to the LLM.
df[["Prompt"]]

Unnamed: 0,Prompt
0,"How old is Barack Obama, please give just the ..."
1,"What is the height of the Eiffel Tower, please..."
2,What is the capital of France?
3,What is the population of China?
4,What is the capital of Australia?
5,What is the capital of Italy?
6,Where was Albert Einstein born?
7,How many films have been directed by Steven Sp...
8,What is the population of India?
9,What is the capital of Japan?


**Query the text-generating LLM with the following prompt:** (replace `COPY_QUESTIONS` with the questions copied from the table above)

```
Please answer the following questions:COPY_QUESTIONS
```
```
please answer in the following format: [answer_1,answer_2]
```

In [None]:
# Please answer the following questions:COPY_QUESTIONS please answer in the following format: arr= [answer_1,answer_2]

In [60]:
# Answers to score, one per prompt and in the same order as the prompts
# (presumably pasted from the LLM's reply - the prompt asks for this list format).
arr = [61.85, 324, "Paris", 1444216107, "Canberra", "Rome", "Ulm, Germany", 34, 1393409038, "Tokyo"]
print(arr)

[61.85, 324, 'Paris', 1444216107, 'Canberra', 'Rome', 'Ulm, Germany', 34, 1393409038, 'Tokyo']


In [64]:
# Reference answers fetched from Wikidata in Step 1, in question order 1-10.
ground_truth = [
    answer_1, answer_2, answer_3, answer_4, answer_5,
    answer_6, answer_7, answer_8, answer_9, answer_10,
]

In [65]:
# Display the reference answers that the LLM output is scored against.
ground_truth

[62,
 300,
 'Paris',
 1443497378,
 'Canberra',
 'Rome',
 'Ulm',
 36,
 1326093247,
 'Tokyo']

### **Step 3:** Compute the scores and export the results


In [77]:
def similarity_metric(array1, array2):
    """Return the fraction of ground-truth answers matched by the predictions.

    Answers are compared position by position: ``array2[i]`` is the answer
    given to the question whose expected answer is ``array1[i]``. (A plain
    membership test would also count an answer given to the wrong question.)

    Parameters
    ----------
    array1 : sequence
        Ground-truth answers.
    array2 : sequence
        Predicted answers, in the same order as ``array1``. If it is shorter
        than ``array1`` the missing answers count as wrong; extra trailing
        answers are ignored.

    Returns
    -------
    float
        Number of exact matches divided by ``len(array1)``; ``0.0`` when
        ``array1`` is empty (avoids a division by zero).
    """
    total = len(array1)
    if total == 0:
        return 0.0
    matches = sum(
        1 for expected, predicted in zip(array1, array2)
        if bool(expected == predicted)
    )
    return matches / total


In [81]:
# Compare answers as strings on both sides. The dtype is made explicit for the
# ground truth too, so an all-numeric answer list is not left as integers that
# could never equal the string predictions.
array1 = np.array(ground_truth, dtype=str)
array2 = np.array(arr, dtype=str)
# Round to two decimals, the same precision calculate_and_export_fact_checking uses.
similarity_score = round(similarity_metric(array1, array2), 2)
similarity_score

0.4

In [83]:
# Label of the evaluated model; used below as the CSV file name and as the `model_name` column.
model_name = "chat_gpt"

In [84]:
# One-row summary for this model: its label and the score computed above.
new_data = dict(model_name=model_name, accuracy=[similarity_score])
df = pd.DataFrame(new_data)

# Export to "<model_name>.csv" in the current working directory, without the index.
df.to_csv(f"{model_name}.csv", index=False)

In [86]:
# Read back the file written by the previous cell. The path is built the same
# way as in the export (relative, from `model_name`) instead of a hard-coded
# absolute Colab path, so it works in any working directory and for any model.
df = pd.read_csv(f"{model_name}.csv")
df

Unnamed: 0,model_name,accuracy
0,chat_gpt,0.4


### **Step 4:** Combine all the steps into a single function

In [120]:
import numpy as np
import pandas as pd

def calculate_and_export_fact_checking(
    model_name,
    results,
    dataset_path="/content/dataset_sample_fact_checking.csv",
    output_path=None,
):
    """Score a model's answers against the saved ground truth and export the score.

    Parameters
    ----------
    model_name : str
        Label of the evaluated model; stored in the ``model_name`` column and
        used to build the default output file name.
    results : sequence
        The model's answers, one per question, in the same order as the rows
        of the dataset. Every answer is compared as a string.
    dataset_path : str, optional
        CSV file with an ``Answer`` column holding the ground truth
        (default: the file written in Step 1).
    output_path : str, optional
        Destination CSV. Defaults to ``f"/{model_name}.csv"``, i.e. a file at
        the filesystem root, as in the original implementation.

    Returns
    -------
    None
        The result is written to ``output_path`` as a one-row CSV with the
        columns ``model_name`` and ``accuracy``.
    """
    def similarity_metric(array1, array2):
        # Position-wise exact match: results[i] answers question i. A plain
        # membership test would also accept an answer given to another question.
        total = len(array1)
        if total == 0:
            return 0.0  # empty dataset: avoid dividing by zero
        hits = sum(
            1 for expected, predicted in zip(array1, array2)
            if bool(expected == predicted)
        )
        return hits / total

    # Ground truth, forced to strings so numeric answers compare equal to the
    # string-cast predictions below.
    dataset = pd.read_csv(dataset_path)
    array1 = dataset["Answer"].astype(str).to_numpy()
    array2 = np.array(results, dtype=str)

    # Compute similarity score
    similarity_score = round(similarity_metric(array1, array2), 2)

    # One-row summary table
    summary = pd.DataFrame({
        'model_name': [model_name],
        'accuracy': [similarity_score]
    })

    # Export to CSV
    if output_path is None:
        output_path = f"/{model_name}.csv"
    summary.to_csv(output_path, index=False)

In [121]:
# Reload the benchmark (prompts + ground-truth answers) saved in Step 1.
df = pd.read_csv("/content/dataset_sample_fact_checking.csv")
# Label of the evaluated model; calculate_and_export_fact_checking uses it to name the exported CSV.
model_name = "chat_gpt"
# Display the questions to send to the LLM (last expression of the cell, so it is rendered as a table).
df[["Prompt"]]
# In Colab: click the icon next to the table ("convert this dataframe to an interactive table"),
# then choose "copy table" -> JSON and paste the result into the prompt in the cell below,
# replacing COPY_QUESTIONS.
# Then copy the whole prompt and send it to the LLM.

Unnamed: 0,Prompt
0,"How old is Barack Obama, please give just the ..."
1,"What is the height of the Eiffel Tower, please..."
2,What is the capital of France?
3,What is the population of China?
4,What is the capital of Australia?
5,What is the capital of Italy?
6,Where was Albert Einstein born?
7,How many films have been directed by Steven Sp...
8,What is the population of India?
9,What is the capital of Japan?


In [122]:
# Please answer the following questions:COPY_QUESTIONS please answer in the following format: arr= [answer_1,answer_2]

In [123]:
# Example usage: answers to score, one per prompt and in the same order as the prompts
# (presumably pasted from the LLM's reply).
arr = [61, 324, "Paris", 1403500365, "Canberra", "Rome", "Ulm, Kingdom of Württemberg, German Empire", 35, 1380004385, "Tokyo"]

In [124]:
# Score `arr` against the saved ground truth and export the result to a CSV named after the model.
calculate_and_export_fact_checking(model_name, arr)

In [125]:
# Read back the CSV exported by calculate_and_export_fact_checking.
# NOTE(review): the path starts at the filesystem root ("/"), matching the export
# path used in that function; the earlier "delete . if on colab" note suggests a
# "./" prefix is meant outside Colab - confirm.
df = pd.read_csv(f"/{model_name}.csv")
df

Unnamed: 0,model_name,accuracy
0,chat_gpt,0.4
