## **Installing Dependencies**

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.2-py3-none-any.whl (485 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.4/485.4 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading x

## **Importing Libraries**

In [None]:
import requests
import pandas as pd
from datasets import  Dataset, DatasetDict, load_dataset, concatenate_datasets
import random
from collections import Counter

from urllib.request import urlopen
from bs4 import BeautifulSoup
import urllib.parse

In [None]:
# Set the API key and the search engine ID for identification and authorization

# Read both credentials through context managers so the files are closed, and strip
# surrounding whitespace: a trailing newline in the file would otherwise be sent as part of the value.
with open('/content/api_key.txt') as key_file:
    API_KEY = key_file.read().strip()
with open('/content/search_id.txt') as search_id_file:
    SEARCH_ENGINE_ID = search_id_file.read().strip()

In [None]:
# Set the url for requests

url = 'https://www.googleapis.com/customsearch/v1'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## **Data Preparation**

#### **Factbench**

In [None]:
hallu_factbench = pd.read_json('/content/drive/MyDrive/Ceccarelli_MasterThesis&Internship/Master Thesis/data/Factbench.jsonl', lines=True)

In [None]:
# Remove instances without response or factuality label

# Keep only rows where both the response and its label are present, then renumber from 0
hallu_factbench = (
    hallu_factbench
    .dropna(subset=['response', 'response_label'])
    .reset_index(drop=True)
)

In [None]:
# Function that filters the questions based on if the prompt ends with a question mark

def is_question(prompt):
    """Return True when ``prompt``, ignoring surrounding whitespace, ends with "?"."""
    return prompt.strip().endswith("?")

hallu_factbench['is_question'] = hallu_factbench['prompt'].apply(is_question)

In [None]:
# Filter the questions

hallu_factbench = hallu_factbench[hallu_factbench['is_question'] == True].reset_index(drop=True)

In [None]:
hallu_factbench

Unnamed: 0,prompt,response,response_label,claims,claim_labels,ability_to_test,source,hallucination_spans,is_question
0,Which country or city has the maximum number o...,The United States has the highest number of nu...,False,[The United States has the highest number of n...,"[True, False, True, True, True, True]",knowledge,factool-qa,,True
1,Who is the CEO of Twitter?,The CEO of Twitter at the time of writing this...,False,"[Jack Dorsey is the CEO of Twitter, Jack Dorse...","[False, True, False, True, True, True, True]",knowledge,factool-qa,,True
2,Is Jupiter more dense than Saturn?,"No, Jupiter is less dense than Saturn. Jupiter...",False,"[Jupiter is less dense than Saturn, Jupiter ha...","[False, True, False, True, False]",knowledge,factool-qa,,True
3,How many sons had eise eisinga in total?,Eise Eisinga was a Dutch mathematician and ast...,False,"[Eise Eisinga was a Dutch mathematician, Eise ...","[True, True, True, False, False, True]",knowledge,factool-qa,,True
4,How many times did Argentina win the FIFA worl...,Argentina have won the FIFA World Cup two time...,False,[Argentina have won the FIFA World Cup two tim...,"[False, True, True, True, True, False, False, ...",knowledge,factool-qa,,True
...,...,...,...,...,...,...,...,...,...
545,What will be the weather tomorrow in London?,"As an AI language model, I cannot browse the i...",False,[],[],knowledge,halueval-dolly15k,"[As an AI language model, I cannot browse the ...",True
546,What is the syntax for creating an array in Ja...,The syntax for creating an array in JavaScript...,True,[],[],knowledge,halueval-dolly15k,[],True
547,How much does a loaf of bread cost?,"As an AI language model, I don't have access t...",False,[],[],knowledge,halueval-dolly15k,"[As an AI language model, I don't have access ...",True
548,What is the difference between a high pressure...,A high pressure system is an area of air that ...,True,[],[],knowledge,halueval-dolly15k,[],True


In [None]:
# Drop the rows whose label is the literal string 'NA'
# (drop=True so the previous index is discarded instead of being added as an extra 'index' column)

hallu_factbench = hallu_factbench[hallu_factbench['response_label'] != 'NA'].reset_index(drop=True)

In [None]:
hallu_factbench.groupby('response_label').count()

Unnamed: 0_level_0,index,prompt,response,claims,claim_labels,ability_to_test,source,hallucination_spans,is_question
response_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
False,221,221,221,221,221,221,221,74,221
True,327,327,327,327,327,327,327,190,327


In [None]:
# Balance the dataset: draw the same number of random examples from every label class
# (50 from True and 50 from False when there are two classes)

# NOTE: this seeds Python's built-in `random` module; the pandas draw below takes its own `random_state=42`.
random.seed(777)

num_classes = hallu_factbench["response_label"].nunique()
# Integer division: each class gets an equal share of a 100-example evaluation set
samples_per_class = 100 // num_classes

hallu_factbench_sample = hallu_factbench.groupby("response_label").sample(n=samples_per_class, random_state=42)

# Set the dataset for the examples to be shown to the LLM as the ones that were not chosen previously
# (the sampled rows are removed by their index labels, then the remainder is renumbered from 0)
hallu_factbench_examples = hallu_factbench.drop(hallu_factbench_sample.index).reset_index(drop=True)

In [None]:
# Renumber both frames from 0, discarding the previous index labels
hallu_factbench_sample = hallu_factbench_sample.reset_index(drop=True)
hallu_factbench_examples = hallu_factbench_examples.reset_index(drop=True)

In [None]:
# Check for the correctness of the rebalance
Counter(hallu_factbench_sample['response_label'])

Counter({False: 50, True: 50})

In [None]:
# Consider only the prompt, the response and the factuality label

hallu_factbench_sample = hallu_factbench_sample[['prompt', 'response', 'response_label']]
hallu_factbench_examples = hallu_factbench_examples[['prompt', 'response', 'response_label']]

In [None]:
hallu_factbench_sample = Dataset.from_pandas(hallu_factbench_sample)
hallu_factbench_examples = Dataset.from_pandas(hallu_factbench_examples[0:10])

In [None]:
hallu_factbench_sample[0]

{'prompt': 'Given this paragraph about autonomous buildings, why would they be safer during a military attack?',
 'response': 'Autonomous buildings are designed to not rely on external systems such as electricity, water, and gas grids. They are equipped with their own systems for heating, cooling, water supply, and waste management. This means that even during a military attack, when external systems may be compromised, autonomous buildings can continue to operate and provide a safe and sustainable haven for the people inside. Additionally, their self-sufficient systems may be more resilient to damage inflicted during the attack, resulting in a safer outcome for the occupants.',
 'response_label': False}

In [None]:
# Function that makes API requests to retrieve the links

def get_knowledge_url(example):
    """Search for the example's prompt and return the URL of the first result.

    Reads the module-level ``url``, ``API_KEY`` and ``SEARCH_ENGINE_ID``.

    Args:
        example: Mapping with a 'prompt' entry; the prompt is used verbatim as the query.

    Returns:
        ``{'knowledge_url': <link of the first result>}``, or
        ``{'knowledge_url': None}`` when the response contains no results.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status()`` when the API answers
            with an error status.
    """
    params = {                # Parameters for the request
        'key': API_KEY,
        'cx': SEARCH_ENGINE_ID,
        'q': example['prompt']  # The query is the prompt that generated the response
    }
    # A timeout keeps one stalled connection from blocking the whole map() run
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()                  # Fail loudly on an HTTP error status
    results = response.json()

    # 'items' can be absent or an empty list; both mean "no result"
    items = results.get('items') or []
    if not items:
        return {'knowledge_url' : None}

    # Take the first result; .get() yields None if that result carries no link
    return {'knowledge_url' : items[0].get('link')}

In [None]:
# Get the URLs

hallu_factbench_sample = hallu_factbench_sample.map(get_knowledge_url)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Function that extracts the HTML body of the URL

def get_knowledge(example):
  """Download the page behind ``example['knowledge_url']`` and return its text.

  Best effort: any failure while fetching, decoding or parsing the page yields
  an empty string instead of raising, so one bad URL does not abort a map() run.

  Args:
    example: Mapping that may carry a 'knowledge_url' entry (missing, None or
      empty means there is nothing to fetch).

  Returns:
    ``{'knowledge': <text extracted from the HTML>}``, or ``{'knowledge': ""}``
    when there is no URL or the page could not be retrieved and parsed.
  """
  page_url = example.get('knowledge_url')
  if not page_url:
    return {'knowledge' : ""}

  headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" # Browser-like User-Agent sent with the request
  }
  try:
    # A timeout keeps a single unresponsive host from stalling the whole run
    response = requests.get(page_url, headers=headers, timeout=10)
    # An HTTP error page is not usable knowledge: treat it like any other failure
    response.raise_for_status()
    # Content that is not valid UTF-8 raises here and ends up as empty knowledge
    soup = BeautifulSoup(response.content.decode('utf-8'), 'html.parser')
    knowledge = soup.get_text()   # Extract the text of the HTML
  except Exception:
    # `Exception` rather than a bare except, so KeyboardInterrupt/SystemExit still stop the run
    return {'knowledge' : ""}
  return {'knowledge' : knowledge}

In [None]:
# Get the knowledge from the URLs
hallu_factbench_sample = hallu_factbench_sample.map(get_knowledge)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
hallu_factbench_knowledge = hallu_factbench_sample.to_pandas() # Convert everything to Pandas

In [None]:
# Check for examples without knowledge

hallu_factbench_knowledge[hallu_factbench_knowledge['knowledge'] == '']

Unnamed: 0,prompt,response,response_label,knowledge_url,knowledge
52,Why do firms advertise? Even when goods are in...,Firms advertise to create brand recognition an...,True,https://www.cemus.uu.se/wp-content/uploads/201...,
93,How does the rate of photosynthesis vary with ...,The rate of photosynthesis increases with incr...,True,https://www.esalq.usp.br/lepse/imgs/conteudo_t...,


In [None]:
# Extracts knowledge for the examples to be shown to the LLM for few-shot prompting

hallu_factbench_knowledge_examples = hallu_factbench_examples.map(get_knowledge_url)
hallu_factbench_knowledge_examples = hallu_factbench_knowledge_examples.map(get_knowledge)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [None]:
# Check for examples without knowledge

hallu_factbench_knowledge_examples = hallu_factbench_knowledge_examples.to_pandas()
hallu_factbench_knowledge_examples[hallu_factbench_knowledge_examples['knowledge'] == '']

Unnamed: 0,prompt,response,response_label,knowledge_url,knowledge


## **Saving new DFs with knowledge**

In [None]:
path = '/content/drive/MyDrive/Ceccarelli_MasterThesis&Internship/Master Thesis'

In [None]:
# Hand pandas the path instead of a text handle opened with default settings,
# so pandas opens the file itself with its own encoding and newline handling for CSV
hallu_factbench_knowledge.to_csv(path + "/factbench_w_knowledge.csv", index=False)

In [None]:
# Hand pandas the path instead of a text handle opened with default settings,
# so pandas opens the file itself with its own encoding and newline handling for CSV
hallu_factbench_knowledge_examples.to_csv(path + "/factbench_w_knowledge_examples.csv", index=False)

#### **Factalign**

In [None]:
hallu_factalign = load_dataset('chaoweihuang/factalign-gemma2-f1_0.75', trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/839 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/4.11M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/738k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2177 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/385 [00:00<?, ? examples/s]

In [None]:
# Extraction of the prompt and the response from the JSON format

def get_question_answer(example):
    """Flatten the message lists: keep the 'content' of the first prompt and completion message."""
    return {
        field: example[field][0]["content"]
        for field in ("prompt", "completion")
    }

hallu_factalign = hallu_factalign.map(get_question_answer)

Map:   0%|          | 0/2177 [00:00<?, ? examples/s]

Map:   0%|          | 0/385 [00:00<?, ? examples/s]

In [None]:
# Convert to Pandas Dataframe for better handling

# One DataFrame per split, via the bound method instead of the unbound class call
hallu_factalign_train = hallu_factalign['train'].to_pandas()
hallu_factalign_test = hallu_factalign['test'].to_pandas()

In [None]:
# Remove the instances without response

# Filter each split with its OWN mask: the test split used to be indexed with the train
# split's mask, so its rows were kept or dropped according to unrelated train rows.
# drop=True discards the old index instead of adding it as an extra 'index' column.
hallu_factalign_train = hallu_factalign_train[hallu_factalign_train['completion'].notna()].reset_index(drop=True)
hallu_factalign_test = hallu_factalign_test[hallu_factalign_test['completion'].notna()].reset_index(drop=True)

  hallu_factalign_test = hallu_factalign_test[~hallu_factalign_train['completion'].isna()].reset_index()


In [None]:
# Remove the suffix "Provide as many specific details and examples as possible (such as names of people, numbers, events, locations, dates, times, etc.)"

def extract_question(prompt):
    """Drop whatever follows the last "?" in ``prompt``.

    Cutting at the LAST question mark (rather than the first) keeps prompts that
    contain more than one "?" intact while still removing a trailing instruction.

    Args:
        prompt: The raw prompt text.

    Returns:
        The prompt up to and including its last "?", or the prompt unchanged
        when it contains no "?".
    """
    question, mark, _trailing = prompt.rpartition("?")
    # rpartition returns an empty separator when no "?" is present
    return question + mark if mark else prompt

hallu_factalign_train['prompt'] = hallu_factalign_train['prompt'].apply(extract_question)
hallu_factalign_test['prompt'] = hallu_factalign_test['prompt'].apply(extract_question)

In [None]:
# Function that filters the questions based on if the prompt ends with a question mark

def is_question(prompt):
    """Tell whether the whitespace-trimmed ``prompt`` ends with a question mark."""
    trimmed = prompt.strip()
    return trimmed.endswith("?")

hallu_factalign_train['is_question'] = hallu_factalign_train['prompt'].apply(is_question)
hallu_factalign_test['is_question'] = hallu_factalign_test['prompt'].apply(is_question)

In [None]:
hallu_factalign_train['prompt'][1]

'What is the Strait of Gibraltar?'

In [None]:
# Filter the questions

hallu_factalign_train = hallu_factalign_train[hallu_factalign_train['is_question'] == True].reset_index(drop=True)
hallu_factalign_test = hallu_factalign_test[hallu_factalign_test['is_question'] == True].reset_index(drop=True)

In [None]:
# Randomly sample 100 test-split examples to be classified, with a fixed seed for reproducibility

# NOTE: this seeds Python's built-in `random` module; the pandas draw below takes its own `random_state=42`.
random.seed(777)

hallu_factalign_sample = hallu_factalign_test.sample(n=100, random_state=42).reset_index(drop=True)

# Set the dataset for the examples to be shown to the LLM (the whole filtered train split)
hallu_factalign_examples = hallu_factalign_train

In [None]:
# Consider only the prompt, the response and the factuality label
hallu_factalign_sample = hallu_factalign_sample[['prompt', 'completion', 'label']]
hallu_factalign_examples = hallu_factalign_examples[['prompt', 'completion', 'label']]

In [None]:
# Check the distribution of the classes

Counter(hallu_factalign_sample['label'])

Counter({False: 47, True: 53})

In [None]:
hallu_factalign_sample = Dataset.from_pandas(hallu_factalign_sample) # Convert to Dataset format
hallu_factalign_sample[0]

{'prompt': 'What is the Kyoto School?',
 'completion': "The Kyoto School, also known as the Kyoto-Gakuen School or the Kyoto School of Economics and Management, was a group of Japanese economists who emerged in the 1960s and 1970s. The school was named after the city of Kyoto, where many of its members studied and taught. The Kyoto School was characterized by its emphasis on microeconomic theory and its rejection of Keynesian economics and other mainstream macroeconomic theories.\n\nThe members of the Kyoto School included a number of prominent Japanese economists, such as:\n\n* Haruhiko Kuroda (1924-2016)\n* Masaru Imai (1932-2010)\n* Akira Kubo (1925-2014)\n* Hirofumi Koyama (1926-2011)\n* Kenzo Nakamura (1925-2019)\n* Nobuo Okishio (1924-2011)\n* Yoshio Ohtsuka (1925-2012)\n* Shigeru Ono (1924-2015)\n* Yoshiki Toda (1939-2015)\n* Akira Watanabe (1925-2011)\n\nThe Kyoto School was notable for its emphasis on empirical research and its rejection of mainstream macroeconomic theories, s

In [None]:
hallu_factalign_examples = Dataset.from_pandas(hallu_factalign_examples) # Convert to Dataset format
hallu_factalign_examples[0]

{'prompt': 'What is the Phaedrus Dialogue?',
 'completion': 'The Phaedrus Dialogue is a philosophical text written by the ancient Greek philosopher Phaedrus. It is a dialogue between Phaedrus and a companion who is called "Socrates." The dialogue is considered one of the most important works in the history of philosophy, and it is known for its exploration of the nature of knowledge, the role of the individual in society, and the relationship between philosophy and religion.\n\nThe Phaedrus Dialogue is set in a garden, and Phaedrus and Socrates are joined by a third person, who is called "Hermotimus." The dialogue is divided into two parts, and each part is divided into two books. The first part of the dialogue is called "The Book of the Gods," and it deals with the nature of the gods and the role of religion in human life. The second part of the dialogue is called "The Book of the Men," and it deals with the nature of human beings and their relationship to the world around them.\n\nIn

In [None]:
# Get the URLs

hallu_factalign_sample = hallu_factalign_sample.map(get_knowledge_url)

In [None]:
# Get the knowledge from the URLs

hallu_factalign_sample = hallu_factalign_sample.map(get_knowledge)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
hallu_factalign_knowledge = hallu_factalign_sample.to_pandas() # Convert to Pandas

In [None]:
hallu_factalign_knowledge.head()

Unnamed: 0,prompt,completion,label,knowledge_url,knowledge
0,What is the Kyoto School?,"The Kyoto School, also known as the Kyoto-Gaku...",False,https://en.wikipedia.org/wiki/Kyoto_School,\n\n\n\nKyoto School - Wikipedia\n\n\n\n\n\n\n...
1,What is the Quadrilateral Security Dialogue?,The Quadrilateral Security Dialogue (QSD) is a...,False,https://www.cfr.org/in-brief/quad-indo-pacific...,\n\n\n\n\n\n\n\n\n\nThe Quad in the Indo-Pacif...
2,Who is Simone de Beauvoir?,"Simone de Beauvoir was a French philosopher, w...",True,https://en.wikipedia.org/wiki/Simone_de_Beauvoir,\n\n\n\nSimone de Beauvoir - Wikipedia\n\n\n\n...
3,What is Fermat's Last Theorem?,Fermat's Last Theorem is a famous mathematical...,False,https://en.wikipedia.org/wiki/Fermat%27s_Last_...,\n\n\n\nFermat's Last Theorem - Wikipedia\n\n\...
4,What is the moral dispute over the use of soli...,The use of solitary confinement in prisons has...,True,https://www.urban.org/sites/default/files/2022...,


In [None]:
# Check for examples without knowledge

hallu_factalign_knowledge[hallu_factalign_knowledge['knowledge'] == '']

Unnamed: 0,prompt,completion,label,knowledge_url,knowledge
4,What is the moral dispute over the use of soli...,The use of solitary confinement in prisons has...,True,https://www.urban.org/sites/default/files/2022...,
54,What is the Stuxnet worm?,Stuxnet is a computer worm that was first disc...,True,https://www.trellix.com/security-awareness/ran...,


In [None]:
# Select the first 10 examples to be shown to the model
hallu_factalign_examples = hallu_factalign_examples.select(range(10))

In [None]:
# Extracts knowledge for the examples to be shown to the LLM for few-shot prompting

hallu_factalign_knowledge_examples = hallu_factalign_examples.map(get_knowledge_url)
hallu_factalign_knowledge_examples = hallu_factalign_knowledge_examples.map(get_knowledge)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [None]:
# Check for examples without knowledge

hallu_factalign_knowledge_examples = hallu_factalign_knowledge_examples.to_pandas()
hallu_factalign_knowledge_examples[hallu_factalign_knowledge_examples['knowledge'] == '']

Unnamed: 0,prompt,completion,label,knowledge_url,knowledge


## **Saving new DFs with knowledge**

In [None]:
path = '/content/drive/MyDrive/Ceccarelli_MasterThesis&Internship/Master Thesis'

# Hand pandas the path instead of a text handle opened with default settings,
# so pandas opens the file itself with its own encoding and newline handling for CSV
hallu_factalign_knowledge.to_csv(path + "/factalign_w_knowledge.csv", index=False)

In [None]:
path = '/content/drive/MyDrive/Ceccarelli_MasterThesis&Internship/Master Thesis'

# Hand pandas the path instead of a text handle opened with default settings,
# so pandas opens the file itself with its own encoding and newline handling for CSV
hallu_factalign_knowledge_examples.to_csv(path + "/factalign_w_knowledge_examples.csv", index=False)

#### **Felm**

In [None]:
# Considers only the domain related to science and world knowledge
science = load_dataset('hkust-nlp/felm', 'science', trust_remote_code=True)
wk = load_dataset('hkust-nlp/felm', 'wk', trust_remote_code=True)

README.md:   0%|          | 0.00/5.41k [00:00<?, ?B/s]

felm.py:   0%|          | 0.00/4.14k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/125 [00:00<?, ? examples/s]

0000.parquet:   0%|          | 0.00/104k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/184 [00:00<?, ? examples/s]

In [None]:
# Concatenate the data from the domains
hallu_felm = concatenate_datasets([science['test'], wk['test']])

In [None]:
# Label each instance: if all the segmented responses are true, the full passage is true; conversely, if at least one
# segmented response is false, the entire passage is false

def compute_new_column(example):
    """Derive the passage-level flag from the per-segment labels.

    The 'hallucination' value is True when no entry of ``example["labels"]``
    equals False, and False as soon as one does. Note that, despite the column
    name, True therefore marks a passage whose segments are all labelled true.
    """
    all_segments_true = False not in example["labels"]
    return {"hallucination": all_segments_true}

hallu_felm = hallu_felm.map(compute_new_column)

Map:   0%|          | 0/309 [00:00<?, ? examples/s]

In [None]:
# Convert to Pandas for better handling
hallu_felm = hallu_felm.to_pandas()

# Remove instances without the response
# (drop=True so the previous index is discarded instead of being added as an extra column)
hallu_felm = hallu_felm[hallu_felm['response'].notna()].reset_index(drop=True)

In [None]:
# Function that filters the questions based on if the prompt ends with a question mark
def is_question(prompt):
    """Return whether ``prompt`` ends with "?" once surrounding whitespace is removed."""
    return bool(prompt.strip().endswith("?"))

hallu_felm['is_question'] = hallu_felm['prompt'].apply(is_question)

In [None]:
# Filter the questions
hallu_felm = hallu_felm[hallu_felm['is_question'] == True].reset_index(drop=True)

In [None]:
# Balance the dataset: draw the same number of random examples from every value of 'hallucination'
# (50 from True and 50 from False when there are two classes)
# NOTE: this seeds Python's built-in `random` module; the pandas draw below takes its own `random_state=42`.
random.seed(777)

num_classes = hallu_felm["hallucination"].nunique()
# Integer division: each class gets an equal share of a 100-example evaluation set
samples_per_class = 100 // num_classes

hallu_felm_sample = hallu_felm.groupby("hallucination").sample(n=samples_per_class, random_state=42)

# Set the dataset for the examples to be shown to the LLM as the ones that were not chosen previously
# (the sampled rows are removed by their index labels, then the remainder is renumbered from 0)
hallu_felm_examples = hallu_felm.drop(hallu_felm_sample.index).reset_index(drop=True)

In [None]:
# Consider only the prompt, the response and the factuality label
hallu_felm_sample = hallu_felm_sample[['prompt', 'response', 'hallucination']]
hallu_felm_examples = hallu_felm_examples[['prompt', 'response', 'hallucination']]

In [None]:
# Check for the correctness of the rebalance
Counter(hallu_felm_sample['hallucination'])

Counter({False: 50, True: 50})

In [None]:
# NOTE: this seeds Python's built-in `random` module; the shuffle below is given its own seed.
random.seed(777)

# The sampled frame still carries the row labels of the frame it was drawn from. Renumber it
# first, otherwise from_pandas keeps those labels as an extra '__index_level_0__' column that
# travels through the map() steps into the saved CSV.
hallu_felm_sample = Dataset.from_pandas(hallu_felm_sample.reset_index(drop=True)).shuffle(seed=42) # Convert to Dataset format
hallu_felm_sample[0]

{'prompt': 'This Jewish American studied at NYU, was born and lived in New York City, and became famous in the mid 1980s for a controversial act. His name is "Bernie" what?',
 'response': 'The person you are referring to is Bernie Goetz, also known as the "Subway Vigilante".',
 'hallucination': True,
 '__index_level_0__': 233}

In [None]:
hallu_felm_examples = Dataset.from_pandas(hallu_felm_examples) # Convert to Dataset format

In [None]:
# Get the URLs
hallu_felm_sample = hallu_felm_sample.map(get_knowledge_url)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
# Get the knowledge from the URLs
hallu_felm_sample = hallu_felm_sample.map(get_knowledge)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
hallu_felm_knowledge = hallu_felm_sample.to_pandas() # Convert to Pandas

In [None]:
hallu_felm_knowledge.head()

Unnamed: 0,prompt,response,hallucination,__index_level_0__,knowledge_url,knowledge
0,"This Jewish American studied at NYU, was born ...",The person you are referring to is Bernie Goet...,True,233,https://arxiv.org/html/2306.03341v6,\n\n\n\nInference-Time Intervention: Eliciting...
1,What's a country where most people love playin...,One country where golf is very popular is Scot...,False,209,https://www.quora.com/Why-do-people-play-golf-...,Why do people play golf? What attracts people ...
2,"Who said, ""Sometimes a cigar is just a cigar""?","Sigmund Freud, the founder of psychoanalysis, ...",True,216,https://www.freud.org.uk/2020/04/22/freud-and-...,\n\n\n\n \n\n\n\n\n\n\n\nFreud and his Cigars...
3,What is the highest mountain in the solar system?,The highest mountain in the solar system is Ol...,False,171,https://coolcosmos.ipac.caltech.edu/ask/199-Wh...,\n\n\n\nWhere is the highest mountain in our S...
4,How long can the human body solely survive on ...,The human body can survive for about 3-5 days ...,False,13,https://www.quora.com/Can-a-person-survive-on-...,Can a person survive on only water for a whole...


In [None]:
# Check for examples without knowledge

hallu_felm_knowledge[hallu_felm_knowledge['knowledge'] == '']

Unnamed: 0,prompt,response,hallucination,__index_level_0__,knowledge_url,knowledge
23,What is the half life time of Uranium-235?,The half-life of Uranium-235 is approximately ...,True,63,https://www.ansto.gov.au/media/2237/download?i...,
27,What is the world record for crossing the Engl...,"As an AI language model, I do not have access ...",False,128,https://www.sify.com/ai-analytics/the-hilariou...,
54,You are given a message (m) and its OTP encryp...,"No, it is not possible to compute the OTP key ...",False,70,https://crypto.stanford.edu/~dabo/courses/Onli...,
76,"If there were a leverage effect in practice, w...","If there were a leverage effect in practice, t...",True,64,https://www.accc.gov.au/system/files/ACCC+comm...,


In [None]:
# Select the first 10 examples to be shown to the model

hallu_felm_examples = hallu_felm_examples.select(range(10))

In [None]:
# Extracts knowledge for the examples to be shown to the LLM for few-shot prompting

hallu_felm_knowledge_examples = hallu_felm_examples.map(get_knowledge_url)
hallu_felm_knowledge_examples = hallu_felm_knowledge_examples.map(get_knowledge)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [None]:
# Convert to Pandas
hallu_felm_knowledge_examples = hallu_felm_knowledge_examples.to_pandas()

# Check for examples without knowledge
hallu_felm_knowledge_examples[hallu_felm_knowledge_examples['knowledge'] == '']

Unnamed: 0,prompt,response,hallucination,knowledge_url,knowledge


## **Saving new DFs with knowledge**

In [None]:
path = '/content/drive/MyDrive/Ceccarelli_MasterThesis&Internship/Master Thesis'

# Hand pandas the path instead of a text handle opened with default settings,
# so pandas opens the file itself with its own encoding and newline handling for CSV
hallu_felm_knowledge.to_csv(path + "/felm_w_knowledge.csv", index=False)

In [None]:
path = '/content/drive/MyDrive/Ceccarelli_MasterThesis&Internship/Master Thesis'

# Hand pandas the path instead of a text handle opened with default settings,
# so pandas opens the file itself with its own encoding and newline handling for CSV
hallu_felm_knowledge_examples.to_csv(path + "/felm_w_knowledge_examples.csv", index=False)