# **Citation Generation and Generation Evaluation**

In [1]:
import locale
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

In [2]:
%pip install --quiet --upgrade bitsandbytes langchain langchain-community langchain-huggingface transformers beautifulsoup4 faiss-gpu rank_bm25 lark langchain_groq datasets ragas

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m124.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m111.0/111.0 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
from langchain_core.documents import Document
from langchain.retrievers import EnsembleRetriever # Supports Ensembling of results from multiple retrievers
from langchain_community.retrievers import BM25Retriever
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline
from langchain_huggingface import ChatHuggingFace
from pydantic import BaseModel, Field
from typing import List
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import BitsAndBytesConfig
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from google.colab import userdata
from langchain import PromptTemplate
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
import nltk
from nltk.corpus import stopwords
import re
import pandas as pd
import os
import json
from google.colab import files
import time
from langchain_groq import ChatGroq
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from sentence_transformers import CrossEncoder
import torch
from datasets import load_dataset
from transformers import pipeline
from ragas import SingleTurnSample
from ragas.metrics import ResponseRelevancy, LLMContextRecall
from ragas.llms import LangchainLLMWrapper

In [5]:
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

In [None]:
# https://huggingface.co/google/t5_xxl_true_nli_mixture
# 45G!
# pipe = pipeline("text2text-generation", model="google/t5_xxl_true_nli_mixture", device = 0)

<br/>
<br/>
<br/>

## **Method: Direct Prompting with statement-wise citations**

Not all models support tool calling/function calling or have native JSON mode support. This method explores the use of direct prompting to ask the model to use a specific format

Additionally, we use few-shot prompting to enable in-context learning

### **Simple Experiment Data to test and observe behaviour**

In [None]:
docs = [
    Document(
        page_content="The best hikes in Norway include the Reinebringen hike in the Lofoten islands. At a modest 448 meters high, Reinebringen is far from one of the highest peaks on the Lofoten islands. Yet this is more than made up for by the iconic view from the summit of Reine. It is not suitable for winter! Also, the trail can be quite demanding as the steps are quite steep.",
        metadata={'country': 'Norway', 'source': 'visitNorway', 'link': 'https://www.visitnorway.com/'},
    ),
    Document(
        page_content="The most famous hikes in Norway include Preikestolen (a beautiful fjord), Kjeragbolten (with a famous boulder stuck between a mountain crevasse) as well as Trolltunga which resembes a tongue.",
        metadata={'country': 'Norway', 'source': 'norwayhikes', 'link': 'https://www.norwayhikes.com/'},
    ),
    Document(
        page_content="The famous street food of Iceland is the Hotdog! It is called the Baejarins Beztu Pylsur hot dog is made of a mix of lamb, beef and pork. Other delicacies of iceland include Fish and Chips as well as Tommi's burger.",
        metadata={'country': 'Iceland', 'source': 'IcelandTours', 'link': 'https://www.icelandtours.com/'},
    ),
    Document(
        page_content="Iceland is very famous for its fish freshly caught from the arctic sea. Famous dishes include the classic fish and chips, arctic cod and salmon soup!",
        metadata={'country': 'Iceland', 'source': 'IcelandGov', 'link': 'https://www.welcometoiceland.com/'},
    ),
    Document(
        page_content="The pasteries and bread in Iceland are fantastic, there are many bakeries in Iceland. One of the most popular bread is called dark rye bread ",
        metadata={'country': 'Iceland', 'source': 'IcelandFood', 'link': 'https://www.icelandicdelicacies.com/'},
    ),
    Document(
        page_content="Transportation within Reykjavik is fairly convenient as there is a public bus service called BSI. All you need to do is to download their mobile app, follow the instructions, and you're good to go. Transportation to places outside Reykjavik however requires a car. Some options include car rentals as well as booking bus tours.",
        metadata={'country': 'Iceland', 'source': 'IcelandBuses', 'link': 'https://www.icelandbuses.com/'},
    ),
    Document(
        page_content="Driving in Iceland is an amazing experience - open roads, majestic volcanos and towering mountains along the way, sheep and arctic foxes make it a great experience. All you need is an international driving license. And, please drive slowly during the winter!",
        metadata={'country': 'Iceland', 'source': 'IcelandBuses', 'link': 'https://www.icelandbuses.com/'},
    ),
    Document(
        page_content="Iceland is a must-go to place for adventurous people! You can hike active volcanoes, drive a jeep through the volcanic ash, explore a natural ice cave, see waterfalls. There are so many opportunities for an adventurer.",
        metadata={'country': 'Iceland', 'source': 'IcelandAdventures', 'link': 'https://www.icelandadventures.com/'},
    ),
    Document(
        page_content="One of the most famous diving sites in the world, Silfra, is located in Iceland! It is the only diving site in the world where you can dive between 2 tectonic plates. The water is also so fresh that you can drink from it, it is the best water that you will ever taste.",
        metadata={'country': 'Iceland', 'source': 'IcelandDiving', 'link': 'https://www.icelanddiving.com/'},
    ),
    Document(
        page_content="One of the most scenic hikes in Switzerland can be done at Grindelwald. At the summit of Grindelwald, a beautiful lake awaits you. However, you can only see this lake during summer time. Other notable hikes include Zermatt, i.e. the matterhorn and Lauterbrunnen.",
        metadata={'country': 'Switzerland', 'source': 'Swisstravels', 'link': 'https://www.switzerlandtravels.com/'},
    ),
    Document(
        page_content="The matterhorn at zermatt is a must-go for hiking enthusiasts. It is the icon of the famous chocolate: Toblerone. However, it is recommended to hire a mountain guide to go with you as it can be very dangerous!",
        metadata={'country': 'Switzerland', 'source': 'SwissHikes', 'link': 'https://www.switzerlandhiking.com/'},
    ),
]

In [None]:
question = "What can I eat in Iceland?"

In [None]:
prompt_v1 = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
For each statement in your answer, **you must include at least one inline numbered citation** (e.g., [1], [2]) for the Document Objects supporting it.
Always reset the numbering for citations to start from **1** for each response, and ensure the numbering increases sequentially.
Statements without sufficient support from the context should not be included in the answer.

At the bottom, provide the full citations corresponding to each number, but include **only the page content** of the Document Object (exclude metadata).

Write your response in a natural and coherent way, ensuring that the statements flow logically and transition smoothly between ideas. Use connecting words and phrases (e.g., "Additionally," "Furthermore," "For instance," "As a result,") to enhance readability.

### **IMPORTANT**:
1. Write your entire response as **a single paragraph**. Avoid using any new line characters in the response. All statements should flow naturally and seamlessly into one another.
2. If the answer cannot be found in the context, say "I don't know." **Do not include unsupported statements or make up information.**

Respond in the following format:
---
Statement 1 [1]. Statement 2 [2, 3].

Citations:
[1]: <Page Content 1> //Only the page content of the document
[2]: <Page Content 2>
[3]: <Page Content 3>
---

Here are a few examples:

---
The best hikes in Norway include the Reinebringen hike in the Lofoten islands, Preikestolen, Kjeragbolten, and Trolltunga [1, 2]. The Reinebringen hike, although not one of the highest peaks, offers an iconic view of the Reine fjord from its summit [1]. Preikestolen, Kjeragbolten, and Trolltunga are famous for their stunning fjord views and unique geological formations, such as a boulder stuck between a mountain crevasse and a tongue-shaped rock [2].

Citations:
[1]: "The best hikes in Norway include the Reinebringen hike in the Lofoten islands. At a modest 448 meters high, Reinebringen is far from one of the highest peaks on the Lofoten islands. Yet this is more than made up for by the iconic view from the summit of Reine. It is not suitable for winter! Also, the trail can be quite demanding as the steps are quite steep."
[2]: "The most famous hikes in Norway include Preikestolen (a beautiful fjord), Kjeragbolten (with a famous boulder stuck between a mountain crevasse) as well as Trolltunga which resembles a tongue."
---

---
In Switzerland, you can embark on several scenic hikes [1]. One such hike is at Grindelwald, where at the summit, you will find a stunning lake, but it's only visible during the summer [1]. Other notable hikes include Zermatt, also known as the Matterhorn [1], and Lauterbrunnen [1]. For those seeking a challenging hike, the Matterhorn at Zermatt is a must-go [2]. This iconic peak is featured on the Toblerone chocolate and is best explored with a mountain guide due to the inherent dangers [2].

Citations:
[1]: "One of the most scenic hikes in Switzerland can be done at Grindelwald. At the summit of Grindelwald, a beautiful lake awaits you. However, you can only see this lake during summer time. Other notable hikes include Zermatt, i.e. the matterhorn and Lauterbrunnen."
[2]: "The matterhorn at zermatt is a must-go for hiking enthusiasts. It is the icon of the famous chocolate: Toblerone. However, it is recommended to hire a mountain guide to go with you as it can be very dangerous!"
---

---
In Iceland, you can participate in a variety of adventurous activities [1]. For instance, you can hike active volcanoes and explore a natural ice cave, offering unique geological experiences [1]. Driving in Iceland is also an amazing adventure, with open roads, majestic volcanoes, and towering mountains as your backdrop, and the possibility of encountering sheep and arctic foxes along the way [2]. Additionally, Iceland is known for its exceptional diving sites [3]. One of the most famous in the world, Silfra, is located in Iceland [3]. It is the only diving site where you can dive between two tectonic plates, and the water is so fresh that you can drink it, promising an unparalleled tasting experience [3].

Citations:
[1]: "Iceland is a must-go to place for adventurous people! You can hike active volcanoes, drive a jeep through the volcanic ash, explore a natural ice cave, see waterfalls. There are so many opportunities for an adventurer."
[2]: "Driving in Iceland is an amazing experience - open roads, majestic volcanos and towering mountains along the way, sheep and arctic foxes make it a great experience. All you need is an international driving license. And, please drive slowly during the winter!"
[3]: "One of the most famous diving sites in the world, Silfra, is located in Iceland! It is the only diving site in the world where you can dive between 2 tectonic plates. The water is also so fresh that you can drink from it, it is the best water that you will ever taste."
---

Question: {question}

Context: {context}

Helpful Answer:
"""


In [None]:
prompt = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question. Each statement in your answer must be **entailable** by at least one of the provided citations, meaning the information in the statement must align directly and logically with the cited context.

### **IMPORTANT REQUIREMENTS**:
1. **Inline Numbered Citations**: For each statement in your answer, **you must include at least one inline numbered citation** (e.g., [1], [2]). Statements without sufficient support from the context should not be included in the answer.
2. **Entailment Verification**: Ensure that every statement (hypothesis) is directly entailed by the cited context (premise). Use only information explicitly stated or strongly implied by the citation. Do not include unsupported information or make up details.
3. **Sequential Citation Numbering**: Reset the numbering for citations to start from **1** for each response, and ensure the numbering increases sequentially.
4. **Single Paragraph**: Write your response as **a single paragraph**. Avoid using any new line characters in the response. All statements should flow naturally and seamlessly into one another.

At the bottom, provide the full citations corresponding to each number, but include **only the page content** of the Document Object (exclude metadata). Use citations exactly as provided in the context.

Respond in the following format:
---
Statement 1 [1]. Statement 2 [2, 3].

Citations:
[1]: <Page Content 1> //Only the page content of the document
[2]: <Page Content 2>
[3]: <Page Content 3>
---

### **HOW TO APPROACH**:
- Before writing your answer, check if each statement is **entailable** by verifying that it is supported by one or more citations.
- If the context does not support a statement, exclude it from your answer and say "I don't know."

Here are a few examples:

---
The best hikes in Norway include the Reinebringen hike in the Lofoten islands, Preikestolen, Kjeragbolten, and Trolltunga [1, 2]. The Reinebringen hike, although not one of the highest peaks, offers an iconic view of the Reine fjord from its summit [1]. Preikestolen, Kjeragbolten, and Trolltunga are famous for their stunning fjord views and unique geological formations, such as a boulder stuck between a mountain crevasse and a tongue-shaped rock [2].

Citations:
[1]: "The best hikes in Norway include the Reinebringen hike in the Lofoten islands. At a modest 448 meters high, Reinebringen is far from one of the highest peaks on the Lofoten islands. Yet this is more than made up for by the iconic view from the summit of Reine. It is not suitable for winter! Also, the trail can be quite demanding as the steps are quite steep."
[2]: "The most famous hikes in Norway include Preikestolen (a beautiful fjord), Kjeragbolten (with a famous boulder stuck between a mountain crevasse) as well as Trolltunga which resembles a tongue."
---

---
In Switzerland, you can embark on several scenic hikes [1]. One such hike is at Grindelwald, where at the summit, you will find a stunning lake, but it's only visible during the summer [1]. Other notable hikes include Zermatt, also known as the Matterhorn [1], and Lauterbrunnen [1]. For those seeking a challenging hike, the Matterhorn at Zermatt is a must-go [2]. This iconic peak is featured on the Toblerone chocolate and is best explored with a mountain guide due to the inherent dangers [2].

Citations:
[1]: "One of the most scenic hikes in Switzerland can be done at Grindelwald. At the summit of Grindelwald, a beautiful lake awaits you. However, you can only see this lake during summer time. Other notable hikes include Zermatt, i.e. the matterhorn and Lauterbrunnen."
[2]: "The matterhorn at zermatt is a must-go for hiking enthusiasts. It is the icon of the famous chocolate: Toblerone. However, it is recommended to hire a mountain guide to go with you as it can be very dangerous!"
---

---
In Iceland, you can participate in a variety of adventurous activities [1]. For instance, you can hike active volcanoes and explore a natural ice cave, offering unique geological experiences [1]. Driving in Iceland is also an amazing adventure, with open roads, majestic volcanoes, and towering mountains as your backdrop, and the possibility of encountering sheep and arctic foxes along the way [2]. Additionally, Iceland is known for its exceptional diving sites [3]. One of the most famous in the world, Silfra, is located in Iceland [3]. It is the only diving site where you can dive between two tectonic plates, and the water is so fresh that you can drink it, promising an unparalleled tasting experience [3].

Citations:
[1]: "Iceland is a must-go to place for adventurous people! You can hike active volcanoes, drive a jeep through the volcanic ash, explore a natural ice cave, see waterfalls. There are so many opportunities for an adventurer."
[2]: "Driving in Iceland is an amazing experience - open roads, majestic volcanos and towering mountains along the way, sheep and arctic foxes make it a great experience. All you need is an international driving license. And, please drive slowly during the winter!"
[3]: "One of the most famous diving sites in the world, Silfra, is located in Iceland! It is the only diving site in the world where you can dive between 2 tectonic plates. The water is also so fresh that you can drink from it, it is the best water that you will ever taste."
---

Question: {question}

Context: {context}

Helpful Answer:
"""

In [None]:
llm = ChatGroq()
llm_pipeline = llm | StrOutputParser()
response = llm_pipeline.invoke(prompt.format(question=question,context=docs))

In [None]:
response

'In Iceland, you can eat a variety of dishes, some of which are famous and unique to the country [3, 4]. The Baejarins Beztu Pylsur hot dog is a well-known street food, made of a mix of lamb, beef, and pork [3]. Other popular delicacies include Fish and Chips and Tommi\'s burger [3]. Iceland is particularly famous for its fresh fish, sourced directly from the Arctic sea, with dishes like the classic fish and chips, Arctic cod, and salmon soup being popular choices [4]. Additionally, Icelandic pastries and bread, such as the dark rye bread, are fantastic and widely available in the country\'s many bakeries [5].\n\nCitations:\n[3]: "The famous street food of Iceland is the Hotdog! It is called the Baejarins Beztu Pylsur hot dog is made of a mix of lamb, beef and pork. Other delicacies of iceland include Fish and Chips as well as Tommi\'s burger."\n[4]: "Iceland is very famous for its fish freshly caught from the arctic sea. Famous dishes include the classic fish and chips, arctic cod and

In [None]:
response.split('\n')

["In Iceland, you can eat a variety of dishes, some of which are famous and unique to the country [3, 4]. The Baejarins Beztu Pylsur hot dog is a well-known street food, made of a mix of lamb, beef, and pork [3]. Other popular delicacies include Fish and Chips and Tommi's burger [3]. Iceland is particularly famous for its fresh fish, sourced directly from the Arctic sea, with dishes like the classic fish and chips, Arctic cod, and salmon soup being popular choices [4]. Additionally, Icelandic pastries and bread, such as the dark rye bread, are fantastic and widely available in the country's many bakeries [5].",
 '',
 'Citations:',
 '[3]: "The famous street food of Iceland is the Hotdog! It is called the Baejarins Beztu Pylsur hot dog is made of a mix of lamb, beef and pork. Other delicacies of iceland include Fish and Chips as well as Tommi\'s burger."',
 '[4]: "Iceland is very famous for its fish freshly caught from the arctic sea. Famous dishes include the classic fish and chips, arc

<br/>
<br/>
<br/>

## **More Extensive Experiment Data**

In [6]:
data_folder = os.path.join(os.getcwd(), 'data')
os.makedirs(data_folder, exist_ok=True)

In [7]:
uploaded_files = files.upload()

Saving sweden_articles.csv to sweden_articles.csv
Saving iceland_articles.csv to iceland_articles.csv
Saving finland_articles.csv to finland_articles.csv


In [8]:
for file_name in uploaded_files.keys():
    os.rename(file_name, os.path.join(data_folder, file_name))

In [9]:
article_names = ['finland_articles.csv', 'iceland_articles.csv', 'sweden_articles.csv']
article_fps = [os.path.join('.', 'data', article_name) for article_name in article_names]
docs = []
for article_fp in article_fps:
  df = pd.read_csv(article_fp)
  for _, row in df.iterrows():
    text = row['Title'] + " " + row['Content'] # Take out title later

    doc = Document(
        page_content=text,
        metadata={'country': row['Country'], 'source': row['Source'], 'link': row['Article Links']}
    )

    docs.append(doc)

In [10]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=250, chunk_overlap=50, add_start_index=True
)
chunked_docs = text_splitter.split_documents(docs)

In [11]:
num_docs_retrieved = 10
top_k = 10
bi_encoder_embeddings_model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings_model = HuggingFaceEmbeddings(model_name=bi_encoder_embeddings_model_name)
cross_encoder_embedings_model_name = "BAAI/bge-reranker-large"
cross_encoder_model = HuggingFaceCrossEncoder(model_name=cross_encoder_embedings_model_name)
compressor = CrossEncoderReranker(model=cross_encoder_model, top_n=top_k)
retriever_eval_res = {}
search_type = 'mmr'

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

In [12]:
vector_store_index = faiss.IndexFlatL2(len(embeddings_model.embed_query("hello world")))
faiss_vector_store = FAISS(
  embedding_function=embeddings_model,
  index=vector_store_index,
  docstore=InMemoryDocstore(),
  index_to_docstore_id={})
faiss_vector_store.add_documents(chunked_docs)
faiss_retriever = faiss_vector_store.as_retriever(search_type=search_type, search_kwargs={"k": num_docs_retrieved})
reranked_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=faiss_retriever)

In [13]:
template = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question. Each statement in your answer must be **entailable** by at least one of the provided citations, meaning the information in the statement must align directly and logically with the cited context.

### **IMPORTANT REQUIREMENTS**:
1. **Inline Numbered Citations**: For each statement in your answer, **you must include at least one inline numbered citation** (e.g., [1], [2]). Statements without sufficient support from the context should not be included in the answer.
2. **Entailment Verification**: Ensure that every statement (hypothesis) is directly entailed by the cited context (premise). Use only information explicitly stated or strongly implied by the citation. Do not include unsupported information or make up details.
3. **Sequential Citation Numbering**: Reset the numbering for citations to start from **1** for each response, and ensure the numbering increases sequentially.
4. **Single Paragraph**: Write your response as **a single paragraph**. Avoid using any new line characters in the response. All statements should flow naturally and seamlessly into one another.

At the bottom, provide the full citations corresponding to each number, but include **only the page content** of the Document Object (exclude metadata). Use citations exactly as provided in the context.

Respond in the following format:
---
Statement 1 [1]. Statement 2 [2, 3].

Citations:
[1]: <Page Content 1> //Only the page content of the document
[2]: <Page Content 2>
[3]: <Page Content 3>
---

### **HOW TO APPROACH**:
- Before writing your answer, check if each statement is **entailable** by verifying that it is supported by one or more citations.
- If the context does not support a statement, exclude it from your answer and say "I don't know."

Here are a few examples:

---
The best hikes in Norway include the Reinebringen hike in the Lofoten islands, Preikestolen, Kjeragbolten, and Trolltunga [1, 2]. The Reinebringen hike, although not one of the highest peaks, offers an iconic view of the Reine fjord from its summit [1]. Preikestolen, Kjeragbolten, and Trolltunga are famous for their stunning fjord views and unique geological formations, such as a boulder stuck between a mountain crevasse and a tongue-shaped rock [2].

Citations:
[1]: "The best hikes in Norway include the Reinebringen hike in the Lofoten islands. At a modest 448 meters high, Reinebringen is far from one of the highest peaks on the Lofoten islands. Yet this is more than made up for by the iconic view from the summit of Reine. It is not suitable for winter! Also, the trail can be quite demanding as the steps are quite steep."
[2]: "The most famous hikes in Norway include Preikestolen (a beautiful fjord), Kjeragbolten (with a famous boulder stuck between a mountain crevasse) as well as Trolltunga which resembles a tongue."
---

---
In Switzerland, you can embark on several scenic hikes [1]. One such hike is at Grindelwald, where at the summit, you will find a stunning lake, but it's only visible during the summer [1]. Other notable hikes include Zermatt, also known as the Matterhorn [1], and Lauterbrunnen [1]. For those seeking a challenging hike, the Matterhorn at Zermatt is a must-go [2]. This iconic peak is featured on the Toblerone chocolate and is best explored with a mountain guide due to the inherent dangers [2].

Citations:
[1]: "One of the most scenic hikes in Switzerland can be done at Grindelwald. At the summit of Grindelwald, a beautiful lake awaits you. However, you can only see this lake during summer time. Other notable hikes include Zermatt, i.e. the matterhorn and Lauterbrunnen."
[2]: "The matterhorn at zermatt is a must-go for hiking enthusiasts. It is the icon of the famous chocolate: Toblerone. However, it is recommended to hire a mountain guide to go with you as it can be very dangerous!"
---

---
In Iceland, you can participate in a variety of adventurous activities [1]. For instance, you can hike active volcanoes and explore a natural ice cave, offering unique geological experiences [1]. Driving in Iceland is also an amazing adventure, with open roads, majestic volcanoes, and towering mountains as your backdrop, and the possibility of encountering sheep and arctic foxes along the way [2]. Additionally, Iceland is known for its exceptional diving sites [3]. One of the most famous in the world, Silfra, is located in Iceland [3]. It is the only diving site where you can dive between two tectonic plates, and the water is so fresh that you can drink it, promising an unparalleled tasting experience [3].

Citations:
[1]: "Iceland is a must-go to place for adventurous people! You can hike active volcanoes, drive a jeep through the volcanic ash, explore a natural ice cave, see waterfalls. There are so many opportunities for an adventurer."
[2]: "Driving in Iceland is an amazing experience - open roads, majestic volcanos and towering mountains along the way, sheep and arctic foxes make it a great experience. All you need is an international driving license. And, please drive slowly during the winter!"
[3]: "One of the most famous diving sites in the world, Silfra, is located in Iceland! It is the only diving site in the world where you can dive between 2 tectonic plates. The water is also so fresh that you can drink from it, it is the best water that you will ever taste."
---

Question: {question}

Context: {context}

Helpful Answer:
"""

prompt_template = PromptTemplate.from_template(template)

In [14]:
llm = ChatGroq()
llm_pipeline = llm | StrOutputParser()
pipeline = ( prompt_template | llm | StrOutputParser()  )

### **Define Questions**

In [None]:
questions_answer = ["What hikes can I do in Finland?", "What hikes can I do in Iceland?", "What hikes can I do in Sweden?", "What food can I eat in Sweden?", "How is the transportation in Sweden?"]

In [None]:
responses = []
for q in questions_answer:
  retrieved_docs = reranked_retriever.invoke(q)
  response = pipeline.invoke({"question": q,"context": retrieved_docs})
  print(response)
  print("")
  print("###############################################################")
  responses.append(response)

In Finland, there are several popular hiking trails to consider, including Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland [0, 1]. Additionally, the Oulanka, Riisitunturi, and Hossa National Parks offer shorter hikes with beautiful landscape views [2]. The Karhunkierros trail is particularly famous and will take you across rugged landscapes with astonishing waterfalls that you can admire from hanging bridges [5]. If you're a beginner or seeking easy, accessible hikes, there are short-distance, beginner-level excursions available across Finland's four regions [4]. Weather conditions are an important factor to consider, especially in winter [1, 3].

Citations:
[0]: "The most popular trails in Finland include Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland."
[1]: "hiking in Finland is easy because there are no tall mountains or very demanding trails. Weather conditions, howe

### **Evaluate Citations**

In [None]:
# https://www.sbert.net/docs/cross_encoder/pretrained_models.html#nli
nli_model = CrossEncoder("cross-encoder/nli-deberta-v3-base")

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/738M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/417 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/18.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/156 [00:00<?, ?B/s]



**Citation Recall**

In [None]:
def process_response(response):
  # Get the answer and citations
  response_split = response.split('Citations')
  answer = response_split[0].split('\n')
  citations = response_split[-1].split('\n')

  # For each citation, store its number (key) and text (value) in a dict
  citations_dict = {}
  for citation in citations:
      match = re.match(r"\[(.*?)\]", citation)
      if match:
          citation_number = match.group(1)
          citation_content = ''.join(citation.split(f'[{citation_number}]:')).strip()
          citations_dict[citation_number] = citation_content

  # Extract the statements from the answer
  statements = []
  # For each element in the answer
  for a in answer:
    # If the element's length is > 0
    if len(a.strip())>0:
      # Split the element by '.' to get each statement
      for b in a.split('.'):
        # If the statement's length is > 0, add it to the overall statements
        if len(b.strip())>0:
          statements.append(b.strip())

  # For each statement, store it as a key and it's corresponding citations as value
  statements_citations = {}
  for statement in statements:
      statement_citations = []
      inline_citations = re.findall(r"\[(.*?)\]", statement)
      # For each citation group found in the statement
      for citation_group in inline_citations:
          # To handle cases such as [1, 2]
          for citation in citation_group.split(','):
            citation = citation.strip()
            # If it's a 'valid' citation
            if citation in citations_dict:
                # Clean the text to be free of that citation
                statement = statement.replace(f"[{citation_group}]", "").strip()
                # Append that citation to the statement's citations
                statement_citations.append(citations_dict[citation])
      # Add the statement and its corresponding citations to the dictionary
      statements_citations[statement] = statement_citations

  return statements_citations

In [None]:
def response_citation_recall(response):
  statement_citations = process_response(response)
  entailment_count = 0
  # For each statement, calculate if there is an entailment by feeding the (concatenated citations i.e. premise, statement i.e. hypothesis)
  for k,v in statement_citations.items():
    # Premise should come first
    # https://towardsdatascience.com/natural-language-inference-an-overview-57c0eecf6517
    logits = nli_model.predict(['.'.join(v),k])
    probabilities = torch.softmax(torch.tensor(logits), dim=0)
    formatted_probabilities = [float(f"{val:.4f}") for val in probabilities]
    # If there is an entailment, increment the entailment count for entire response
    if logits.argmax()==1:
      print("Entailment")
      print(formatted_probabilities)
      print("Statement: ")
      print(k)
      print("Citations: ")
      print(v)
      print("")
      print("")
      entailment_count+=1
    else:
      print("Non-Entailment")
      print(formatted_probabilities)
      print("Statement: ")
      print(k)
      print("Citations: ")
      print(v)
      print("")
      print("")
  # To calculate the citation recall for the response, normalise the total entailment count by the number of statements in the response
  # https://ar5iv.labs.arxiv.org/html/2305.14627
  response_citation_recall_val = entailment_count/len(statement_citations)
  return response_citation_recall_val

In [None]:
def overall_citation_recall(responses):
  cumulative_response_citation_recall = 0
  # For each response/set of statements, get its citation recall value
  for response in responses:
    response_citation_recall_val = response_citation_recall(response)
    cumulative_response_citation_recall+=response_citation_recall_val
  # To get the citation recall over the set of responses, sum the citation recall value of each response and normalise it by the number of responses
  return cumulative_response_citation_recall/len(responses)

In [None]:
overall_citation_recall_val = overall_citation_recall(responses)

Entailment
[0.0, 0.9945, 0.0055]
Statement: 
In Finland, there are several popular hiking trails to consider, including Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland
Citations: 
['"The most popular trails in Finland include Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland."', '"hiking in Finland is easy because there are no tall mountains or very demanding trails. Weather conditions, however, are something to do consider"']


Non-Entailment
[0.0001, 0.0001, 0.9998]
Statement: 
Additionally, the Oulanka, Riisitunturi, and Hossa National Parks offer shorter hikes with beautiful landscape views
Citations: 
['"One of the most famous hiking trails in Finland, Karhunkierros, is located here, but if trekking 80-kilometres isn’t your idea of a good time, you can take a shorter hike in Oulanka, Riisitunturi, or Hossa National Park – just be"']


Non-Entailment
[0.0, 0.014, 0.986]

In [None]:
print(f'The overall citation recall is {overall_citation_recall_val}')

The overall citation recall is 0.5565079365079365


**Observations**

The entailment model used is [```nli-deberta-v3-base```](https://huggingface.co/cross-encoder/nli-deberta-v3-base). The model was trained on the SNLI and MultiNLI datasets.  achieves a 92.38 accuracy on the SNLI-test dataset and 90.04 accuracy on the MNLI mismatched set. These are benchmark NLI datasets.

Despite this high performance on benchmark datasets, it is not 100% accurate (as most deep learning models are) and may require some fine-tuning on additional human-labelled datasets for specific tasks.

Some discrepancies can be found below:

```
Non-Entailment
[0.0001, 0.0013, 0.9986]
Statement:
Additionally, there are short-distance, beginner-level excursions available across Finland's four regions
Citations:
['"Short-distance, beginner-level excursions: Check out this selection of easy and accessible half-day nature destinations across Finland’s four regions. Ranked the best outdoor"']

Non-Entailment
[0.0, 0.0018, 0.9982]
Statement:
The difficulty of hiking trails in Finnish national parks ranges from easy to demanding, so considering your experience level is advised
Citations:
['"When it comes to experiencing the natural wonders of Finland, few activities can rival the joy and excitement of going for a walk or a hike. Finland\'s over 40 national parks. To consider is your experience level. Are you a seasoned hiker or just getting started? In general, hiking in Finland is easy because there are no tall mountains or very demanding trails. Weather conditions, however, are something to do consider"']

Non-Entailment
[0.0002, 0.0127, 0.9871]
Statement:
In southern Sweden, there are numerous options for hiking, including trails in the Änggårdsbergen nature reserve and island walks in the archipelago
Citations:
["<Page Content 199> '2 Hiking and biking in Änggårdsbergen nature reserve 3 Island walks in the archipelago'"]

Non-Entailment
[0.0001, 0.0193, 0.9806]
Statement:
During summer, Swedes enjoy pickled herring with aquavit or 'snaps' and strawberries
Citations:
['"herring, Aquavit or \'snaps\' and strawberries. When combined, they will give you the taste of real Swedish summer. Potatoes came to Sweden in the mid-1650s, but it took a hundred years before they were planted, harvested and eaten on a larger scale"']

Non-Entailment
[0.0005, 0.0002, 0.9994]
Statement:
Seafood lovers can participate in lobster and mussel safaris, or try locally caught Arctic char and whitefish, and Kalix Caviar is a must-try delicacy
Citations:
['"meat. Elk meat is another staple source of protein, while fish lovers can tuck into locally caught Arctic char and whitefish. Kalix Caviar is another must-try delicacy. It’s the first Swedish food product to have received Protected Designation of"']

Non-Entailment
[0.0001, 0.0002, 0.9997]
Statement:
This system is easy to navigate and allows for payment with cards
Citations:
['"Sweden has an extensive public transport system with subway, tram and bus, and you can usually pay with your card. If you need a taxi or rental car, choose an electric vehicle if possible. Taxi Stockholm, the largest taxi company in Stockholm,"']

Non-Entailment
[0.0006, 0.0186, 0.9808]
Statement:
Roads in Sweden are well-developed, with the main road running along the coast being the E6, while the E45 and E18 serve some inland areas
Citations:
['"The local roads are well developed – the main road running from the south and along the coast is the E6, while the E45 and E18 serve some of the inland area."']
```

In [None]:
snli_dataset = load_dataset("stanfordnlp/snli", streaming=True)
snli_dataset_samples = 0
snli_dataset_samples_cutoff = 100
for sample in snli_dataset['train']:
    if snli_dataset_samples >= snli_dataset_samples_cutoff:
      break
    if sample['label']==1:
      print(f"Citation/Premise: {sample['premise']}")
      print(f"Hypothesis: {sample['hypothesis']}")
      print("")
      snli_dataset_samples+=1

README.md:   0%|          | 0.00/16.0k [00:00<?, ?B/s]

Citation/Premise: A person on a horse jumps over a broken down airplane.
Hypothesis: A person is training his horse for a competition.

Citation/Premise: Children smiling and waving at camera
Hypothesis: They are smiling at their parents

Citation/Premise: A boy is jumping on skateboard in the middle of a red bridge.
Hypothesis: The boy is wearing safety equipment.

Citation/Premise: An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background.
Hypothesis: An older man drinks his juice as he waits for his daughter to get off work.

Citation/Premise: An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background.
Hypothesis: An elderly man sits in a small shop.

Citation/Premise: Two blond women are hugging one another.
Hypothesis: Some women are hugging on vacation.

Citation/Premise: A few people in a restaurant setting, one of them 

In [None]:
mnli_dataset = load_dataset("nyu-mll/multi_nli", streaming=True)
mnli_dataset_samples = 0
mnli_dataset_samples_cutoff = 100
for sample in mnli_dataset['train']:
    if mnli_dataset_samples >= mnli_dataset_samples_cutoff:
      break
    if sample['label']==1:
      print(f"Citation/Premise: {sample['premise']}")
      print(f"Hypothesis: {sample['hypothesis']}")
      print("")
      mnli_dataset_samples+=1

README.md:   0%|          | 0.00/8.89k [00:00<?, ?B/s]

Citation/Premise: Conceptually cream skimming has two basic dimensions - product and geography.
Hypothesis: Product and geography are what make cream skimming work. 

Citation/Premise: yeah i tell you what though if you go price some of those tennis shoes i can see why now you know they're getting up in the hundred dollar range
Hypothesis: The tennis shoes have a range of prices.

Citation/Premise: But a few Christian mosaics survive above the apse is the Virgin with the infant Jesus, with the Archangel Gabriel to the right (his companion Michael, to the left, has vanished save for a few feathers from his wings).
Hypothesis: Most of the Christian mosaics were destroyed by Muslims.  

Citation/Premise: It's not that the questions they asked weren't interesting or legitimate (though most did fall under the category of already asked and answered).
Hypothesis: All of the questions were interesting according to a focus group consulted on the subject.

Citation/Premise: Thebes held onto powe

**Citation Precision**

In [None]:
def process_response(response):
  # Get the answer and citations
  response_split = response.split('Citations')
  answer = response_split[0].split('\n')
  citations = response_split[-1].split('\n')

  # For each citation, store its number (key) and text (value) in a dict
  citations_dict = {}
  for citation in citations:
      match = re.match(r"\[(.*?)\]", citation)
      if match:
          citation_number = match.group(1)
          citation_content = ''.join(citation.split(f'[{citation_number}]:')).strip()
          citations_dict[citation_number] = citation_content

  # Extract the statements from the answer
  statements = []
  # For each element in the answer
  for a in answer:
    # If the element's length is > 0
    if len(a.strip())>0:
      # Split the element by '.' to get each statement
      for b in a.split('.'):
        # If the statement's length is > 0, add it to the overall statements
        if len(b.strip())>0:
          statements.append(b.strip())

  # For each statement, store it as a key and it's corresponding citations as value
  statements_citations = {}
  for statement in statements:
      statement_citations = []
      inline_citations = re.findall(r"\[(.*?)\]", statement)
      # For each citation group found in the statement
      for citation_group in inline_citations:
          # To handle cases such as [1, 2]
          for citation in citation_group.split(','):
            citation = citation.strip()
            # If it's a 'valid' citation
            if citation in citations_dict:
                # Clean the text to be free of that citation
                statement = statement.replace(f"[{citation_group}]", "").strip()
                # Append that citation to the statement's citations
                statement_citations.append(citations_dict[citation])
      # Add the statement and its corresponding citations to the dictionary
      statements_citations[statement] = statement_citations

  return statements_citations

In [None]:
def response_citation_precision(response):
  label_mapping = ['contradiction', 'entailment', 'neutral']
  statement_citations = process_response(response)
  precision_count = 0
  citation_count = 0
  # For each statement,
  for k,v in statement_citations.items():
    print(f'Current Statement: {k}')
    print(f'Citations for current statement: {v}')
    # For each citation in the statement,
    for i in range(len(v)):
      # Increment the total citation count for the response
      citation_count+=1
      candidate_citation = v[i]
      other_citations = v[:i] + v[i+1:]
      logits_candidate = nli_model.predict([candidate_citation,k])
      logits_other = nli_model.predict(['.'.join(other_citations),k])
      print(f"Candidate citation: {candidate_citation}")
      print(label_mapping[logits_candidate.argmax()])
      print(f"Other citation: {other_citations}")
      print(label_mapping[logits_other.argmax()])
      print("")
      # If the citation fully supports the statement or the other set of citations (excluding the curr one) do not support the statment,
      # increment the precision count
      if logits_candidate.argmax()==1 or logits_other.argmax()!=1:
        precision_count+=1
    print("")
    print("")
    print("")
    print("")
  # https://ar5iv.labs.arxiv.org/html/2305.14627
  return precision_count/citation_count

In [None]:
def overall_citation_precision(responses):
  cumulative_response_citation_precision = 0
  # For each response/set of statements, get its citation precision value
  for response in responses:
    response_citation_precision_val = response_citation_precision(response)
    cumulative_response_citation_precision +=response_citation_precision_val
  # To get the citation precision over the set of responses, sum the citation response value of each response and normalise it by the number of responses
  return cumulative_response_citation_precision/len(responses)

In [None]:
overall_citation_precision_val = overall_citation_precision(responses)

Current Statement: In Finland, there are several popular hiking trails to consider, including Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland
Citations for current statement: ['"The most popular trails in Finland include Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland."', '"hiking in Finland is easy because there are no tall mountains or very demanding trails. Weather conditions, however, are something to do consider"']
Candidate citation: "The most popular trails in Finland include Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland."
neutral
Other citation: ['"hiking in Finland is easy because there are no tall mountains or very demanding trails. Weather conditions, however, are something to do consider"']
neutral

Candidate citation: "hiking in Finland is easy because there are no tall mountains or very demanding tr

In [None]:
print(f'The overall citation precision is {overall_citation_precision_val}')

The overall citation precision is 0.9464285714285715


**Calculate Attribution Groundedness/Citations Grounded (F1_CG)**

In [None]:
f1_cg = (2 * (overall_citation_precision_val * overall_citation_recall_val)) / (overall_citation_precision_val + overall_citation_recall_val)

In [None]:
print(f'The citations grounded value is: {f1_cg}')

The citations grounded value is: 0.7008879065170981


### **Evaluate Response Truthfulness (Grounded Refusal)**


**Quality of Answering**

In [None]:
questions_answer = ["What hikes can I do in Finland?", "What hikes can I do in Iceland?", "What hikes can I do in Sweden?", "What food can I eat in Sweden?", "How is the transportation in Sweden?"]

In [None]:
responses = []
for q in questions_answer:
  retrieved_docs = reranked_retriever.invoke(q)
  response = pipeline.invoke({"question": q,"context": retrieved_docs})
  print(response)
  print("")
  print("###############################################################")
  responses.append(response)

In Finland, there are several popular hiking trails to consider, including Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland [0, 1]. For those seeking a shorter hike, there are options available in Oulanka, Riisitunturi, or Hossa National Park [2]. Even if you're not interested in a long trek, the Karhunkierros trail in Kuusamo offers stunning landscape views [2]. The hiking trails in Finland are generally easy, but weather conditions, especially in winter, should be taken into account [1, 4]. Before your hike, it's advisable to check the rating of your chosen trail [4]. Colored signposts on marked routes will help ensure you don't get lost [0, 4].

Citations:
[0]: "Tips for hiking and walking in Finland Find your inner happiness on a hike: When it comes to experiencing the natural wonders of Finland, few activities can rival the joy and excitement of going for a walk or a hike. Finland's over 40 national parks. The most popular t

In [None]:
response_split = responses[3].split('Citations')
response_split

["In Sweden, you can savor various traditional dishes such as 'isterband', which are smoked pork sausages typically served with creamed dill potatoes [1]. Another traditional meal is 'rotmos och fläsk', consisting of root vegetable mash and pork sausage [1]. A tradition dating back to earlier times is 'ärtsoppa', Swedish yellow pea soup, which is usually accompanied by pancakes [1]. Pickled and fermented foods remain popular in the Swedish diet, with pickled herring ('sill') being a staple [2]. During summer, Swedes often enjoy herring with new potatoes, Aquavit or 'snaps', and strawberries [3].\n\nIn the capital city, Stockholm, there are recommendations for traditional restaurants where you can try dishes like 'sill' (pickled herring), Swedish meatballs, or toast Skagen [4]. Additionally, meat, particularly elk meat and locally caught Arctic char and whitefish, are popular in the country's north [5]. Visitors should not miss trying Kalix Caviar, a must-try delicacy [5].\n\nIn the aut

In [None]:
answer = response_split[0].split('\n')
print(answer)

["In Sweden, you can savor various traditional dishes such as 'isterband', which are smoked pork sausages typically served with creamed dill potatoes [1]. Another traditional meal is 'rotmos och fläsk', consisting of root vegetable mash and pork sausage [1]. A tradition dating back to earlier times is 'ärtsoppa', Swedish yellow pea soup, which is usually accompanied by pancakes [1]. Pickled and fermented foods remain popular in the Swedish diet, with pickled herring ('sill') being a staple [2]. During summer, Swedes often enjoy herring with new potatoes, Aquavit or 'snaps', and strawberries [3].", '', "In the capital city, Stockholm, there are recommendations for traditional restaurants where you can try dishes like 'sill' (pickled herring), Swedish meatballs, or toast Skagen [4]. Additionally, meat, particularly elk meat and locally caught Arctic char and whitefish, are popular in the country's north [5]. Visitors should not miss trying Kalix Caviar, a must-try delicacy [5].", '', "In

In [None]:
# Manually add
model_answered_questions = ["What hikes can I do in Finland?", "What hikes can I do in Iceland?", "What hikes can I do in Sweden?", "What food can I eat in Sweden?", "How is the transportation in Sweden?"]
recall_answer = len(set.intersection(set(questions_answer),set(model_answered_questions))) / len(questions_answer)
precision_answer = len(set.intersection(set(questions_answer),set(model_answered_questions))) / len(model_answered_questions)
f1_answer = (2 * (precision_answer*recall_answer)) / (precision_answer + recall_answer)
print(f'Recall answer: {recall_answer}')
print(f'Precision answer: {precision_answer}')
print(f'F1 answer: {f1_answer}')

Recall answer: 1.0
Precision answer: 1.0
F1 answer: 1.0


**Quality of Refusals**

In [None]:
questions_refuse = ["How old is Barack Obama?", "How fast does a rocket fly?", "Who discovered Gravity?", "What is the fastest fish?", "What is the fastest land animal?"]

In [None]:
responses = []
for q in questions_refuse:
  retrieved_docs = reranked_retriever.invoke(q)
  response = pipeline.invoke({"question": q,"context": retrieved_docs})
  print(response)
  print("")
  print("###############################################################")
  responses.append(response)

I don't know the age of Barack Obama or Santa Claus as the provided context does not include this information [1]. However, it does mention that an artist known as "the artist with the infantile heart" is 72 years old [2]. The context also includes details about Finland and various events, as well as information about the founder of an eye-catching hotel in Sweden [3].

Citations:
[1]: <Page Content 5815>: "USA. He was a student at the University of Minnesota and started dating a Finnish girl, eventually deciding to move to Finland to be with her. Chris finds similarities between his home state and Finland, but he describes Finland as bit purer and"
[2]: <Page Content 1966>: "been described as the “artist with the infantile heart” but his life story certainty lacks the elements of innocence: his three children all died young and his sculptures and paintings received little attention. Age 72, he displayed them to the"
[3]: <Page Content 601>: "he opened Slottsholmen – an eye-catching bo

In [None]:
# Manually add
model_refused_questions = ["How old is Barack Obama?", "How fast does a rocket fly?", "Who discovered Gravity?", "What is the fastest fish?", "What is the fastest land animal?"]
recall_refusal = len(set.intersection(set(questions_refuse),set(model_refused_questions))) / len(questions_refuse)
precision_refusal = len(set.intersection(set(questions_refuse),set(model_refused_questions))) / len(model_refused_questions)
f1_refusal = (2 * (precision_refusal*recall_refusal)) / (precision_refusal + recall_refusal)
print(f'Recall refusal: {recall_refusal}')
print(f'Precision refusal: {precision_refusal}')
print(f'F1 refusal: {f1_refusal}')

Recall refusal: 1.0
Precision refusal: 1.0
F1 refusal: 1.0


**Grounded Refusal F1_RG**

In [None]:
f1_rg = 0.5*(f1_refusal + f1_answer)
print(f'The grounded refusal value is: {f1_rg}')

The grounded refusal value is: 1.0


### **Evaluate Models Answers**

**Response Relevancy by RAGAS**

In [None]:
responses[3]

'In Sweden, you can savor various traditional dishes such as \'isterband\', which are smoked pork sausages typically served with creamed dill potatoes [1]. Another traditional meal is \'rotmos och fläsk\', consisting of root vegetable mash and pork sausage [1]. A tradition dating back to earlier times is \'ärtsoppa\', Swedish yellow pea soup, which is usually accompanied by pancakes [1]. Pickled and fermented foods remain popular in the Swedish diet, with pickled herring (\'sill\') being a staple [2]. During summer, Swedes often enjoy herring with new potatoes, Aquavit or \'snaps\', and strawberries [3].\n\nIn the capital city, Stockholm, there are recommendations for traditional restaurants where you can try dishes like \'sill\' (pickled herring), Swedish meatballs, or toast Skagen [4]. Additionally, meat, particularly elk meat and locally caught Arctic char and whitefish, are popular in the country\'s north [5]. Visitors should not miss trying Kalix Caviar, a must-try delicacy [5].\n

In [None]:
responses[3].split('Citations')

["In Sweden, you can savor various traditional dishes such as 'isterband', which are smoked pork sausages typically served with creamed dill potatoes [1]. Another traditional meal is 'rotmos och fläsk', consisting of root vegetable mash and pork sausage [1]. A tradition dating back to earlier times is 'ärtsoppa', Swedish yellow pea soup, which is usually accompanied by pancakes [1]. Pickled and fermented foods remain popular in the Swedish diet, with pickled herring ('sill') being a staple [2]. During summer, Swedes often enjoy herring with new potatoes, Aquavit or 'snaps', and strawberries [3].\n\nIn the capital city, Stockholm, there are recommendations for traditional restaurants where you can try dishes like 'sill' (pickled herring), Swedish meatballs, or toast Skagen [4]. Additionally, meat, particularly elk meat and locally caught Arctic char and whitefish, are popular in the country's north [5]. Visitors should not miss trying Kalix Caviar, a must-try delicacy [5].\n\nIn the aut

In [None]:
responses[3].split('Citations')[0]

"In Sweden, you can savor various traditional dishes such as 'isterband', which are smoked pork sausages typically served with creamed dill potatoes [1]. Another traditional meal is 'rotmos och fläsk', consisting of root vegetable mash and pork sausage [1]. A tradition dating back to earlier times is 'ärtsoppa', Swedish yellow pea soup, which is usually accompanied by pancakes [1]. Pickled and fermented foods remain popular in the Swedish diet, with pickled herring ('sill') being a staple [2]. During summer, Swedes often enjoy herring with new potatoes, Aquavit or 'snaps', and strawberries [3].\n\nIn the capital city, Stockholm, there are recommendations for traditional restaurants where you can try dishes like 'sill' (pickled herring), Swedish meatballs, or toast Skagen [4]. Additionally, meat, particularly elk meat and locally caught Arctic char and whitefish, are popular in the country's north [5]. Visitors should not miss trying Kalix Caviar, a must-try delicacy [5].\n\nIn the autu

In [24]:
def process_response_answer_only(response):
  # Get the answer and citations
  answer_only = response.split('Citations')[0]
  return answer_only

In [None]:
sample = SingleTurnSample(
        user_input=questions_answer[3],
        response= process_response_answer_only(responses[3])
    )

scorer = ResponseRelevancy(llm=LangchainLLMWrapper(llm), embeddings = embeddings_model)
await scorer.single_turn_ascore(sample)

0.7816860285369606

**Context Recall by RAGAS**

In [19]:
questions_answer = ["What hikes can I do in Finland?", "What hikes can I do in Iceland?", "What hikes can I do in Sweden?", "What food can I eat in Sweden?", "How is the transportation in Sweden?"]

In [21]:
responses = []
retrieved_contexts = []
for q in questions_answer:
  retrieved_docs = reranked_retriever.invoke(q)
  retrieved_contexts.append([doc.page_content for doc in retrieved_docs])
  response = pipeline.invoke({"question": q,"context": retrieved_docs})
  print(response)
  print("")
  print("###############################################################")
  responses.append(response)

In Finland, you can hike on several popular trails, including Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland [0, 1]. If you're not up for long treks, there are shorter hiking options available in Oulanka, Riisitunturi, or Hossa National Park [2]. The hiking trails in Finland are generally easy due to the absence of tall mountains or very demanding trails, but weather conditions, particularly in winter, should be taken into consideration [1, 4]. In Lapland, you can find the epic Karhunkierros Trail, which offers stunning waterfalls and hanging bridges, and Urho Kekkonen National Park, which is popular and has fewer mosquitos due to wind and open areas [3, 5].

Citations:
[0]: "Tips for hiking and walking in Finland Find your inner happiness on a hike: When it comes to experiencing the natural wonders of Finland, few activities can rival the joy and excitement of going for a walk or a hike. Finland's over 40 national parks"
[1]: 

In [22]:
retrieved_contexts[0]

["trail. Coloured signposts on marked routes will help in not getting lost. The most popular trails in Finland include Karhunkierros in Kuusamo, Hetta-Pallas Trail in Western Lapland, and Pyhä-Luosto Trail in Central Lapland. If you're looking for",
 'landscape views. One of the most famous hiking trails in Finland, Karhunkierros, is located here, but if trekking 80-kilometres isn’t your idea of a good time, you can take a shorter hike in Oulanka, Riisitunturi, or Hossa National Park – just be',
 "Tips for hiking and walking in Finland Find your inner happiness on a hike: When it comes to experiencing the natural wonders of Finland, few activities can rival the joy and excitement of going for a walk or a hike. Finland's over 40 national parks",
 'to consider is your experience level. Are you a seasoned hiker or just getting started? In general, hiking in Finland is easy because there are no tall mountains or very demanding trails. Weather conditions, however, are something to do consid

In [25]:
sample = SingleTurnSample(
    user_input=questions_answer[0],
    response="blank",
    reference=process_response_answer_only(responses[0]),
    retrieved_contexts=retrieved_contexts[0],
)

context_recall = LLMContextRecall(llm=LangchainLLMWrapper(llm)) #Wrap in LangChainLLMWrapper https://docs.ragas.io/en/v0.1.21/howtos/customisations/bring-your-own-llm-or-embs.html, https://docs.ragas.io/en/latest/getstarted/rag_evaluation/#choosing-evaluator-llm
await context_recall.single_turn_ascore(sample)

1.0

In [26]:
sample = SingleTurnSample(
    user_input=questions_answer[1],
    response="blank",
    reference=process_response_answer_only(responses[1]),
    retrieved_contexts=retrieved_contexts[1],
)

context_recall = LLMContextRecall(llm=LangchainLLMWrapper(llm)) #Wrap in LangChainLLMWrapper https://docs.ragas.io/en/v0.1.21/howtos/customisations/bring-your-own-llm-or-embs.html, https://docs.ragas.io/en/latest/getstarted/rag_evaluation/#choosing-evaluator-llm
await context_recall.single_turn_ascore(sample)

1.0

In [27]:
sample = SingleTurnSample(
    user_input=questions_answer[2],
    response="blank",
    reference=process_response_answer_only(responses[2]),
    retrieved_contexts=retrieved_contexts[2],
)

context_recall = LLMContextRecall(llm=LangchainLLMWrapper(llm)) #Wrap in LangChainLLMWrapper https://docs.ragas.io/en/v0.1.21/howtos/customisations/bring-your-own-llm-or-embs.html, https://docs.ragas.io/en/latest/getstarted/rag_evaluation/#choosing-evaluator-llm
await context_recall.single_turn_ascore(sample)

1.0

<br/>
<br/>
<br/>
<br/>
<br/>

## **Conclusions**

We decide to use LLMs from LLM providers such as <u>ChatGroq</u> due to the fast inference speed and ability to output well-structured outputs which makes it easy for formatting