In [1]:
import pandas as pd
import numpy as np
import ast

from dotenv import load_dotenv
from llm_requests import *
from pinecone_db import *

from rag import RAG

from tqdm import tqdm
from datetime import datetime

load_dotenv()

Package protobuf is installed but has a version conflict:
	(protobuf 3.20.3 (/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages), Requirement.parse('protobuf>=4.23.2'))

This package is optional for trulens_eval so this may not be a problem but if
you need to use the related optional features and find there are errors, you
will need to resolve the conflict:

    ```bash
    pip install 'protobuf>=4.23.2'
    ```

If you are running trulens_eval in a notebook, you may need to restart the
kernel after resolving the conflict. If your distribution is in a bad place
beyond this package, you may need to reinstall trulens_eval so that all of the
dependencies get installed and hopefully corrected:
    
    ```bash
    pip uninstall -y trulens_eval
    pip install trulens_eval
    ```

Package nbconvert is installed but has a version conflict:
	(nbconvert 6.3.0 (/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages), Requirement.parse('nbconver

True

In [2]:
import os
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['TOKENIZERS_PARALLELISM'] = "false"

In [3]:
CHUNKING_STRATEGIES = [
    'fixed-size-splitter',
    'recursive-splitter',
    'semantic-splitter'
    ]
EMBEDDING_MODELS = [
    'all-MiniLM-L6-v2',
    'bert-base-nli-mean-tokens',
    'gtr-t5-base'
]

In [4]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'fixed-size-splitter'
embedding_model = 'all-MiniLM-L6-v2'
generator_model = 'LLaMA-13B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [9]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'semantic-splitter'
embedding_model = 'all-MiniLM-L6-v2'
generator_model = 'LLaMA-13B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [19]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'recursive-splitter'
embedding_model = 'gtr-t5-base'
generator_model = 'LLaMA-13B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [4]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'fixed-size-splitter'
embedding_model = 'all-MiniLM-L6-v2'
generator_model = 'Mistral-7B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [19]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'semantic-splitter'
embedding_model = 'all-MiniLM-L6-v2'
generator_model = 'Mistral-7B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [27]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'recursive-splitter'
embedding_model = 'gtr-t5-base'
generator_model = 'Mistral-7B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [34]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'fixed-size-splitter'
embedding_model = 'all-MiniLM-L6-v2'
generator_model = 'OpenHermes-7B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [44]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'semantic-splitter'
embedding_model = 'all-MiniLM-L6-v2'
generator_model = 'OpenHermes-7B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [48]:
timestamp = datetime.now().strftime("%d%m%Y_%H%M%S")
chunking_strategy = 'recursive-splitter'
embedding_model = 'gtr-t5-base'
generator_model = 'OpenHermes-7B'

rag = RAG(chunking_strategy=chunking_strategy, embedding_model=embedding_model)

In [10]:
DATASET = os.getenv('DATASET')
data = pd.read_csv(DATASET)
data['genres'] = data['genres'].apply(ast.literal_eval)

In [35]:
from trulens_eval import Tru

tru = Tru()


In [49]:
from trulens_eval import Feedback, Select
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI

import numpy as np


provider = OpenAI()

grounded = Groundedness(groundedness_provider=provider)

# Groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name = "Groundedness")
    .on(Select.RecordCalls.retrieve.rets.collect())
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Relevance between overall query and answer
f_answer_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name = "Answer Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on_output()
)

# Relevance between search query and each context chunk
f_context_relevance = (
    Feedback(provider.qs_relevance_with_cot_reasons, name = "Context Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on(Select.RecordCalls.retrieve.rets.collect())
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.retrieve.rets.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.app.retrieve.args.query .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.app.retrieve.args.query .
✅ In Context Relevance, input statement will be set to __record__.app.retrieve.rets.collect() .


In [50]:
from trulens_eval import TruCustomApp

app_id = f'{chunking_strategy}&{embedding_model}&{generator_model}&{timestamp}'
tru_recorder = TruCustomApp(rag,
    app_id = app_id,
    feedbacks = [f_groundedness, f_answer_relevance, f_context_relevance])

In [39]:
with open('TestQueries.txt', 'r', encoding='utf-8') as f:
    search_queries = f.readlines()
    search_queries = list(map(lambda q: q.strip().replace("'", "`"), search_queries))

search_queries[:3]

['\ufeffAnimation movie toys move into new house, jealous toy sheriff, space ranger action figure, sadistic neighbor, mutant toys, Pizza Planet, Christmas gift-opening scene',
 'Documentary martial arts dance rituals, war dances, sword dances, cultural significance, martial arts performance with music',
 'Dark comedy series based on graphic novel, teen psychopath road trip, rebel adventure, star-crossed teenagers']

In [40]:
q = 'Animation movie toys move into new house, jealous toy sheriff, space ranger action figure, sadistic neighbor, mutant toys, Pizza Planet, Christmas gift-opening scene'
rag.query(q)

'Mickey\'s Once Upon a Christmas is relevant for you because it features various animated segments that showcase different aspects of holiday celebrations. One segment called "Stuck on Christmas" involves toys moving into a new house and experiencing mishaps when a jealous toy sheriff tries to ruin their fun. This part might resonate with your query as it has similar themes, such as toys coming together in a new environment and dealing with conflict among them. Additionally, the film includes other segments that feature popular Disney characters engaging in heartwarming stories of love and friendship during Christmas time.'

In [51]:
MAX_RETRIES = 3

for query in tqdm(search_queries):
    attempt = 0
    success = False
    while not success and attempt <= MAX_RETRIES:
        try:
            with tru_recorder as recording:
                rag.query(query)
            success = True
        except Exception:
            attempt += 1
            print(query)

 51%|█████     | 51/100 [07:32<07:10,  8.78s/it]Validation error: 1 validation error for Rating
rating
  Value error, Rating must be between 0 and 10 [type=value_error, input_value=75, input_type=int]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error
 84%|████████▍ | 84/100 [12:18<02:17,  8.58s/it]Validation error: 1 validation error for Rating
rating
  Value error, Rating must be between 0 and 10 [type=value_error, input_value=33, input_type=int]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error
 89%|████████▉ | 89/100 [13:05<01:38,  8.94s/it]Validation error: 1 validation error for Rating
rating
  Value error, Rating must be between 0 and 10 [type=value_error, input_value=33, input_type=int]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error
100%|██████████| 100/100 [14:42<00:00,  8.82s/it]


In [71]:
app_id = 'recursive-splitter&gtr-t5-base&OpenHermes-7B&11042024_180906'
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Groundedness,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_8580d9720b0e6dab46cdd1555dea6d74,"""\ufeffAnimation movie toys move into new hous...","""Toy Story 2 is relevant for you because it fe...",-,"{""record_id"": ""record_hash_8580d9720b0e6dab46c...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:09:06.637025"", ""...",2024-04-11T18:09:16.438960,1.0,0.8,0.300,[{'args': {'prompt': '﻿Animation movie toys mo...,[{'args': {'question': '﻿Animation movie toys ...,"[{'args': {'source': [""Toy Story 2\nMeanwhile,...",9,0,0.0
1,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_7006debbc723797610532c0a0f9d0dba,"""Documentary martial arts dance rituals, war d...","""The film March of the Penguins is not directl...",-,"{""record_id"": ""record_hash_7006debbc7237976105...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:09:16.796487"", ""...",2024-04-11T18:09:28.671274,0.8,0.0,0.000,[{'args': {'prompt': 'Documentary martial arts...,[{'args': {'question': 'Documentary martial ar...,[{'args': {'source': ['March of the Penguins (...,11,0,0.0
2,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_44e5f301e612ba06d3fee8bea8dbdacc,"""Dark comedy series based on graphic novel, te...","""The Heroes of Evil is relevant for you becaus...",-,"{""record_id"": ""record_hash_44e5f301e612ba06d3f...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:09:28.930920"", ""...",2024-04-11T18:09:39.118795,0.8,0.8,0.420,[{'args': {'prompt': 'Dark comedy series based...,[{'args': {'question': 'Dark comedy series bas...,[{'args': {'source': ['The Heroes of Evil\nThr...,10,0,0.0
3,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_e747ce0e30d0edaf5ae3c5bc3a3e12e5,"""TV series rich kid high school president ambi...","""American Dreamz might be relevant to you beca...",-,"{""record_id"": ""record_hash_e747ce0e30d0edaf5ae...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:09:39.460768"", ""...",2024-04-11T18:09:49.869624,0.9,0.2,0.640,[{'args': {'prompt': 'TV series rich kid high ...,[{'args': {'question': 'TV series rich kid hig...,[{'args': {'source': ['American Dreamz\nOn the...,10,0,0.0
4,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_2e6949d5d716d45ff046dfe6261c98a4,"""Movie Upper Paleolithic Europe hunting expedi...","""\""Saawan\"" is relevant for you because it tel...",-,"{""record_id"": ""record_hash_2e6949d5d716d45ff04...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:09:50.213499"", ""...",2024-04-11T18:10:04.001391,0.9,0.2,0.360,[{'args': {'prompt': 'Movie Upper Paleolithic ...,[{'args': {'question': 'Movie Upper Paleolithi...,[{'args': {'source': ['Saawan\nOutcast from so...,13,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_e9306970cbd3ba35a2f05dfee6c15b4a,"""Cult classic underground following, quirky ch...","""Honey, I Shrunk the Kids is a cult classic wi...",-,"{""record_id"": ""record_hash_e9306970cbd3ba35a2f...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:22:59.044498"", ""...",2024-04-11T18:23:11.302922,1.0,0.8,0.375,[{'args': {'prompt': 'Cult classic underground...,[{'args': {'question': 'Cult classic undergrou...,"[{'args': {'source': ['Honey, I Shrunk the Kid...",12,0,0.0
96,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_dc9f86ce717cf0a7005e9b04ef6332c6,"""Documentary film real-life stories, informati...","""Last Train Home is a relevant movie choice fo...",-,"{""record_id"": ""record_hash_dc9f86ce717cf0a7005...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:23:11.688981"", ""...",2024-04-11T18:23:19.279741,1.0,0.9,0.400,[{'args': {'prompt': 'Documentary film real-li...,[{'args': {'question': 'Documentary film real-...,[{'args': {'source': ['Last Train Home\nAwards...,7,0,0.0
97,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_aa0a173903aa80e6fe553a2dd0975c5a,"""Drama series compelling storytelling, intrica...","""Tokyo Decadence is a relevant movie for you i...",-,"{""record_id"": ""record_hash_aa0a173903aa80e6fe5...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:23:19.652830"", ""...",2024-04-11T18:23:27.580163,0.9,0.8,0.450,[{'args': {'prompt': 'Drama series compelling ...,[{'args': {'question': 'Drama series compellin...,"[{'args': {'source': [""Tokyo Decadence (Topâzu...",7,0,0.0
98,recursive-splitter&gtr-t5-base&OpenHermes-7B&1...,"{""tru_class_info"": {""name"": ""TruCustomApp"", ""m...",RAG(rag),record_hash_3712aeb3a6d36e86e7448ea584703b03,"""Fantasy movie magical adventure, mythical cre...","""Castle in the Sky is relevant for you because...",-,"{""record_id"": ""record_hash_3712aeb3a6d36e86e74...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-04-11T18:23:27.960780"", ""...",2024-04-11T18:23:36.793205,1.0,0.9,0.375,[{'args': {'prompt': 'Fantasy movie magical ad...,[{'args': {'question': 'Fantasy movie magical ...,"[{'args': {'source': [""Laputa: Castle in the S...",8,0,0.0


In [76]:
pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback].sort_values(by=['Groundedness', 'Context Relevance', 'Answer Relevance'], ascending=False).head()

Unnamed: 0,input,output,Groundedness,Answer Relevance,Context Relevance
94,"""Crime thriller suspenseful plot, thrilling action, crime-solving intrigue""","""\""Basic Instinct\"" is relevant for you because it offers a gripping crime thriller that will keep you on edge with its suspenseful plot and intense action scenes. The film follows the journey of a detective as he delves into solving a brutal murder case involving a famous rock star, eventually becoming entangled in a web of intrigue surrounding a mysterious novelist who is also the prime suspect.\nThe movie's captivating narrative, which weaves together elements of crime-solving and personal investigation, offers you an immersive experience that combines thrilling action with suspenseful drama, making it a perfect choice for fans of crime thrillers seeking intrigue and excitement in their viewing experience.""",1.0,1.0,0.9
19,"""Independent thriller film featuring a lone survivor stranded in the wilderness, psychological tension, survival skills put to the test, and a mysterious threat lurking in the shadows""","""The Survivalist is an independent thriller that may be relevant for you because it features a protagonist who has survived a severe drop in global population by living off the land with his efficient wilderness skills. The film focuses on psychological tension as he encounters two women, Kathryn and Milja, whose arrival brings new challenges to his isolated existence. As he is forced into an agreement for survival that puts him at risk, the mysterious threat lurking in the shadows adds a layer of intrigue to this tense situation.""",1.0,0.9,0.9
39,"""Cult classic underground following, unconventional storytelling, avant-garde style""","""Disco Dancer is relevant as it represents a cult classic film with an underground following due to its unique narrative structure that combines elements of drama, dance, and revenge in one package. The unconventional storytelling approach offers viewers something different from the typical Bollywood movie formula, making it appealing for those who appreciate avant-garde style in cinema.""",1.0,1.0,0.8
35,"""Children`s movie playful antics, life lessons, imaginative worlds""","""The movie \""Fukrey Boyzzz\"" is relevant to your query as it features children's playful antics and imaginative worlds in its storyline. The movie revolves around a trio of troublemaking friends who dream big and engage in various mischievous activities, reminiscent of the childhood experiences many viewers may relate to. Moreover, their adventures within an animated setting serve as life lessons for both children and adults alike about friendship, responsibility, and embracing one's imagination. The movie combines humor with valuable teachings in a manner that is entertaining yet meaningful, making it perfect for those looking for fun and educational content.""",1.0,0.9,0.8
74,"""International film foreign language, diverse cultures, global cinema""","""The movie \""The Shaukeens\"" is relevant for you if you are looking for international films that explore diverse cultures and global cinema because it features characters from different backgrounds coming together on an adventure in a beautiful tropical setting - Mauritius. The interactions between the three senior men from India and the young woman highlight cultural differences while also showing similarities, making this film a great example of how people from various parts of the world can connect and share experiences despite their age or origin.""",1.0,0.9,0.8


In [69]:
trulens_metrics = tru.get_leaderboard(app_ids=[
 'fixed-size-splitter&all-MiniLM-L6-v2&LLaMA-13B&10042024_131047',
 'fixed-size-splitter&all-MiniLM-L6-v2&Mistral-7B&11042024_154907',
 'fixed-size-splitter&all-MiniLM-L6-v2&OpenHermes-7B&11042024_172638',
 'recursive-splitter&gtr-t5-base&LLaMA-13B&10042024_181818',
 'recursive-splitter&gtr-t5-base&Mistral-7B&11042024_162933',
 'recursive-splitter&gtr-t5-base&OpenHermes-7B&11042024_180906',
 'semantic-splitter&all-MiniLM-L6-v2&LLaMA-13B&10042024_174010',
 'semantic-splitter&all-MiniLM-L6-v2&Mistral-7B&11042024_161343',
 'semantic-splitter&all-MiniLM-L6-v2&OpenHermes-7B&11042024_175147'
])
cols = list(trulens_metrics.columns)
for col in cols:
    trulens_metrics[col] = trulens_metrics[col].apply(lambda val: round(val, 4))
trulens_metrics = trulens_metrics.reset_index().sort_values('app_id').reset_index(drop=True).rename(
    columns={'app_id': 'Chunking Strategy + Embedding model + LLM'}
).drop(columns=['total_cost'])
trulens_metrics['Chunking Strategy + Embedding model + LLM'] = trulens_metrics['Chunking Strategy + Embedding model + LLM'].apply(lambda model: ' + '.join(str(model).split('&')[:-1]))
trulens_metrics

Unnamed: 0,Chunking Strategy + Embedding model + LLM,Groundedness,Answer Relevance,Context Relevance,latency
0,fixed-size-splitter + all-MiniLM-L6-v2 + LLaMA...,0.3478,0.8391,0.5663,29.0
1,fixed-size-splitter + all-MiniLM-L6-v2 + Mistr...,0.3953,0.8614,0.5673,27.495
2,fixed-size-splitter + all-MiniLM-L6-v2 + OpenH...,0.3425,0.8694,0.5735,28.0612
3,recursive-splitter + gtr-t5-base + LLaMA-13B,0.3664,0.866,0.589,27.66
4,recursive-splitter + gtr-t5-base + Mistral-7B,0.4094,0.8636,0.5828,27.495
5,recursive-splitter + gtr-t5-base + OpenHermes-7B,0.4397,0.868,0.591,27.66
6,semantic-splitter + all-MiniLM-L6-v2 + LLaMA-13B,0.3364,0.8296,0.5929,28.0612
7,semantic-splitter + all-MiniLM-L6-v2 + Mistral-7B,0.3938,0.8398,0.5622,28.0612
8,semantic-splitter + all-MiniLM-L6-v2 + OpenHer...,0.4035,0.8551,0.5724,28.0612


In [70]:
trulens_metrics.to_csv('generation_validation/metrics.csv', index=False)

In [54]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

RuntimeError: Dashboard failed to start in time. Please inspect dashboard logs for additional information.