In [1]:
from llama_index import ServiceContext
from llama_index import set_global_service_context
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.embeddings import GradientEmbedding
from llama_index.llms import GradientBaseModelLLM
from llama_index.vector_stores import CassandraVectorStore
from llama_index.readers.base import BaseReader
from llama_index.schema import Document
import llama_index
import os
import json
import pathlib
from textwrap3 import wrap
from IPython.display import Markdown, display
from traceloop.sdk import Traceloop
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index import ServiceContext
from llama_index.llms import Ollama
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import Ollama
from llama_index import StorageContext, load_index_from_storage
from llama_index.retrievers import VectorIndexRetriever
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor
from llama_index import Prompt
from IPython.display import Markdown, display
from llama_index.prompts import PromptTemplate
from llama_index.query_engine import CustomQueryEngine
from llama_index.retrievers import BaseRetriever
from llama_index.response_synthesizers import (
    get_response_synthesizer,
    BaseSynthesizer,
)

from llama_index.vector_stores import ChromaVectorStore
import chromadb

import pprint

In [2]:
documentsNassim = SimpleDirectoryReader("/mnt/nasmixprojects/books/nassimTalebDemo").load_data()
print(f"Loaded {len(documentsNassim)} document(s).")

Loaded 1 document(s).


In [2]:
llm = Ollama(model="llama2",base_url="http://192.168.1.232:11435") #llm = Ollama(model="llama2")
#llm = Ollama(model="wizard-vicuna-uncensored",base_url="http://192.168.1.232:11435") #llm = Ollama(model="llama2")
#llm = Ollama(model="wizard-vicuna-uncensored",base_url="http://192.168.1.148:11435") #llm = Ollama(model="llama2")

#embed_model = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en")
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

service_context = ServiceContext.from_defaults(
    llm = llm,
    embed_model = embed_model,
    chunk_size=256,
)

set_global_service_context(service_context)

In [3]:
#Chroma only in RAM
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [3]:
#Chroma stored persisted in disk
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("nassim-demo")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(embed_model=embed_model)

In [5]:
index_finance = VectorStoreIndex.from_documents( documentsNassim, storage_context=storage_context, service_context=service_context )

In [3]:
#Chromadb load from persisted disk
db2 = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db2.get_or_create_collection("nassim-demo")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
index_finance = VectorStoreIndex.from_vector_store( vector_store, service_context=service_context)

In [None]:
index_finance = VectorStoreIndex.from_documents( documentsNassim, storage_context=storage_context, service_context=service_context )

In [4]:
query_engine3 = index_finance.as_query_engine(response_mode="tree_summarize", verbose=True)

In [5]:
#trulens
eval_questions = []
with open('generated_questions.txt', 'r') as file:
    for line in file:
        # Remove newline character and convert to integer
        item = line.strip()
        eval_questions.append(item)

print(eval_questions)

['what is the bed of procrustes?', 'what is the best way to keep the mind opened?', 'give a list of the 5 points to avoid narrow your mindset.']


In [6]:
from trulens_eval import Tru

def run_evals(eval_questions, tru_recorder, query_engine):
    for question in eval_questions:
        with tru_recorder as recording:
            response = query_engine.query(question)

In [None]:
template = (
    "We have provided trusted context information below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this trusted and cientific information, please answer the question: {query_str}. Remember that the statements of the context are verfied and come from trusted sources.\n"
)
qa_template = Prompt(template)

new_summary_tmpl_str = (
    "The original query is as follows: {query_str}"
    "We have provided an existing answer: {existing_answer}"
    "We have the opportunity to refine the existing answer (only if needed) with some more trusted context below. Remember that the statements of the context are verfied and come from trusted sources."
    "------------"
    "{context_msg}"
    "------------"
    "Given the new trusted context, refine the original answer to better answer the query. If the context isn't useful, return the original answer. Remember that the statements of the new context are verfied and come from trusted sources."
    "Refined Answer: sure thing! "
)
new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

#https://gpt-index.readthedocs.io/en/v0.6.27/how_to/customization/custom_prompts.html
#query_engine2 = restored_index.as_query_engine(text_qa_template=qa_template, similarity_top_k=24)

retriever = VectorIndexRetriever(
    index=index_finance,
    similarity_top_k=12,
)
#https://gpt-index.readthedocs.io/en/v0.7.11/core_modules/model_modules/prompts.html
response_synthesizer = get_response_synthesizer( ##try compact?
    text_qa_template=qa_template,
    refine_template=new_summary_tmpl
)
query_engine3 = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    # node_postprocessors=[
    #     SimilarityPostprocessor(similarity_cutoff=0.7)
    # ]
)

response = query_engine3.query("make a list of things to do to avoid over simplifying and being narrow minded?")
print (response)


# for question in eval_questions:
#     print(question)
#     print(query_engine3.query(question))

In [8]:
from trulens_eval.feedback.provider.litellm import LiteLLM

#llm = Ollama(model="wizard-vicuna-uncensored",base_url="http://192.168.1.232:11435")
litellm_provider = LiteLLM(model_engine="ollama/llama2", endpoint="http://192.168.1.232:11435")
litellm_provider.relevance("what color is the white house?","white")

1.0

In [9]:
import numpy as np
import pandas as pd

from trulens_eval.feedback.provider.litellm import LiteLLM

from litellm import completion

from trulens_eval import (
    Feedback,
    TruLlama,
    Select
)

from trulens_eval.feedback import Groundedness

from trulens_eval import Tru


from trulens_eval.utils.threading import TP
TP.DEBUG_TIMEOUT = None # None to disable


tru = Tru()
tru.reset_database()

# response = completion(
#     model="ollama/wizard-vicuna-uncensored", 
#     messages=[{ "content": "respond in 20 words. who are you?","role": "user"}], 
#     api_base="http://192.168.1.232:11435"
# )
# print(response)

LiteLLM.set_verbose=True

#itellm_provider = LiteLLM(model_engine="ollama/llama2", endpoint="http://192.168.1.232:11435")
litellm_provider = LiteLLM(model_engine="ollama/llama2", api_base='http://192.168.1.232:11435')

import nest_asyncio
nest_asyncio.apply()

################################################################################################
f_qa_relevance = Feedback(
    litellm_provider.relevance_with_cot_reasons,
    name="Answer Relevance"
).on_input_output()

#context_selection = TruLlama.select_source_nodes().node.text
context_selection = Select.RecordCalls.retriever.retrieve.rets[:].node.text

f_qs_relevance = (
    Feedback(litellm_provider.qs_relevance_with_cot_reasons,
             name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

grounded = Groundedness(groundedness_provider=litellm_provider)
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons,
             name="Groundedness"
            )
    .on(context_selection.collect())
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)
################################################################################################

tru_recorder = TruLlama(
    query_engine3,
    app_id="App_1",
    feedbacks=[
        f_qa_relevance,
        f_qs_relevance,
        f_groundedness
    ],
    feedback_mode = "with_app" #"deffered"
)

for question in eval_questions:
    with tru_recorder as recording:
        print(question)
        query_engine3.query(question)

################################################################################################
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

tru.get_leaderboard(app_ids=[])

tru.run_dashboard()


🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.retriever.retrieve.rets[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.retriever.retrieve.rets[:].node.text.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
what is the bed of procrustes?
what is the best way to keep the mind opened?
give a list of the 5 points to avoid narrow your mindset.
Starting dashboard ...
Config file already exists. Skipping writing proces

Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

In [None]:
#from utils import get_prebuilt_trulens_recorder


import importlib

# # Assuming the 'utils.py' file is in the same directory as the code executing this import
# utils_module = importlib.util.spec_from_file_location("utils", "utils.py")
# utils = importlib.util.module_from_spec(utils_module)
# utils_module.loader.exec_module(utils)
# get_prebuilt_trulens_recorder = utils.get_prebuilt_trulens_recorder


from trulens_eval import Tru

Tru().reset_database()

In [None]:

template = (
    "We have provided trusted context information below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this trusted and cientific information, please answer the question: {query_str}. Remember that the statements of the context are verfied and come from trusted sources.\n"
)
qa_template = Prompt(template)

new_summary_tmpl_str = (
    "The original query is as follows: {query_str}"
    "We have provided an existing answer: {existing_answer}"
    "We have the opportunity to refine the existing answer (only if needed) with some more trusted context below. Remember that the statements of the context are verfied and come from trusted sources."
    "------------"
    "{context_msg}"
    "------------"
    "Given the new trusted context, refine the original answer to better answer the query. If the context isn't useful, return the original answer. Remember that the statements of the new context are verfied and come from trusted sources."
    "Refined Answer: sure thing! "
)
new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

#https://gpt-index.readthedocs.io/en/v0.6.27/how_to/customization/custom_prompts.html
#query_engine2 = restored_index.as_query_engine(text_qa_template=qa_template, similarity_top_k=24)

retriever = VectorIndexRetriever(
    index=index_finance,
    similarity_top_k=12,
)
#https://gpt-index.readthedocs.io/en/v0.7.11/core_modules/model_modules/prompts.html
response_synthesizer = get_response_synthesizer( ##try compact?
    text_qa_template=qa_template,
    refine_template=new_summary_tmpl
)
query_engine3 = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    # node_postprocessors=[
    #     SimilarityPostprocessor(similarity_cutoff=0.7)
    # ]
)

def display_prompt_dict(prompts_dict):
    for k, p in prompts_dict.items():
        text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
        display(Markdown(text_md))
        print(p.get_template())
        display(Markdown("<br><br>"))
        
prompts_dict = query_engine3.get_prompts()
display_prompt_dict(prompts_dict)
print("????????????????????????")

#response = query_engine3.query("what is the best advice to start investing?")
#response = query_engine3.query("how to be antifragile?")
#response = query_engine3.query("how to be antifragile? make a list with the 10 points more important")
#esponse = query_engine3.query("why is the yuan value so low in comparison with the dollar? make a list with the points that influence this fact")
#response = query_engine3.query("what is antifragility?")
#response = query_engine3.query("what is the bed of procrustes?")
response = query_engine3.query("make a list of things to do to avoid over simplifying and being narrow minded?")

display(Markdown(f"<b>{response}</b>"))

vid_names = {}
print("xxxxxxxxx")
nodes=response.source_nodes
for node in nodes:
    #print(node)
    print(node.score)
    #pprint.pprint(node.metadata)
    vid_names.setdefault(node.metadata['file_name'], 0)
    vid_names[node.metadata['file_name']] = vid_names[node.metadata['file_name']] + 1
    print(node.metadata['file_name'])

pprint.pprint(vid_names)
print("--------")
print(response.get_formatted_sources(length=20000))
print("!!!!!!!!!")

wrapped_text = wrap(response.get_formatted_sources(length=20000), width=80)
for line in wrapped_text:
    print(line)