In [5]:
!pip install -q trulens_eval llama-index llama-index-llms-huggingface llama-index-embeddings-huggingface jupyterlab==3.6.0 transformers accelerate llama-index-readers-web

In [6]:
# Interactive Hugging Face login: renders a login widget in the cell output.
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
# Read the Hugging Face token from the Kaggle secret stored under the label "hf_api".
# The value is a credential: do not print or display it.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
hf_api = user_secrets.get_secret("hf_api")

In [None]:
# Sanity-check that the secret was loaded, without echoing it: displaying the
# token would store the credential in the notebook's saved output.
assert hf_api, "Kaggle secret 'hf_api' is empty or missing"

In [None]:
from llama_index.readers.web import BeautifulSoupWebReader

# Single source article used as the corpus for both indexes built later.
url = "https://www.theverge.com/2023/9/29/23895675/ai-bot-social-network-openai-meta-chatbots"

# load_data takes a list of URLs; here it is given just the one above.
documents = BeautifulSoupWebReader().load_data([url])
# Bare expression: displays the loaded documents in the cell output.
documents

In [None]:
# Download Phi-3-mini-4k-instruct-q4.gguf from the microsoft/Phi-3-mini-4k-instruct-gguf
# repo into the current directory (--local-dir .).
# NOTE(review): no other cell shown in this notebook references the .gguf file (the LLM
# cell loads "microsoft/Phi-3-mini-4k-instruct" by model name) — confirm this download is needed.
!huggingface-cli download microsoft/Phi-3-mini-4k-instruct-gguf Phi-3-mini-4k-instruct-q4.gguf --local-dir . --local-dir-use-symlinks False


In [None]:
from huggingface_hub import login

# Non-interactive Hugging Face login using the token read from Kaggle secrets.
# `write_permission` is intentionally not passed: the cells in this notebook only
# download models, so a read-scoped token is sufficient.
login(
    token=hf_api,  # The access token value itself (not the token's name)
    new_session=False,  # Won’t request token if one is already saved on machine
    add_to_git_credential=True,  # Also store the token in the git credential helper
)

In [None]:
# Import TruLens and create the `tru` handle; it is used later to launch the dashboard.
from trulens_eval import Tru
tru = Tru()

In [None]:
from llama_index.llms.huggingface import HuggingFaceLLM
import torch


def messages_to_prompt(messages):
    """Render chat messages as a single Phi-3 style prompt string.

    Each message becomes ``<|role|>\\n{content}<|end|>\\n``. Roles other than
    "system" and "assistant" are rendered under the ``<|user|>`` tag. If no
    system message is present, a default system turn is placed at the front.
    The prompt always ends with an open ``<|assistant|>`` turn.

    Args:
        messages: iterable of objects exposing ``.role`` and ``.content``.

    Returns:
        The assembled prompt string.
    """
    segments = []
    has_system = False

    for msg in messages:
        if msg.role == "system":
            tag = "system"
            has_system = True
        elif msg.role == "assistant":
            tag = "assistant"
        else:
            # "user" and any unrecognised role share the user tag
            tag = "user"
        segments.append(f"<|{tag}|>\n{msg.content}<|end|>\n")

    if not has_system:
        # no explicit system turn: lead with the default one
        segments.insert(0, "<|system|>\nYou are a helpful AI assistant.<|end|>\n")

    # open assistant turn for the model to complete
    segments.append("<|assistant|>\n")

    return "".join(segments)


# Build the LLM wrapper around microsoft/Phi-3-mini-4k-instruct (same name for model and tokenizer).
llm = HuggingFaceLLM(
    model_name="microsoft/Phi-3-mini-4k-instruct",
    # Extra keyword arguments for model loading: allow the repo's custom code, use bfloat16 weights.
    model_kwargs={
        "trust_remote_code": True,
        "torch_dtype": torch.bfloat16
    },
    # Sampling is enabled with a low temperature (0.1).
    generate_kwargs={"do_sample": True, "temperature": 0.1},
    tokenizer_name="microsoft/Phi-3-mini-4k-instruct",
    # Wrapper for plain queries; uses the same tags and default system text as messages_to_prompt.
    query_wrapper_prompt=(
        "<|system|>\n"
        "You are a helpful AI assistant.<|end|>\n"
        "<|user|>\n"
        "{query_str}<|end|>\n"
        "<|assistant|>\n"
    ),
    # Chat-message formatter defined above in this cell.
    messages_to_prompt=messages_to_prompt,
    is_chat_model=True,
)
# Bare expression: displays the configured LLM object in the cell output.
llm

In [None]:
# Import the Hugging Face feedback provider and the Feedback builder from TruLens;
# numpy is used later as the aggregation function for feedback results.
from trulens_eval.feedback.provider import Huggingface
from trulens_eval import Feedback
import numpy as np

# Initialize the provider whose methods are wrapped as feedback functions below.
provider = Huggingface()

In [None]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Register the Phi-3 LLM and the BAAI/bge-small-en-v1.5 embedding model on the
# global LlamaIndex Settings object.
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
# Bare expression: displays the resulting settings in the cell output.
Settings

In [None]:
from llama_index.core import VectorStoreIndex

# Vector index over the loaded article; every query engine below is created from it.
vector_index = VectorStoreIndex.from_documents(documents)

In [None]:
from llama_index.core import SummaryIndex

# Summary index over the same documents.
# NOTE(review): `summary_index` is not used by any other cell shown in this notebook — confirm it is needed.
summary_index = SummaryIndex.from_documents(documents)

In [None]:
from llama_index.core.response.notebook_utils import display_response

In [None]:
import logging
import sys

# Send INFO-level (and above) log records to stdout so they appear in the cell output.
# A single basicConfig call is enough: it installs one stdout handler on the root
# logger. Adding a second StreamHandler on top of it would print every record twice.
# force=True replaces any handlers already attached to the root logger, so the
# configuration takes effect and re-running this cell does not stack handlers.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
# Query engine over the vector index using the "compact" response mode.
query_engine = vector_index.as_query_engine(response_mode="compact")

from trulens_eval import TruLlama
# Selector for the query engine's retrieved context; passed to the feedback functions below.
context = TruLlama.select_context(query_engine)

In [None]:
# FEEDBACK FUNCTIONS
# ------------------

# Groundedness feedback: pairs the selected context with the app output and
# passes them to the provider's NLI-based groundedness measure.
f_groundedness = (
    Feedback(provider.groundedness_measure_with_nli)
    .on(context) # NOTE(review): selector is passed as-is (no .collect() call), so chunks are not gathered into one list here — confirm intended
    .on_output()
)

# Context relevance of a given context to the prompt: pairs the app input with
# the selected context and aggregates the results with the mean.
context_relevance = (
    Feedback(provider.context_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

<div class="alert alert-block alert-success" style="font-size:14px; font-family:verdana; line-height: 1.7em;">
📌 &nbsp; More info about the feedback options from Huggingface models <a href="https://www.trulens.org/trulens_eval/api/provider/huggingface/#trulens_eval.feedback.provider.hugs.Huggingface.toxic">here</a>.
</div>

<div class="alert alert-block alert-success" style="font-size:14px; font-family:verdana; line-height: 1.7em;">
📌 &nbsp; More info about all the different feedback providers <a href="https://www.trulens.org/trulens_eval/api/provider/">here</a>.
</div>


In [None]:
from trulens_eval import TruLlama
# Wrap the query engine in a TruLens recorder registered under app id 'LlamaIndex_App1',
# with the two feedback functions defined above attached.
tru_query_engine_recorder = TruLlama(query_engine,
                                     app_id='LlamaIndex_App1',
                                     feedbacks=[f_groundedness, context_relevance])

In [None]:
# Without recording, the query would be issued directly:
#
#   response = query_engine.query("How do OpenAI and Meta differ on AI tools?")
#
# Here the same call runs inside the recorder's context manager; the resulting
# `recording` object is read in the next cell.

with tru_query_engine_recorder as recording:
    response = query_engine.query("How do OpenAI and Meta differ on AI tools?")

display_response(response)


In [None]:
# Fetch the record captured by the `with` block above.
rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple

# Show the record in the cell output.
display(rec)

In [None]:
# Launch the TruLens dashboard.
tru.run_dashboard()

In [None]:
import os

from huggingface_hub import login

# Log in with the stored token and register it with the git credential helper.
# This uses the in-process API rather than building a `huggingface-cli login`
# shell string: the token is never interpolated into a command line (no shell
# parsing, no exposure in the process list), and a failed login raises instead
# of being reported only through an ignored exit status.
login(token=hf_api, add_to_git_credential=True)



In [None]:
# The results of the feedback functions can be retrieved from
# `Record.feedback_results` or by using the `wait_for_feedback_results` method.
# If retrieved directly, the results are `Future` instances (see
# `concurrent.futures`). You can use `as_completed` to wait until they have
# finished evaluating, or use the utility method:

# Print each feedback function's name next to its result.
for feedback, feedback_result in rec.wait_for_feedback_results().items():
    print(feedback.name, feedback_result.result)

# See more about wait_for_feedback_results:
# help(rec.wait_for_feedback_results)

In [None]:
# Same question with the "refine" response mode. `query_engine` is rebound to a new
# engine and the query runs outside the recorder's `with` block.
query_engine = vector_index.as_query_engine(response_mode="refine")

response = query_engine.query("How do OpenAI and Meta differ on AI tools?")

display_response(response)

In [None]:
# Same question with the "tree_summarize" response mode. `query_engine` is rebound
# again and the query runs outside the recorder's `with` block.
query_engine = vector_index.as_query_engine(response_mode="tree_summarize")

response = query_engine.query("How do OpenAI and Meta differ on AI tools?")

display_response(response)