In [None]:
#Do the migration
import os
from urllib.parse import quote

from langchain.docstore.document import Document
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.pgvector import PGVector
from langchain_text_splitters import CharacterTextSplitter

In [None]:
def build_postgres_url(user, password, host, port, database):
    """Build a ``postgresql+psycopg2`` SQLAlchemy URL from its components.

    ``user`` and ``password`` are percent-encoded so that characters with a
    special meaning inside a URL (``@``, ``:``, ``/``, spaces, ...) cannot
    corrupt the connection string. ``host``, ``port`` and ``database`` are
    inserted unchanged.

    Args:
        user: Database user name.
        password: Database password.
        host: Database host.
        port: Database port (string or int).
        database: Database name.

    Returns:
        The connection URL as a string.
    """
    # str() keeps the previous rendering for unset (None) values instead of raising.
    credentials = f'{quote(str(user), safe="")}:{quote(str(password), safe="")}'
    return f'postgresql+psycopg2://{credentials}@{host}:{port}/{database}'


# Connection settings are read from the environment; os.getenv returns None when unset.
POSTGRES_USER = os.getenv('POSTGRES_USER')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_DB = os.getenv('POSTGRES_DB')
POSTGRES_PORT = os.getenv('POSTGRES_PORT')
POSTGRES_HOST = os.getenv('POSTGRES_HOST')

CONNECTION_STRING = build_postgres_url(
    POSTGRES_USER,
    POSTGRES_PASSWORD,
    POSTGRES_HOST,
    POSTGRES_PORT,
    POSTGRES_DB,
)

In [None]:
# Example embeddings: build the all-MiniLM-L6-v2 embedder and run it once on a
# single query string and once on a small batch of documents.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

text = "This is a test document."
sample_documents = [text, "This is not a test document."]

query_result = embeddings.embed_query(text)
doc_result = embeddings.embed_documents(sample_documents)

In [None]:
# Read the act-title list and split it on newlines.
# NOTE(review): chunk_size=1 with a newline separator presumably keeps every
# line as its own chunk — confirm that is the intent.
loader = TextLoader("DB/RawBCLaws/all_act_titles.txt")
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1,
    chunk_overlap=0,
)

documents = loader.load()
docs = text_splitter.split_documents(documents)

In [None]:
COLLECTION_NAME = "bc_law_titles"

# Embed the split documents and store them under COLLECTION_NAME.
db = PGVector.from_documents(
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
    documents=docs,
    pre_delete_collection=True,  # if the collection already exists, delete it first
)


In [None]:
# Query the freshly built index and print every hit with its score.
query = "Is there a law on tenancy act?"
print(f'\n{query}\n')
docs_with_score = db.similarity_search_with_score(query)

separator = "-" * 80
for doc, score in docs_with_score:
    print(separator)
    print("Score: ", score)
    print(doc.page_content)
    print(separator)

In [None]:
# Open the existing collection (no re-ingestion) and run a second query against it.
store = PGVector(
    embedding_function=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
)

query = "Laptop is a device."
print(f'\n{query}\n')
docs_with_score = store.similarity_search_with_score(query)

separator = "-" * 80
for doc, score in docs_with_score:
    print(separator)
    print("Score: ", score)
    print(doc.page_content)
    print(separator)


In [None]:
%pip install langchainhub 
%pip isntall bs4
%pip install openllm
%pip install trulens-eval==0.25.1

In [None]:
from trulens_eval import Tru
from trulens_eval.tru_custom_app import instrument
# TruLens logging database settings are read from the environment;
# os.getenv returns None for any variable that is not set.
TRULENS_USER = os.getenv('TRULENS_USER')
TRULENS_PASSWORD = os.getenv('TRULENS_PASSWORD')
TRULENS_DB = os.getenv('TRULENS_DB')
TRULENS_PORT = os.getenv('TRULENS_PORT')
TRULENS_HOST = os.getenv('TRULENS_HOST')

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot corrupt the URL. str() keeps the previous
# rendering for unset (None) values instead of raising.
trulens_user_encoded = quote(str(TRULENS_USER), safe='')
trulens_password_encoded = quote(str(TRULENS_PASSWORD), safe='')

TRULENS_CONNECTION_STRING = (
    f'postgresql+psycopg2://{trulens_user_encoded}:{trulens_password_encoded}'
    f'@{TRULENS_HOST}:{TRULENS_PORT}/{TRULENS_DB}'
)
tru = Tru(database_url=TRULENS_CONNECTION_STRING)

In [None]:
# NOTE(review): this appears to wipe previously logged TruLens data on every
# run — confirm that is intended before pointing at a shared database.
tru.reset_database()

In [None]:
%pip install accelerator

In [None]:
import openllm

# Local generation model used by the RAG cells below, with its sampling settings.
llm = openllm.LLM("google/flan-t5-small", temperature=0.94, repetition_penalty=1.2)

In [None]:
import asyncio

import nest_asyncio



# Patch asyncio so the asyncio.run() calls in later cells work from inside the
# notebook — presumably because the kernel already runs an event loop; confirm.
nest_asyncio.apply()


async def main(query):
    """Stream a completion for ``query`` from ``llm`` and return the full text.

    For every item yielded by ``llm.generate_iterator`` (capped at 128 new
    tokens) the text of its first output is echoed to stdout without a
    trailing newline, and all pieces are concatenated into the return value.
    """
    pieces = []
    async for generation in llm.generate_iterator(query, max_new_tokens=128):
        piece = generation.outputs[0].text
        print(piece, flush=True, end='')
        pieces.append(piece)
    return ''.join(pieces)


# Smoke test: send one prompt straight to the model, without any retrieved context.
asyncio.run(main("what is tenancy act. Please explain"))

In [None]:
class RAG_from_scratch:
    """Minimal retrieve-then-generate pipeline with TruLens-instrumented steps.

    Relies on two notebook-level names defined in other cells: ``store`` (the
    vector store that is searched) and ``main`` (the async helper that streams
    a completion from the LLM).
    """

    @instrument
    def retrieve(self, query: str) -> str:
        """Return the page content of the first search result for ``query``.

        Returns an empty string when the vector store yields no results, so
        the caller gets an empty context instead of an ``IndexError``.
        """
        results = store.similarity_search_with_score(query=query)
        if not results:
            return ""
        # Each result is a (document, score) pair; only the first document's text is used.
        doc, _score = results[0]
        return doc.page_content

    @instrument
    def generate_completion(self, query: str, context_str: str) -> str:
        """Build the prompt from ``context_str`` and ``query`` and return the LLM's answer.

        The assembled prompt is printed before it is sent to the model.
        """
        messages = (
            "We have provided context information below. \n"
            "---------------------\n"
            f"{context_str}"
            "\n---------------------\n"
            f"Given this information, please answer the question: {query}"
        )
        print(messages)
        completion = asyncio.run(main(messages))
        return completion

    @instrument
    def query(self, query: str) -> str:
        """Answer ``query``: retrieve context, then generate a completion from it."""
        context_str = self.retrieve(query)
        completion = self.generate_completion(query, context_str)
        return completion


rag = RAG_from_scratch()

In [None]:
        results = store.similarity_search_with_score(
        query=query,
    )

In [None]:

# NOTE(review): this question looks unrelated to the indexed collection
# (COLLECTION_NAME is "bc_law_titles") — presumably an out-of-domain check; confirm.
rag.query("When was the University of Washington founded?")



In [None]:
%pip install litellm
from trulens_eval import Feedback, Select
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI
from trulens_eval.feedback.provider.hugs import Huggingface
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
huggingface_provider = Huggingface()

import numpy as np
# SECURITY: a literal API key used to be pasted on this line. Treat that key as
# compromised and rotate it. Provider credentials (e.g. TOGETHERAI_API_KEY)
# must be supplied through the environment, never written into the notebook.
from trulens_eval.feedback.provider.litellm import LiteLLM

# Alternative providers that were tried:
#   provider = LiteLLM(model_engine="together_ai/togethercomputer/Llama-2-7B-32K-Instruct")
#   provider = OpenAI()
#   provider = Huggingface()
provider = LiteLLM()

grounded = Groundedness(groundedness_provider=provider)

# Groundedness feedback: compares the collected return values of `retrieve`
# with the app's output and aggregates using the Groundedness aggregator.
_retrieved_context = Select.RecordCalls.retrieve.rets.collect()
f_groundedness = Feedback(
    grounded.groundedness_measure_with_cot_reasons,
    name="Groundedness",
).on(_retrieved_context).on_output().aggregate(grounded.grounded_statements_aggregator)

# Question/answer relevance between overall question and answer.
# Uses the provider's answer-relevance function; qs_relevance_with_cot_reasons
# is the question/statement (context) relevance function and is reserved for
# the context-relevance feedback.
f_answer_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on(Select.RecordCalls.retrieve.args.query)
    .on_output()
)

# Context relevance: the question passed to `retrieve` is scored against the
# collected return values of `retrieve`, and the scores are averaged.
_ctx_question = Select.RecordCalls.retrieve.args.query
_ctx_chunks = Select.RecordCalls.retrieve.rets.collect()
f_context_relevance = Feedback(
    provider.qs_relevance_with_cot_reasons,
    name="Context Relevance",
).on(_ctx_question).on(_ctx_chunks).aggregate(np.mean)

In [None]:


from trulens_eval import TruCustomApp
# Wrap the RAG instance so its calls are recorded under the app id 'RAG v1'.
# The automatic feedback functions are currently left disabled.
tru_rag = TruCustomApp(
    rag,
    app_id='RAG v1',
    # feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance],
)


In [None]:
# Run one query inside the TruLens recorder; `recording` is read back in a
# later cell via recording.get() to attach human feedback to this call.
with tru_rag as recording:
    rag.query("what is tenancy act. Please explain")

In [None]:
%pip install "ipython>=8.12.0" "ipywidgets>=8.0.6"

In [None]:
from ipywidgets import Button, HBox, VBox
from IPython.display import display

# Latest vote: None until a button is clicked, then 1 (thumbs up) or 0 (thumbs down).
human_feedback = None


def on_thumbs_up_button_clicked(b):
    """Click handler: store a positive vote (1) in the global ``human_feedback``."""
    global human_feedback
    human_feedback = 1


def on_thumbs_down_button_clicked(b):
    """Click handler: store a negative vote (0) in the global ``human_feedback``."""
    global human_feedback
    human_feedback = 0


thumbs_up_button = Button(description='👍')
thumbs_up_button.on_click(on_thumbs_up_button_clicked)

thumbs_down_button = Button(description='👎')
thumbs_down_button.on_click(on_thumbs_down_button_clicked)

# Last expression of the cell so the notebook renders the button row.
HBox([thumbs_up_button, thumbs_down_button])

In [None]:
# Attach the thumbs-up/down vote from the widget cell to the recorded call.
record = recording.get()
print(human_feedback)
if human_feedback is None:
    # No button has been clicked yet (e.g. under "Run All"); logging None would
    # store a feedback row without a score.
    print("No human feedback selected yet; click a button above and re-run this cell.")
else:
    tru.add_feedback(
        # Spelling corrected from "Human Feedack"; rows already logged under the
        # old name will show up as a separate feedback column.
        name="Human Feedback",
        record_id=record.record_id,
        app_id=tru_rag.app_id,
        result=human_feedback
    )

In [None]:
# Fetch logged records plus the feedback value returned alongside them, then show the last five records.
# NOTE(review): an empty app_ids list presumably means "all apps" — confirm against the TruLens docs.
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.tail(5)

In [None]:
# Use the wrapped app's id rather than repeating the 'RAG v1' literal, so the
# leaderboard keeps matching if the app id is ever changed where tru_rag is created.
tru.get_leaderboard(app_ids=[tru_rag.app_id])

In [None]:
# Launch the TruLens dashboard for browsing the logged records and feedback.
tru.run_dashboard()

In [None]:


# Leaderboard restricted to the app wrapped by tru_rag.
tru.get_leaderboard(app_ids=[tru_rag.app_id])
