In [18]:
%pip install llama-index-readers-file pymupdf
%pip install llama-index-vector-stores-chroma
%pip install llama-index-embeddings-together
%pip install trulens-eval


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [27]:
import os
from dotenv import load_dotenv
load_dotenv()

api_key=os.getenv("TOEGETHER_API_KEY")

from llama_index.embeddings.together import TogetherEmbedding

embed_model = TogetherEmbedding(
    model_name="togethercomputer/m2-bert-80M-8k-retrieval", api_key=api_key
)

In [28]:
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from IPython.display import Markdown, display
from llama_index.core import Settings
import chromadb

In [30]:
chroma_client = chromadb.EphemeralClient()
chroma_collection = chroma_client.create_collection("starter")

In [74]:

from pathlib import Path
from llama_index.readers.file import PyMuPDFReader


loader = PyMuPDFReader()
documents = loader.load(file_path="./data/AGI.pdf")

print(len(documents))
print(documents[0])

48
Doc ID: 0298c9c3-859d-4954-aaee-72e2967e5a62
Text: Journal of Artiﬁcial General Intelligence 5(1) 1-46, 2014
Submitted 2013-2-12 DOI: 10.2478/jagi-2014-0001 Accepted 2014-3-15
Artiﬁcial General Intelligence: Concept, State of the Art, and Future
Prospects Ben Goertzel BEN@GOERTZEL.ORG OpenCog Foundation G/F, 51C
Lung Mei Village Tai Po, N.T., Hong Kong Editor: Tsvi Achler Abstract
In recent year...


In [75]:
from llama_index.core import Document

document = Document(text="\n\n".join([doc.text for doc in documents]))

In [31]:
from llama_index.core.node_parser import SentenceSplitter

text_splitter = SentenceSplitter(
  separator=" ",
  chunk_size=1024,
  chunk_overlap=20,
  paragraph_separator="\n\n\n",
  secondary_chunking_regex="[^,.;。]+[,.;。]?",

)


In [56]:
import os
from dotenv import load_dotenv
from llama_index.llms.cohere import Cohere
load_dotenv()

cohere_key=os.getenv("CO_API_KEY")


In [41]:
llm = Cohere(api_key=cohere_key)

In [42]:
index = VectorStoreIndex.from_documents(
    documents, embed_model=embed_model
)

In [43]:
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("What is adhd?")
print(response)


ADHD, attention deficit hyperactivity disorder, is a complex mental health condition that can impact both children and adults. It is characterised by symptoms such as impaired concentration, hyperactivity and impulsivity. The latter can manifest physically, such as being unable to sit still for long periods, or mentally, with individuals finding it hard to focus on a single thought. 

While some individuals with ADHD may experience benefits from their symptoms, such as living an exciting and spontaneous life, the condition can also have negative effects. These can include low self-esteem, limited educational and employment opportunities, and comparisons to others, which can negatively impact an individual's confidence and potential.


Sentence Window Retrieval

In [44]:
from llama_index.core.node_parser import SentenceWindowNodeParser

# create the sentence window node parser w/ default settings
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [45]:
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = node_parser


In [46]:
sentence_index = VectorStoreIndex.from_documents(
    [document], embed_model=embed_model
)

In [47]:
sentence_index.storage_context.persist(persist_dir="persist/sentence_index")

In [50]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window"
)

In [70]:
from llama_index.core.schema import NodeWithScore
from copy import deepcopy
from llama_index.core import Document

nodes = node_parser.get_nodes_from_documents([document])

scored_nodes = [NodeWithScore(node=x, score=1.0) for x in nodes]
nodes_old = [deepcopy(n) for n in nodes]

In [76]:
nodes_old[2].text

'The impact of attention deﬁcit hyperactivity\ndisorder (ADHD) in adulthood: a qualitative study\nC. Watters1, D. Adamis2,*, F. McNicholas3 and B. Gavin3\n1 Psychology Services, Markievicz House, HSE, Sligo, Ireland\n2 Mental Health Services, HSE, Sligo, Ireland\n3 Child and Adolescent Psychiatry, School of Medicine, UCD, Dublin, Ireland\nObjectives. '

In [48]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

query_engine = sentence_index.as_query_engine(
    similarity_top_k=2,
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)

query = "What is adhd?"
window_response = query_engine.query(
    query
)
print(window_response)

ADHD, or Attention Deficit Hyperactivity Disorder, is a complex mental health condition that often presents symptoms such as poor concentration, hyperactivity, and impulsivity. These symptoms can impact individuals' daily lives and functioning, leading to a sense of lost potential and low self-esteem. 

It is a disorder that can persist from childhood into adulthood, and adults with ADHD might experience challenges in various areas of their lives, including work and social situations.


Answer relevancy test

In [12]:
import google.generativeai as genai
import os

gen_key = os.getenv("GEMINI_API_KEY")

genai.configure(api_key="AIzaSyDa4rFS5Y9BXRsxw5lBeF5XX2uopFvELv0")


In [3]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

In [None]:
actual_output = window_response

In [None]:
metric = AnswerRelevancyMetric(
    threshold=0.7,
    model="gemini-1.5-flash ",
    include_reason=True
)

test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output
)

Auto Merging Retrieval

In [92]:
from llama_index.core.node_parser import HierarchicalNodeParser

node_parser = HierarchicalNodeParser.from_defaults()
    

In [95]:
nodes = node_parser.get_nodes_from_documents([document])

In [97]:
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes


In [100]:
leaf_nodes = get_leaf_nodes(nodes)


root_nodes = get_root_nodes(nodes)

len(leaf_nodes)
len(root_nodes)

7

In [102]:
from llama_index.core.storage.docstore import SimpleDocumentStore 
from llama_index.core import StorageContext


In [106]:
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

storage_context = StorageContext.from_defaults(docstore=docstore)

In [107]:
base_index = VectorStoreIndex(
    leaf_nodes,
    storage_context=storage_context,
)

Define Retriever

In [2]:
from llama_index.core.retrievers import AutoMergingRetriever