In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader('data').load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
response = query_engine.query("summarize each document in a few sentences")

print(response)

The first document discusses ancient Rome, highlighting its grand architecture, skilled engineering, Roman Republic governance, and the lasting influence of Roman civilization on modern societies. The second document focuses on dogs as loyal companions, describing their various breeds, unique traits, and the joy and comfort they bring to people's lives as beloved pets.


In [5]:
from llama_index.core import Document

text = "The quick brown fox jumps over the lazy dog."
doc = Document(
    text=text, 
    metadata={'author': 'John Doe','category': 'others'}, 
    id_='1'
)
print(doc)

Doc ID: 1
Text: The quick brown fox jumps over the lazy dog.


In [6]:
from llama_index.core import Document
from llama_index.core.schema import (
    TextNode, 
    NodeRelationship
)
doc = Document(text="First sentence. Second Sentence")
n1 = TextNode(text="First sentence", node_id=doc.doc_id)
n2 = TextNode(text="Second sentence", node_id=doc.doc_id)

n1.relationships[NodeRelationship.NEXT] = n2.node_id 
n2.relationships[NodeRelationship.PREVIOUS] = n1.node_id
print(n1.relationships)
print(n2.relationships)

{<NodeRelationship.NEXT: '3'>: 'ea55eee2-1e30-471f-8848-0ff3bf9be754'}
{<NodeRelationship.PREVIOUS: '2'>: 'fd6689fa-7684-498c-a244-25d7703c366c'}


In [7]:
from llama_index.core import Document
from llama_index.core.schema import TextNode

doc = Document(text="This is a sample document text")
n1 = TextNode(text=doc.text[0:16], doc_id=doc.id_) 
n2 = TextNode(text=doc.text[17:30], doc_id=doc.id_)
print(n1)
print(n2)


Node ID: 3895458c-6553-4df2-a6e3-40722a2c6680
Text: This is a sample
Node ID: 719892ed-c4fc-418a-88d4-2fa624cac442
Text: document text


In [8]:
from llama_index.llms.openai import OpenAI
from llama_index.core.settings import Settings
Settings.llm = OpenAI(temperature=0.8, model="gpt-4")

from llama_index.core.schema import TextNode
from llama_index.core import SummaryIndex

nodes = [
  TextNode(text="Lionel Messi's hometown is Rosario."),
  TextNode(text="He was born on June 24, 1987.")
]
index = SummaryIndex(nodes)
query_engine = index.as_query_engine()
response = query_engine.query(
    "What is Messi's hometown?"
)
print(response)

Messi's hometown is Rosario.


In [9]:
from llama_index.llms.openai import OpenAI
llm = OpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0.2, 
    max_tokens=50,
    additional_kwargs={
        "seed": 12345678,
        "top_p": 0.5
    }
)
response = llm.complete(
    "Explain the concept of gravity in one sentence"
)
print(response)

Gravity is the force that attracts objects with mass towards each other.


In [10]:
from llama_index.core import Document
from llama_index.core.node_parser import TokenTextSplitter

doc = Document( 
    text=(
    "This is sentence 1. This is sentence 2. "
    "Sentence 3 here."
    ),
    metadata={"author": "John Smith"}
)  
splitter = TokenTextSplitter( 
    chunk_size=12, 
    chunk_overlap=0, 
    separator=" "
) 

nodes = splitter.get_nodes_from_documents([doc]) 
for node in nodes: 
    print(node.text) 
    print(node.metadata)

Metadata length (6) is close to chunk size (12). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.
This is sentence 1.
{'author': 'John Smith'}
This is sentence 2.
{'author': 'John Smith'}
Sentence 3 here.
{'author': 'John Smith'}


In [12]:
from llama_index.core import Document
from llama_index.core.node_parser import TokenTextSplitter
doc = Document(text=("This is sentence 1. This is sentence 2. "
    "Sentence 3 here."),
    metadata={"author": "John Smith", "tester" : "Jimmy"}
)
splitter = TokenTextSplitter(
    chunk_size=12,
    chunk_overlap=0,
    separator=" "
    )
nodes = splitter.get_nodes_from_documents([doc])
for node in nodes:
    print(node.text)
    print(node.metadata)

Metadata length (10) is close to chunk size (12). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.
This is
{'author': 'John Smith', 'tester': 'Jimmy'}
sentence
{'author': 'John Smith', 'tester': 'Jimmy'}
1.
{'author': 'John Smith', 'tester': 'Jimmy'}
This is
{'author': 'John Smith', 'tester': 'Jimmy'}
sentence
{'author': 'John Smith', 'tester': 'Jimmy'}
2.
{'author': 'John Smith', 'tester': 'Jimmy'}
Sentence
{'author': 'John Smith', 'tester': 'Jimmy'}
3
{'author': 'John Smith', 'tester': 'Jimmy'}
here.
{'author': 'John Smith', 'tester': 'Jimmy'}


In [13]:
from llama_index.core import Document
from llama_index.core.schema import (
    TextNode,
    NodeRelationship,
    RelatedNodeInfo
    )
doc = Document(text="First sentence. Second Sentence")
n1 = TextNode(text="First sentence", node_id=doc.doc_id)
n2 = TextNode(text="Second sentence", node_id=doc.doc_id)
n1.relationships[NodeRelationship.NEXT] = n2.node_id
n2.relationships[NodeRelationship.PREVIOUS] = n1.node_id
print(n1.relationships)
print(n2.relationships)

{<NodeRelationship.NEXT: '3'>: '284d5dcb-e360-4bf0-8d0e-6940bf03ab8f'}
{<NodeRelationship.PREVIOUS: '2'>: '78103e07-3226-4b2e-8bd9-47a12f1b3346'}


In [16]:
from llama_index.core import SummaryIndex, Document
from llama_index.core.schema import TextNode
nodes = [
    TextNode(
        text="Lionel Messi is a football player from Argentina."
    ),
    TextNode(
        text="He has won the Ballon d'Or trophy 7 times."
    ),
    TextNode(text="Lionel Messi's hometown is Rosario."),
    TextNode(text="He was born on June 24, 1987.")
]
index = SummaryIndex(nodes)
index

<llama_index.core.indices.list.base.SummaryIndex at 0x1d86464af90>