In [None]:
! pip install llama_index llama-index-embeddings-huggingface llama-index-vector-stores-qdrant 

In [None]:
! pip install llama-index-llms-gemini 

In [None]:
! pip install -q llama-index google-generativeai 

In [None]:
import os
import pprint
import sys

import google.generativeai as genai
import qdrant_client
from llama_index.core import (
    Document,
    ServiceContext,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.llms import ChatMessage
from llama_index.core.node_parser import SentenceSplitter, SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.schema import MetadataMode
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.vector_stores.qdrant import QdrantVectorStore


In [None]:
# Load IPython's autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2

In [None]:
# Embedding model used for both the sentence-window and the base pipeline.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2",
    max_length=512,
)

In [None]:
# Use a key that is already exported in the environment, so a real key is
# never overwritten by the placeholder. If the variable is unset, the
# placeholder below is used; replace it (or export GOOGLE_API_KEY) before
# calling the Gemini API.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "GOOGLE_API_KEY")  # Add your api key here
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [None]:
# Print the name of every model whose supported generation methods
# include "generateContent".
for model in genai.list_models():
    if "generateContent" not in model.supported_generation_methods:
        continue
    print(model.name)

In [None]:
# Gemini chat model used for answer generation. The id was misspelled as
# "gimini-pro"; NOTE(review): confirm "models/gemini-pro" is among the names
# printed by the genai.list_models() cell for this API key.
llm = Gemini(model="models/gemini-pro")

In [None]:
# Smoke test: send a single user turn to the LLM and print the reply.
# Further turns can be appended to the list, e.g.:
#   ChatMessage(role="assistant", content="Yarr what is shakin' matey?")
#   ChatMessage(role="user", content="Help me decide what to have for dinner.")
messages = [ChatMessage(role="user", content="Hello friend!")]
response = llm.chat(messages)
print(response)

In [None]:
# Disabled cell: the commands live inside a bare string literal, so nothing runs.
# wget's output-file option is a capital letter O (`-O`), not the digit zero.
# NOTE(review): these targets differ from the got_book.txt file that the
# loading cell reads; confirm which data set is intended.
"""# create local directory and retrieve file from external source
!mkdir -p 'my_data'
!wget 'https://www.gutenberg.org/cache/epub/72306/pg72306.txt' -O './my_data/teahistory.txt'
!wget 'https://www.gutenberg.org/cache/epub/11367/pg11367.txt' -O './my_data/chinahistory.txt'

"""

In [None]:
# Read the source book from disk into a list of Document objects.
documents = SimpleDirectoryReader(
    input_files=["/content/my_data/got_book.txt"],
).load_data()

In [None]:
# Display the loaded documents as the cell output (this can be large).
documents

In [None]:
# Report how many Document objects were loaded, followed by a separator line.
print(f"length of doc: {len(documents)}", "-----", sep="\n")
# pprint.pprint(documents)  # uncomment to dump every document

In [None]:
# Show the metadata attached to the first loaded document.
documents[0].metadata

In [None]:
# Sentence-window parser with a window size of 3. The window text is stored
# under the "window" metadata key and the original sentence under
# "original_text".
sentence_node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

In [None]:
# Split the documents into nodes with the sentence-window parser.
nodes = sentence_node_parser.get_nodes_from_documents(
    documents,
)

In [None]:
# Baseline parser: a SentenceSplitter with default settings, used as the
# comparison point for the sentence-window parser.
# (The class name was misspelled as "SenteceSplitter".)
base_node_parser = SentenceSplitter()

In [None]:
# Split the same documents into nodes with the baseline parser.
base_nodes = base_node_parser.get_nodes_from_documents(
    documents,
)

In [None]:
# Register the global LlamaIndex defaults: baseline splitter, embedding model
# and Gemini LLM.
Settings.text_splitter = base_node_parser
Settings.embed_model = embed_model
Settings.llm = llm

In [None]:
# Number of nodes produced by the sentence-window parser.
len(nodes)

In [None]:
# Number of nodes produced by the baseline parser.
len(base_nodes)

In [None]:
# Print the node at index 7 from each parser, one after the other, for comparison.
print("---------", "SENTENCE NODES", "--------", sep="\n")
print(nodes[7])
print("---------", "BASE NODES", "---------", sep="\n")
print(base_nodes[7])

In [None]:
# Text of the sentence-window node at index 7.
nodes[7].text

In [None]:
# Text of the baseline node at index 7.
base_nodes[7].text

In [None]:
# Convert the sentence-window node at index 7 to a dict to inspect its fields.
dict(nodes[7])

In [None]:
# Bundle the LLM, the embedding model and the sentence-window parser into one
# ServiceContext.
# NOTE(review): ServiceContext is deprecated in LlamaIndex in favour of
# Settings / explicit arguments — confirm the installed version still accepts
# this call.
ctx_sentence = ServiceContext.from_defaults(llm=llm, embed_model=embed_model, node_parser=sentence_node_parser)

In [None]:
# Bundle the LLM, the embedding model and the baseline parser into one
# ServiceContext.
# NOTE(review): ServiceContext is deprecated in LlamaIndex in favour of
# Settings / explicit arguments — confirm the installed version still accepts
# this call.
ctx_base = ServiceContext.from_defaults(llm=llm, embed_model=embed_model, node_parser=base_node_parser)

In [None]:
# Qdrant client used by the vector stores created below.
# Replace both placeholder strings with the real endpoint and credentials.
# (The module name was misspelled as "qdrant_clinet".)
client = qdrant_client.QdrantClient(
    "Qdrant_url",
    api_key="qdrant_api_key",  # for Qdrant Cloud; None for a local instance
)

In [None]:
# Disabled alternative client setup, kept inside a bare string literal so it
# does not run. The URL template is written as http://<host>:<port>.
'''client = qdrant_client.QdrantClient(
# you can use :memory: mode for fast and light-weight experiments,
# it does not require to have Qdrant deployed anywhere
# but requires qdrant-client >= 1.1.1
# location=":memory:"
# otherwise set Qdrant instance address with:
# url="http://<host>:<port>"
# otherwise set Qdrant instance with host and port:
host="localhost",
port=6333
# set API KEY for Qdrant Cloud
# api_key="<qdrant-api-key>",
)
'''

In [None]:
# Qdrant-backed vector store for the "got_sent_node" collection.
vector_store = QdrantVectorStore(
    collection_name="got_sent_node",
    client=client,
)

In [None]:
# Storage context that wraps the "got_sent_node" vector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [None]:
# Build the sentence-window index on top of `storage_context`.
# The parser and embedding model are passed directly rather than through a
# ServiceContext, which LlamaIndex has deprecated. The LLM used at query time
# is then expected to come from Settings.llm (set in an earlier cell).
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    transformations=[sentence_node_parser],
    embed_model=embed_model,
)

In [None]:
# Qdrant-backed vector store for the "got_base_node" collection.
vector_store_2 = QdrantVectorStore(
    collection_name="got_base_node",
    client=client,
)

In [None]:
# Storage context that wraps the "got_base_node" vector store.
storage_context_2 = StorageContext.from_defaults(
    vector_store=vector_store_2,
)

In [None]:
# Build the baseline index on top of `storage_context_2`.
# The parser and embedding model are passed directly rather than through a
# ServiceContext, which LlamaIndex has deprecated. The LLM used at query time
# is then expected to come from Settings.llm (set in an earlier cell).
index_2 = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context_2,
    transformations=[base_node_parser],
    embed_model=embed_model,
)

In [None]:
#sentence_index = VectorStoreIndex(nodes, service_context=ctx_sentence)

In [None]:
#base_index = VectorStoreIndex(base_nodes, service_context=ctx_base)

In [None]:
# Disabled cell: the persist calls sit inside a bare string literal and never run.
# NOTE(review): `sentence_index` / `base_index` are only assigned in
# commented-out lines in the visible notebook.
"""
sentence_index.storage_context.persist(persist_dir='./sentence_index')
base_index.storage_context.persist(persist_dir='./base_index')

"""

In [None]:
# Download to own computer for backup
# Disabled cell: the commands sit inside a bare string literal and never run.
# The download path is a quoted string; the earlier `*./indexes.zip` form was
# not valid Python.
"""
!zip -r ./indexes.zip ./*_index

from google.colab import files
files.download('./indexes.zip')

"""

In [None]:
# Disabled cell (bare string literal): would rebuild storage contexts from the
# "./sentence_index" and "./base_index" directories.
'''# rebuild storage context
SC_retrieved_sentence = StorageContext.from_defaults(persist_dir="./sentence_index")
SC_retrieved_base = StorageContext.from_defaults(persist_dir="./base_index")'''

In [None]:
# Disabled cell (bare string literal): would load both indexes from the
# storage contexts named in the disabled "rebuild storage context" cell.
'''# load index
retrieved_sentence_index = load_index_from_storage(SC_retrieved_sentence)
retrieved_base_index = load_index_from_storage(SC_retrieved_base)'''


In [None]:
# Disabled cell (bare string literal): an earlier sentence-window query engine
# built from `sentence_index` with similarity_top_k=5.
# NOTE(review): `sentence_index` is only assigned in a commented-out line in
# the visible notebook.
'''sentence_query_engine = sentence_index.as_query_engine(
    similarity_top_k=5,
    verbose=True,
    # the target key defaults to 'window' to match the node_parser's default
    node_postprocessors=[
        MetadataReplacementPostProcessor(target_metadata_key="window")
    ],
)
'''

In [None]:
# Disabled cell (bare string literal): an earlier baseline query engine built
# from `base_index` with similarity_top_k=5.
# NOTE(review): `base_index` is only assigned in a commented-out line in the
# visible notebook.
'''
base_query_engine = base_index.as_query_engine(
    similarity_top_k=5,
    verbose=True
    )

'''

In [None]:
# Query engine over the sentence-window index, requesting the top 3 matches.
# The post-processor targets the "window" metadata key, the same key the
# sentence-window parser was configured with.
sentence_query_engine = index.as_query_engine(
    node_postprocessors=[MetadataReplacementPostProcessor(target_metadata_key="window")],
    similarity_top_k=3,
    verbose=True,
)

In [None]:
# Query engine over the baseline index, requesting the top 3 matches.
base_query_engine = index_2.as_query_engine(verbose=True, similarity_top_k=3)

Generation of the data

In [None]:
# Question sent to both query engines for comparison.
question = (
    "How long have Gared and Will been part of the Night's Watch?"
)

In [None]:
# Run the question through the baseline engine and print its answer.
base_response = base_query_engine.query(question)
print(base_response)

In [None]:
# Run the same question through the sentence-window engine and print its answer.
sentence_response = sentence_query_engine.query(question)
print(sentence_response)