In [9]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from config import *

In [10]:
# Connect to the Qdrant instance identified by URL (from config) on port 6333.
client = QdrantClient(location=URL, port=6333)

In [11]:
# Expose the Qdrant collection COLLECTION_NAME through the vector-store wrapper.
vector_store = QdrantVectorStore(
    collection_name=COLLECTION_NAME,
    client=client,
)

In [12]:
import fitz

In [13]:
# Open the source PDF; DIR_PATH comes from config.
pdf_path = DIR_PATH + "/CHAP04-BIOLOGY-CLASS11.pdf"
doc = fitz.open(pdf_path)

In [14]:
from llama_index.node_parser import SentenceSplitter

In [15]:
# Splitter used to break each page's text into chunks (chunk_size=512).
text_splitter = SentenceSplitter(chunk_size=512)

In [16]:
# Split the PDF page by page. The two lists are kept aligned:
# doc_idxs[i] is the zero-based index of the page that text_chunks[i] came from.
text_chunks = []
doc_idxs = []
for page_no, pdf_page in enumerate(doc):
    page_chunks = text_splitter.split_text(pdf_page.get_text("text"))
    text_chunks += page_chunks
    doc_idxs += [page_no] * len(page_chunks)

In [17]:
# Number of chunks produced: doc_idxs holds one page index per chunk.
len(doc_idxs)

29

In [18]:
# Peek at the second chunk to check what the splitter produced.
text_chunks[1]

'38\nBIOLOGY\nEchinoderms and Chordates, organs have\nassociated to form functional systems, each\nsystem concerned with a specific physiological\nfunction. This pattern is called organ system\nlevel of organisation. Organ systems in different\ngroups of animals exhibit various patterns of\ncomplexities. For example, the digestive system\nin Platyhelminthes has only a single opening\nto the outside of the body that serves as both\nmouth and anus, and is hence called\nincomplete. A complete digestive system has\ntwo openings, mouth and anus. Similarly, the\ncirculatory system may be of two types:\n(i) open type in which the blood is pumped\nout of the heart and the cells and tissues are\ndirectly bathed in it and\n(ii) closed type in which the blood is circulated\nthrough a series of vessels of varying diameters\n(arteries, veins and capillaries).\n4.1.2\nSymmetry\nAnimals can be categorised on the basis of their\nsymmetry. Sponges are mostly  asymmetrical,\ni.e., any plane that passes 

In [19]:
from llama_index.schema import TextNode

In [20]:
# Wrap every text chunk in a TextNode, preserving chunk order.
# The earlier loop also fetched doc[doc_idxs[idx]] for each chunk but never used
# the result; that dead per-chunk page lookup is removed. No metadata is
# attached to the nodes, exactly as before.
nodes = [TextNode(text=text_chunk) for text_chunk in text_chunks]

In [21]:
# Sanity check: show the first node's content, including any metadata.
sample_node = nodes[0]
print(sample_node.get_content(metadata_mode="all"))

ANIMAL KINGDOM
37
37
When you look around, you will observe different animals with different
structures and forms.  As over a million species of animals have been
described till now, the need for classification becomes all the more
important. The classification also helps in assigning a systematic position
to newly described species.
4.1
BASIS OF CLASSIFICATION
Inspite of differences in structure and form of different animals, there are
fundamental features common to various individuals in relation to the
arrangement of cells, body symmetry, nature of coelom, patterns of
digestive, circulatory or reproductive systems. These features are used
as the basis of animal classification and some of them are discussed here.
4.1.1
Levels of Organisation
Though all members of Animalia are multicellular, all of them do not
exhibit the same pattern of organisation of cells. For example, in sponges,
the cells are arranged as loose cell aggregates, i.e., they exhibit cellular
level of organisation. S

In [22]:
from llama_index.llms import AzureOpenAI
# Azure OpenAI LLM; endpoint, key, API version and deployment come from config.
# NOTE(review): `model` is "gpt-35-turbo" while the deployment variable is named
# `deployment_id_gpt4` — confirm both refer to the same underlying model.
llm = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=key,
    api_version=api_version,
    deployment_name=deployment_id_gpt4,
    model="gpt-35-turbo",
)

In [23]:
from llama_index.embeddings import FastEmbedEmbedding

# Embedding model built with the library's defaults (no arguments are passed).
embed_model = FastEmbedEmbedding()

100%|██████████| 76.7M/76.7M [00:01<00:00, 74.8MiB/s]


In [24]:
# Compute an embedding from each node's full content and store it on the node.
for text_node in nodes:
    content = text_node.get_content(metadata_mode="all")
    text_node.embedding = embed_model.get_text_embedding(content)

In [25]:
# Inspect the first node now that its embedding has been assigned.
nodes[0]

TextNode(id_='ce5ca700-9386-41d4-bdc9-0e000109129d', embedding=[0.02532103843986988, -0.08833573758602142, 0.05898318812251091, 0.0230718906968832, 0.006325772497802973, -0.03777813911437988, 0.062266502529382706, 0.02586042694747448, 0.008824646472930908, -0.0079966364428401, 0.01953330636024475, -0.07548432797193527, 0.009533827193081379, 0.011860290542244911, -0.01795472390949726, -0.021964941173791885, -0.03232721611857414, -0.009092910215258598, -0.061388444155454636, -0.01771245338022709, 0.027266161516308784, 0.00749569246545434, -0.027672940865159035, 0.0004405907529871911, -0.019668763503432274, 0.026303377002477646, -0.007223743014037609, 0.006526193581521511, -0.006719112861901522, -0.16052696108818054, -0.022231774404644966, 0.028223862871527672, 0.04432719573378563, 0.024357695132493973, -0.004855444189161062, 0.011562543921172619, 0.027013052254915237, -0.013572984375059605, -0.0227205790579319, 0.06573811918497086, 0.013253437355160713, -0.0399031825363636, 0.04542773589

In [26]:
# Add the embedded nodes to the vector store; the cell displays add()'s return value.
vector_store.add(nodes)

['ce5ca700-9386-41d4-bdc9-0e000109129d',
 '8a60c7ca-ed74-4f08-9a89-78ce25e5602c',
 '32559caa-f9bc-405b-8c4a-305548a7d1e9',
 'f4fdef20-4265-4585-b3af-3c54e1273ccd',
 '47ed22b5-e547-4fdc-82c4-0bd1166b03b3',
 '97e5fc55-6d69-48c0-ab20-e8e1def8f353',
 '727f2990-20da-4472-a58d-aa716bb0ab7f',
 'ab3ad19c-66c9-4876-83e3-f83fb9a5a4fe',
 'a4aefa13-edf9-4111-8f26-5ccd3ee31511',
 '886448d3-f242-41b8-a6d3-bc8d293e4def',
 '8c891e2f-5e9d-4d3e-9f93-16c25b2a4195',
 '3e96efdc-505b-4d3f-907a-0b61ebeac67c',
 '41e1d6e0-6010-44a6-a937-e2e61452fdc6',
 'a8ca5712-7abd-4d81-9d1b-64e5d336adc0',
 'a116d268-4809-46d9-a53f-693fde7cc836',
 '4f11df51-fea8-455d-8f88-f34f9b242ccf',
 '621e1496-46c1-45f8-9cde-7257f9408d68',
 '571ff50c-d0eb-44ca-a5cc-7eb270e4bf5e',
 'b7478719-6b05-4945-bc47-90971cd606dc',
 'ee98e522-f64f-4b3f-990d-2006aeaa5d77',
 'bea5e84f-6ec0-4a18-91b4-db8bac438fab',
 'be88c2eb-0816-4d64-8f2e-73be8c556ba7',
 'c7bdd709-be76-410c-b4d3-8969682f9b1e',
 'b441f741-3006-4b02-a874-6623639415e4',
 '4208f856-48c5-

In [27]:
from llama_index import VectorStoreIndex,ServiceContext
from llama_index.storage import StorageContext

In [28]:
# Bundle the LLM and the embedding model into one ServiceContext.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=llm,
)

In [29]:
# Build the index on top of the existing vector store, using the service context.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    service_context=service_context,
)

In [30]:
# Query engine over the index, created with default settings (no arguments).
query_engine = index.as_query_engine()

In [31]:
# Question to ask about the indexed chapter.
query = "What is Diploblastic and Triploblastic Organisation?"

In [32]:
# Run the question through the query engine and keep the response object.
response = query_engine.query(query)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [33]:
# Show the retrieved source snippets, then the question and the answer.
print(response.get_formatted_sources())
for label, value in (("query was:", query), ("answer was:", response)):
    print(label, value)

> Source (Doc id: 32559caa-f9bc-405b-8c4a-305548a7d1e9): When
any plane passing through the central axis of
the body divides the organism into two identic...

> Source (Doc id: 8a60c7ca-ed74-4f08-9a89-78ce25e5602c): 38
BIOLOGY
Echinoderms and Chordates, organs have
associated to form functional systems, each
sys...
query was: What is Diploblastic and Triploblastic Organisation?
answer was: Animals can be categorised based on the arrangement of their cells in embryonic layers. Diploblastic animals have two embryonic layers - an external ectoderm and an internal endoderm, with an undifferentiated layer called mesoglea present in between. Coelenterates are examples of diploblastic animals. In contrast, triploblastic animals have three embryonic layers - ectoderm, mesoderm, and endoderm. Animals like echinoderms and chordates exhibit triploblastic organisation, where organs are associated to form functional systems.


In [34]:
from pprint import pprint

In [35]:
# Pretty-print the answer text so the long string is wrapped across lines.
pprint(str(response))

('Animals can be categorised based on the arrangement of their cells in '
 'embryonic layers. Diploblastic animals have two embryonic layers - an '
 'external ectoderm and an internal endoderm, with an undifferentiated layer '
 'called mesoglea present in between. Coelenterates are examples of '
 'diploblastic animals. In contrast, triploblastic animals have three '
 'embryonic layers - ectoderm, mesoderm, and endoderm. Animals like '
 'echinoderms and chordates exhibit triploblastic organisation, where organs '
 'are associated to form functional systems.')
