In [None]:
# # ! pip install torch
# ! pip install pymupdf
# ! pip install langchain sentence_transformers llama-index
# ! pip install -U git+https://github.com/huggingface/transformers.git
# ! pip install -U git+https://github.com/huggingface/accelerate.git

In [None]:
# !nvidia-smi

## Import Libraries

In [23]:
import torch
from transformers import pipeline

# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms.base import LLM

from llama_index import SimpleDirectoryReader, download_loader
from llama_index import GPTVectorStoreIndex, VectorStoreIndex
from llama_index import ServiceContext, StorageContext
from llama_index import LangchainEmbedding, LLMPredictor
from llama_index import load_index_from_storage

## Create a custom class for LLM pipeline

In [24]:
class customLLM(LLM):
    """LangChain ``LLM`` subclass backed by a HuggingFace text2text-generation pipeline.

    The pipeline is constructed while the class body executes, so the model is
    loaded as soon as this cell runs rather than on the first call.
    """

    # HuggingFace model id; also reported by `_identifying_params`.
    model_name = "google/flan-t5-large"
    # device_map="auto" and torch_dtype=torch.bfloat16 are forwarded to the
    # pipeline / model constructor as-is.
    pipeline = pipeline(
        "text2text-generation",
        model=model_name,
        device_map="auto",
        model_kwargs={"torch_dtype":torch.bfloat16})

    def _call(self, prompt, stop=None):
        """Generate text for ``prompt`` and return it as a string.

        Args:
            prompt: Text passed to the pipeline.
            stop: Optional iterable of stop sequences. When given, the
                generated text is cut just before the earliest occurrence of
                any of them. ``None`` or an empty value leaves the text
                untouched.

        Returns:
            The ``"generated_text"`` field of the first pipeline result,
            truncated at the first stop sequence if one occurs.
        """
        # max_length : max length of generated text
        generated = self.pipeline(prompt, max_length=2000)[0]["generated_text"]

        if stop:
            # Skip empty stop strings: "" matches at index 0 and would wipe the output.
            cut_points = [generated.find(seq) for seq in stop if seq and seq in generated]
            if cut_points:
                generated = generated[:min(cut_points)]
        return generated

    @property
    def _identifying_params(self):
        """Return the parameters that identify this LLM (the model name)."""
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self):
        """Return the LLM type label, ``"custom"``."""
        return "custom"

# Wrap an instance of the custom LLM in a llama_index LLMPredictor.
llm_predictor = LLMPredictor(llm=customLLM())

## Load Huggingface Embeddings and use llama_index wrapper

In [25]:
# Create the LangChain HuggingFace embeddings with their default arguments
# and wrap them in llama_index's LangchainEmbedding adapter.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(),
)

## Load documents for building the index

In [26]:
# Fetch the PyMuPDFReader loader class through llama_index's download_loader.
PyMuPDFReader = download_loader("PyMuPDFReader")
# Path of the PDF document to build the index from.
filepath = "/kaggle/input/annual-report/finance_dataset.pdf"

documents = PyMuPDFReader().load(file_path=filepath, metadata=True)

# Ensure document texts are str, not bytes. Decode only when the loader
# actually returned bytes: calling .decode() on a str raises AttributeError.
for doc in documents:
    if isinstance(doc.text, (bytes, bytearray)):
        doc.text = doc.text.decode()

## Use service_context to access llama_index services for indexing and retrieval

In [27]:
# Bundle the custom LLM predictor and the embedding model into one ServiceContext.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm_predictor=llm_predictor,
)

## Building Vector Store Index

In [28]:
# Build the vector store index from the loaded documents, using the
# service context configured earlier in the notebook.
index = GPTVectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

### Saving Vector Store Index

In [29]:
# Persist the index's storage context to the Kaggle working directory.
index.storage_context.persist(
    persist_dir="/kaggle/working/index",
)

## Loading Vector Store Index

In [30]:
# Rebuild a storage context from the persisted directory, then load the index
# from it, passing the notebook's service_context.
# (`load_index_from_storage` is imported in the notebook's import cell.)
storage_context = StorageContext.from_defaults(persist_dir="/kaggle/working/index")
loaded_index = load_index_from_storage(storage_context, service_context=service_context)

## Querying

In [31]:
# Create a query engine over the index loaded from storage, with no extra options.
query_engine = loaded_index.as_query_engine()

In [32]:
# Ask a single question through the query engine and print the response.
query = "How much was our Revenue?"
response = query_engine.query(query)
print(response)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (661 > 512). Running this sequence through the model will result in indexing errors


191,754 crore


In [33]:
# Vector store query modes to try, in order.
query_modes = ["svm", "linear_regression", "logistic_regression"]

# Ask the same question once per mode, building a fresh query engine each time.
for mode in query_modes:
    query_engine = loaded_index.as_query_engine(vector_store_query_mode=mode)
    response = query_engine.query("What rank we got for customer satisfaction?")
    print(f"Query mode: {mode}")
    print(response)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query mode: svm
#1


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query mode: linear_regression
#1


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Query mode: logistic_regression
#1


In [None]:
# !zip -r index.zip /kaggle/working/index