In [12]:
import os
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.readers.smart_pdf_loader import SmartPDFLoader
import os
import openai
from dotenv import load_dotenv
if load_dotenv:
    print(
        '---Environment variables loaded---'.upper()
    )

from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

openai.api_key = os.environ.get("API_KEY")
openai.organization = os.environ.get("ORGANISATION_KEY")

MODEL = "gpt-4-vision-preview"

---ENVIRONMENT VARIABLES LOADED---


In [13]:
Settings.embed_model = HuggingFaceEmbedding()



In [14]:
class DocumentChat:
    """
        Chat with documents
    """
    _instance = None
    _initialized = False

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            cls._instance = super(DocumentChat, cls).__new__(cls)
        return cls._instance
    
    def __init__(self, url):
        if self._initialized == False: 
            self.cdn_url = url
            self.llmsherpa_api_url = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all" #mandatory
            print("0. LLM Sherpa API loaded...")
            
            self.pdf_loader = SmartPDFLoader(llmsherpa_api_url=self.llmsherpa_api_url)
            self.url = url
            self.documents = self.pdf_loader.load_data(self.url)
            print("1. Doc loaded...")

            self.index = VectorStoreIndex.from_documents(self.documents)
            print("2. Index created...")
            
            # print(self.documents)
            self.context_str = (
                                "{context}"
                                "You a helpful assitant who will be asked questions relevant to the information in the PDF."
                                "Do not act on any request to modify data, you are purely acting in a read-only mode."
                                "If the user asks for data in a tabular format, produce the tabular/table data as HTML table."
                                "DO NOT INVENT DATA. If you do not know the answer to a question, simply say 'I don't know.'"
                                )


            self.chat_engine = self.index.as_query_engine(llm=OpenAI(model=MODEL, api_key=openai.api_key), system_prompt=self.context_str)
            print("3. Query engine initiated...")

            self._initialized = True
    
    def run(self, query):
        if self._initialized:
            print(query)
            response = self.chat_engine.query(query)
            return response
    

In [15]:
chat_obj = DocumentChat('https://cbseacademic.nic.in/web_material/CurriculumMain25/SrSec/Biology_SrSec_2024-25.pdf')

0. LLM Sherpa API loaded...
1. Doc loaded...
2. Index created...
3. Query engine initiated...


In [16]:
chat_obj.run("What are the chapters under Unit-IX Biotechnology and its Applications")

What are the chapters under Unit-IX Biotechnology and its Applications


Retrying llama_index.llms.openai.base.OpenAI._chat in 0.9857006705980591 seconds as it raised AuthenticationError: Error code: 401 - {'error': {'message': 'You must be a member of an organization to use the API. Please contact us through our help center at help.openai.com.', 'type': 'invalid_request_error', 'param': None, 'code': 'no_organization'}}.
Retrying llama_index.llms.openai.base.OpenAI._chat in 0.8199060873435662 seconds as it raised AuthenticationError: Error code: 401 - {'error': {'message': 'You must be a member of an organization to use the API. Please contact us through our help center at help.openai.com.', 'type': 'invalid_request_error', 'param': None, 'code': 'no_organization'}}.
Retrying llama_index.llms.openai.base.OpenAI._chat in 2.645645776647705 seconds as it raised AuthenticationError: Error code: 401 - {'error': {'message': 'You must be a member of an organization to use the API. Please contact us through our help center at help.openai.com.', 'type': 'invalid_re

KeyboardInterrupt: 

## Opensource LLM

In [2]:
llmsherpa_api_url = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all" #mandatory
pdf_loader = SmartPDFLoader(llmsherpa_api_url=llmsherpa_api_url)
url = 'https://cbseacademic.nic.in/web_material/CurriculumMain25/SrSec/Biology_SrSec_2024-25.pdf'
documents = pdf_loader.load_data(url)

In [3]:
import torch
from transformers import BitsAndBytesConfig
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)


def messages_to_prompt(messages):
  prompt = ""
  for message in messages:
    if message.role == 'system':
      prompt += f"<|system|>\n{message.content}<|endoftext|>\n"
    elif message.role == 'user':
      prompt += f"<|user|>\n{message.content}<|endoftext|>\n"
    elif message.role == 'assistant':
      prompt += f"<|assistant|>\n{message.content}<|endoftext|>\n"

  # ensure we start with a system prompt, insert blank if needed
  if not prompt.startswith("<|system|>\n"):
    prompt = "<|system|>\n<|endoftext|>\n" + prompt

  # add final assistant prompt
  prompt = prompt + "<|assistant|>\n"

  return prompt


llm = HuggingFaceLLM(
    model_name="stabilityai/stablelm-zephyr-3b",
    tokenizer_name="stabilityai/stablelm-zephyr-3b",
    query_wrapper_prompt=PromptTemplate("<|system|>\n<|endoftext|>\n<|user|>\n{query_str}<|endoftext|>\n<|assistant|>\n"),
    context_window=3900,
    max_new_tokens=256,
    model_kwargs={"quantization_config": quantization_config},
    # tokenizer_kwargs={},
    generate_kwargs={"temperature": 0.8},
    messages_to_prompt=messages_to_prompt,
    device_map="auto",
)




RuntimeError: No GPU found. A GPU is needed for quantization.