In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install -q transformers langchain llama-index llama-index-embeddings-huggingface
!pip install -q pypdf sentence_transformers accelerate bitsandbytes

In [3]:
# Import required libraries
from llama_index.readers.file import PDFReader

# Load the PDF
pdf_path = "/content/drive/MyDrive/Colab Notebooks/edit2016.pdf"
documents = PDFReader().load_data(file=pdf_path)

# Check if the document loaded correctly
print(documents[0].text[:500])  # Print first 500 characters


 
BIO-MEDICAL WASTE MANAGEMENT RULES, 2016 as amended till 2019  
[Published in the Gazette of India, Extraordinary, Part II, Section 3, Sub-section (i)]  
 GOVERNMENT OF INDIA  
MINISTRY OF ENVIRONMENT, FOREST AND CLIMATE CHANGE  
NOTIFICATION  
New Delhi, the 28th March, 2016  
G.S.R. 343(E).-Whereas the Bio-Medical Waste (Management and Handling) Rules, 1998 was published 
vide notification number S.O. 630 (E) dated the 20 th July, 1998, by the Government of India in the 
erstwhile Ministry o


In [6]:
system_prompt = """
You are MedWaste Guardian, an AI assistant specializing in biomedical waste compliance.
Your goal is to provide accurate legal guidance based on the Bio-Medical Waste Management Rules, 2016.
Use retrieved legal texts to generate well-structured and compliant responses.
If information is unavailable, state so clearly.
"""
from llama_index.core.prompts.prompts import SimpleInputPrompt

# Define query wrapper prompt
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")


In [7]:
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import BitsAndBytesConfig
import torch

# Configure quantization to optimize memory usage
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True  # Offload some layers to CPU if needed
)

# Set the device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the LLM
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 1.0, "do_sample": False},  # ✅ Fixed temperature issue
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",  # Auto-assigns layers across GPU/CPU
    model_kwargs={"torch_dtype": torch.float16, "quantization_config": bnb_config}
)

print("LLaMA-2 model initialized successfully!")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

LLaMA-2 model initialized successfully!


In [8]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Initialize embedding model and ensure it runs on GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"

embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2", device=device)

print("Embedding model loaded successfully on:", device)


Embedding model loaded successfully on: cuda


In [None]:
from llama_index.core import Settings
from llama_index.llms.huggingface import HuggingFaceLLM  

# ✅ Force LlamaIndex to NOT use OpenAI
Settings.llm = None  

# ✅ Load LLaMA-2 model
llm = HuggingFaceLLM(model_name="meta-llama/Llama-2-7b-chat-hf")

# ✅ Set LLaMA-2 as the LLM
Settings.llm = llm  

print("✅ LLaMA-2 is now the active LLM:", Settings.llm)


In [12]:
from llama_index.readers.file import PDFReader
from llama_index.core import VectorStoreIndex


pdf_path = "/content/drive/MyDrive/Colab Notebooks/edit2016.pdf"
documents = PDFReader().load_data(file=pdf_path)

# Create the index
index = VectorStoreIndex.from_documents(documents, settings=Settings)


In [3]:

# Create a query engine from the index
query_engine = index.as_query_engine(llm=llm)


NameError: name 'index' is not defined

In [14]:
# Example query
response = query_engine.query("How to dispose of syringes?")

# Print response
print(response)





Disposing of syringes requires careful consideration to ensure they are handled and disposed of safely. According to the Bio-Medical Waste Management Rules, 2016, syringes should be either mutilated or needles should be cut and stored in tamper-proof, leak-proof, and puncture-proof containers for sharps storage.

If you are an occupier and do not have a disposal facility linked to you, you are responsible for sterilizing and disposing of syringes in the manner prescribed in the rules. Mutilation or cutting of syringes can be done by a trained person, and the waste should be disposed of in a designated bio-medical waste treatment and disposal facility.

Alternatively, syringes can be disposed of through autoclaving or dry heat sterilization followed by shredding or mutilation or encapsulation in metal containers or cement concrete. The waste should then be sent for final disposal to iron foundries (having consent to operate from the State Pollution Control Boards or Pollution Control C

In [15]:
index.storage_context.persist(persist_dir="./medwaste_index")


In [24]:
!zip -r /content/medwaste_guardian_rag.zip /content/drive/MyDrive/Colab\ Notebooks/medwaste_index


  adding: content/drive/MyDrive/Colab Notebooks/medwaste_index/ (stored 0%)
  adding: content/drive/MyDrive/Colab Notebooks/medwaste_index/default__vector_store.json (deflated 57%)
  adding: content/drive/MyDrive/Colab Notebooks/medwaste_index/image__vector_store.json (deflated 19%)
  adding: content/drive/MyDrive/Colab Notebooks/medwaste_index/graph_store.json (stored 0%)
  adding: content/drive/MyDrive/Colab Notebooks/medwaste_index/index_store.json (deflated 64%)
  adding: content/drive/MyDrive/Colab Notebooks/medwaste_index/docstore.json (deflated 72%)


In [25]:
from google.colab import files
files.download("/content/medwaste_guardian_rag.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
#pip install pypdf
#pip install -q transformers einops accelerate langchain bitsandbytes
#pip install llama_index
#pip install --upgrade llama-index llama-index-llms-huggingface transformers
#pip install llama-index llama-index-readers-file
#pip install sentence_transformers
#pip install -U llama-index-embeddings-huggingface langchain langchain-community
#pip install fastapi uvicorn


In [1]:
from llama_index.llms.huggingface import HuggingFaceLLM  
llm = HuggingFaceLLM(model_name="meta-llama/Llama-2-7b-chat-hf")  
print("LLaMA-2 model loaded successfully!")


  from .autonotebook import tqdm as notebook_tqdm
Downloading shards:   0%|          | 0/2 [05:38<?, ?it/s]


KeyboardInterrupt: 

In [2]:
response = query_engine.query("What are the biomedical waste disposal rules for human anatomical waste?")
print(response)


NameError: name 'query_engine' is not defined