<a href="https://colab.research.google.com/github/gvgabison/Sample-LLM-/blob/main/sampleRAGLLMv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install required libraries
!pip install sentence-transformers faiss-cpu transformers PyMuPDF

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting PyMuPDF
  Downloading PyMuPDF-1.24.14-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyMuPDF-1.24.14-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (19.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.8/19.8 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDF, faiss-cpu
Successfully installed PyMuPDF-1.24.14 faiss-cpu-1.9.0.post1


In [4]:

# Import libraries
from google.colab import files
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import faiss
import torch
import numpy as np

# Upload PDF files to Google Colab
uploaded = files.upload()

# Function to extract text from a PDF file using PyMuPDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page_num in range(len(doc)):
        page = doc.load_page(page_num)
        text += page.get_text()
    return text

# Extract text from all uploaded PDF files
pdf_texts = {}
for pdf_file in uploaded.keys():
    if pdf_file.endswith(".pdf"):
        pdf_texts[pdf_file] = extract_text_from_pdf(pdf_file)

# Combine the extracted text into a single string
full_text = " ".join(pdf_texts.values())

# Split the text into chunks for document retrieval
documents = [full_text[i:i+500] for i in range(0, len(full_text), 500)]  # 500-character chunks
print(f"Number of documents loaded: {len(documents)}")

# Generate embeddings and build FAISS index
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
document_embeddings = embedding_model.encode(documents)
dimension = document_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(document_embeddings))

# Load the pre-trained LLM
model_name = "facebook/opt-350m"  # Use a smaller model suitable for CPU
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cpu")  # Force CPU usage

#this one is for with GPU - paid version, fyi no more free tier for chatgpt as of Nov 1 2024
# Load LLaMA or OpenLLM
#model_name = "meta-llama/Llama-2-7b-hf"  # Example model (you may need GPU for larger models)
#tokenizer = AutoTokenizer.from_pretrained(model_name)
#model = AutoModelForCausalLM.from_pretrained(model_name, device_map="gpu")

# Function to retrieve documents
def retrieve_documents(query, top_k=2):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    results = [documents[idx] for idx in indices[0]]
    return results

# Function to generate a response using RAG
def generate_rag_answer(query):
    # Retrieve relevant documents
    retrieved_docs = retrieve_documents(query)

    # Combine retrieved documents into context
    context = " ".join(retrieved_docs)

    # Construct the input prompt
    input_prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"

    # Tokenize and generate response
    inputs = tokenizer(input_prompt, return_tensors="pt").to("cpu")  # Force CPU usage
    outputs = model.generate(
        inputs['input_ids'],
        max_new_tokens=150,  # Control the output length
        num_return_sequences=1
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

# Interactive Loop for Asking Questions
while True:
    user_query = input("Ask a question about the PDF (type 'exit' to quit): ")
    if user_query.lower() == "exit":
        print("Exiting. Goodbye!")
        break
    answer = generate_rag_answer(user_query)
    print(f"Answer: {answer}\n")


Saving HR_MANUAL__24.09.2017.pdf to HR_MANUAL__24.09.2017.pdf
Saving HR-Manual.pdf to HR-Manual.pdf
Number of documents loaded: 481


tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Ask a question about the PDF (type 'exit' to quit): attendance
Answer: Context: nt from work.  At the same time, it is important for employees to understand that in order for Tri-
County Community Action Program, Inc. to operate efficiently, regular attendance of all employees is 
imperative.  Employees need to be present and accountable during their scheduled work time and there 
are attendance expectations.   
 
PROCEDURE 
 
1. Authorized Absence - the employee notifies immediate supervisor or designee in advance and 
obtains approval to be away from or late to work.  Thi om customers and/or coworkers 
r. awards or citations for excellent performance 
s. records of attendance or completion of training programs 
t. Discipline  
u. Grievances  
 
21 
 
v. Performance goals  
w. Scheduled training and completed training records  
x. documents relating to the worker's departure from the company (such as reasons why the 
worker left or was fired, unemployment documents, insurance continua