In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader

# Load the PDF
pdf_reader = PdfReader("example.pdf")
text = ""
for page in pdf_reader.pages:
    text += page.extract_text()

# Split text into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # Adjust size as needed
    chunk_overlap=50  # Overlap to maintain context
)
chunks = text_splitter.split_text(text)


In [10]:
from sentence_transformers import SentenceTransformer

# Load a free embeddings model
model = SentenceTransformer('all-MiniLM-L6-v2')  # Free model

# Generate embeddings for the chunks
embeddings = model.encode(chunks)


In [11]:
import faiss
import numpy as np

# Create a FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)

# Add embeddings to the index
index.add(np.array(embeddings))

# Save FAISS index locally (optional)
faiss.write_index(index, "faiss_index")


In [27]:
# User query
query = input("Enter your query")
query_embedding = model.encode([query])

# Search FAISS for the top matches
k = 5  # Number of chunks to retrieve
distances, indices = index.search(np.array(query_embedding), k)

# Retrieve the top chunks
retrieved_chunks = [chunks[i] for i in indices[0]]


In [28]:
from transformers import pipeline

# Load a free language model
generator = pipeline('text-generation', model='bigscience/bloom-560m', device='cpu')

# Combine retrieved chunks into context
context = " ".join(retrieved_chunks)
response = generator(
    f"Context: {context}\n\nQuestion: {query}\nAnswer:",
    max_new_tokens=50,  # Number of tokens to generate
    truncation=True     # Truncate the input if it exceeds the max length
)
print(response[0]['generated_text'])


Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Context: 4.6.8.3 Stopping distances and stopping times for STOP 1, axis 2
Fig. 4-52: Stopping distances for STOP 1, axis 2
68 / 109 Issued: 22.07.2013 Version: Spez KR 360-3 KR 500-3 V5 en (PDF)KR 360-3; KR 500-3
Fig. 4-53: Stopping times for STOP 1, axis 2
69 / 109 Issued: 22.07.2013 Version: Spez KR 360-3 KR 500-3 V5 en (PDF)4 Technical data
4.6.8.4 Stopping distances and stopping times for STOP 1, axis 3
Fig. 4-54: Stopping distances for STOP 1, axis 3
Fig. 4-55: Stopping times for STOP 1, axis 3 4.6.6.2 Stopping distances and stopping times for STOP 1, axis 1
Fig. 4-38: Stopping distances for STOP 1, axis 1
56 / 109 Issued: 22.07.2013 Version: Spez KR 360-3 KR 500-3 V5 en (PDF)KR 360-3; KR 500-3
Fig. 4-39: Stopping times for STOP 1, axis 1
57 / 109 Issued: 22.07.2013 Version: Spez KR 360-3 KR 500-3 V5 en (PDF)4 Technical data
4.6.6.3 Stopping distances and stopping times for STOP 1, axis 2
Fig. 4-40: Stopping distances for STOP 1, axis 2 Fig. 4-35: Stopping times for STOP 1, axis 2

In [23]:
# Combine retrieved chunks into context
context = " ".join(retrieved_chunks)
response = generator(
    f"Context: {context}\n\nQuestion: {query}\nAnswer:",
    max_new_tokens=50,  # Number of tokens to generate
    truncation=True     # Truncate the input if it exceeds the max length
)
print(response[0]['generated_text'])

Context: 4.6.8.3 Stopping distances and stopping times for STOP 1, axis 2
Fig. 4-52: Stopping distances for STOP 1, axis 2
68 / 109 Issued: 22.07.2013 Version: Spez KR 360-3 KR 500-3 V5 en (PDF)KR 360-3; KR 500-3
Fig. 4-53: Stopping times for STOP 1, axis 2
69 / 109 Issued: 22.07.2013 Version: Spez KR 360-3 KR 500-3 V5 en (PDF)4 Technical data
4.6.8.4 Stopping distances and stopping times for STOP 1, axis 3
Fig. 4-54: Stopping distances for STOP 1, axis 3
Fig. 4-55: Stopping times for STOP 1, axis 3 4.6.8.1 Stopping distances and stopping times for STOP 0, axis 1 to axis 3 .............. 64
4.6.8.2 Stopping distances and stopping times for STOP 1, axis 1 ............................ 654.6.8.3 Stopping distances and stopping times for STOP 1, axis 2 ............................ 67
4.6.8.4 Stopping distances and stopping times for STOP 1, axis 3 ............................ 69 4.6.6.2 Stopping distances and stopping times for STOP 1, axis 1
Fig. 4-38: Stopping distances for STOP 1, axis 