In [1]:
# Import Libraries
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Import Other Libraries
from dotenv import load_dotenv
import os

In [2]:
# Load Environment Variables
load_dotenv()

# Set Environment Variables
HUGGING_FACE_API = os.getenv("HUGGINGFACE_API_KEY")

In [3]:
# Model URLs
meta_llama = "meta-llama/Llama-3.2-1B"
SentenceTransformer = "sentence-transformers/all-mpnet-base-v2"

In [4]:
# Initialize HuggingFace LLM
llm = HuggingFaceEndpoint(
    repo_id=meta_llama,  # Llama model
    huggingfacehub_api_token=HUGGING_FACE_API,  # Pass the API key here
    temperature=0.5,
    max_new_tokens=512,
)

In [5]:
"""from langchain_huggingface import HuggingFaceEndpoint

# Initialize HuggingFace LLM
llm = HuggingFaceEndpoint(
    repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",  # Public model
    huggingfacehub_api_token=HUGGING_FACE_API,  # Pass the API key here
)

# Test the LLM
print(llm("What is the capital of France?"))"""

'from langchain_huggingface import HuggingFaceEndpoint\n\n# Initialize HuggingFace LLM\nllm = HuggingFaceEndpoint(\n    repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",  # Public model\n    huggingfacehub_api_token=HUGGING_FACE_API,  # Pass the API key here\n)\n\n# Test the LLM\nprint(llm("What is the capital of France?"))'

In [6]:
from huggingface_hub import HfApi

api = HfApi(token=HUGGING_FACE_API)
try:
    user_info = api.whoami()
    print("API Key is valid. User:", user_info["name"])
except Exception as e:
    print("Invalid API Key:", e)

API Key is valid. User: iammudaser


In [7]:
# Initilize HuggingFace Embeddings
embeddings = HuggingFaceEmbeddings(
    model_name=SentenceTransformer
)

In [8]:
# Initilize Output Parser
output_parser = StrOutputParser()

In [9]:
# Print PDF file path
pdf_file_path = os.path.join(os.path.dirname(os.getcwd()), "data/codeprolk.pdf")
print("PDF File Path:", pdf_file_path)

PDF File Path: c:\Users\RameezRassdeen\Documents\langchain-chat-bot\chatbot\data/codeprolk.pdf


In [10]:
pdf_file_path

'c:\\Users\\RameezRassdeen\\Documents\\langchain-chat-bot\\chatbot\\data/codeprolk.pdf'

In [11]:
# Load PDF File
loader = PyPDFLoader("C:\\Users\\RameezRassdeen\\Documents\\langchain-chat-bot\\chatbot\\data\\codeprolk.pdf")

# Load documents from the PDF
documents = loader.load()

In [12]:
len(documents)

4

In [13]:
# Initialize Text Splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=50,
    length_function=len
)

# Split documents into smaller chunks
texts = text_splitter.split_documents(documents)

# Print the number of chunks created
print(f"Number of chunks created: {len(texts)}")

Number of chunks created: 20


In [14]:
# Create a FAISS vector store from the texts
vectorstore = FAISS.from_documents(documents=texts, embedding=embeddings)

In [15]:
# Create a retriever from the vector store
retriever = vectorstore.as_retriever()

In [16]:
# Define a prompt template for the LLM
prompt_template ="""
    You are a helpful assistant. Answer the question based on the provided context.

    Question: {question}

    Context: {context}

    Answer: 
    """

# Create the prompt from the template
prompt = ChatPromptTemplate.from_template(template=prompt_template)

In [17]:
# Define a function to extract the question and pass it to the retriever
def retrieve_context(input_dict):
    question = input_dict["question"]
    context = retriever.invoke(question)
    print(f"Retrieved context: {context}")  # Check the context being passed
    return context

# Update the chain
chain = (
    {"context": RunnableLambda(retrieve_context), "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

In [18]:
# Invoke RAG Chain with a sample question
response = chain.invoke({"question": "who is codeprolk?"})

# Print the response
print("Response:", response)

Retrieved context: [Document(id='263a197e-4cf4-4a1b-87d5-812442a78afb', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2024-06-18T10:00:42+05:30', 'author': 'Dinesh Piyasamara', 'moddate': '2024-06-18T10:00:42+05:30', 'source': 'C:\\Users\\RameezRassdeen\\Documents\\langchain-chat-bot\\chatbot\\data\\codeprolk.pdf', 'total_pages': 4, 'page': 3, 'page_label': '4'}, page_content='Partnerships and Collaborations \nCodePRO LK is exploring partnerships with educational institutions, tech companies, and \nindustry experts to enrich its content and provide learners with access to a broader range of \nresources and opportunities. These collaborations aim to bridge the gap between education and \nindustry, ensuring that learners are well-prepared for real-world challenges.'), Document(id='1fdd39a1-3f55-4ad3-8962-cf71134ef370', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Mi



Response: 
