In [6]:
%mkdir -p pdfs 
!gdown "https://drive.google.com/uc?id=11VmupXJDVQv3k4Pc5McIaaayilLHqBsF" -O pdfs/1.pdf
!gdown "https://drive.google.com/uc?id=11Y1Gxi-EN6OcbOSMxb_vJwxV7neIQPss" -O pdfs/2.pdf
!gdown "https://drive.google.com/uc?id=11aKfjXguCoeprGzPrZUHNgTxzUq4u6V-" -O pdfs/3.pdf

Downloading...
From: https://drive.google.com/uc?id=11VmupXJDVQv3k4Pc5McIaaayilLHqBsF
To: /content/pdfs/1.pdf
100% 1.90M/1.90M [00:00<00:00, 14.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=11Y1Gxi-EN6OcbOSMxb_vJwxV7neIQPss
To: /content/pdfs/2.pdf
100% 1.93M/1.93M [00:00<00:00, 15.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=11aKfjXguCoeprGzPrZUHNgTxzUq4u6V-
To: /content/pdfs/3.pdf
100% 401k/401k [00:00<00:00, 4.97MB/s]


In [7]:
!pip install langchain langchain-community langchain-huggingface langchain-text-splitters
!pip install sentence-transformers faiss-cpu
!pip install pypdf python-docx

Collecting langchain-huggingface
  Downloading langchain_huggingface-1.1.0-py3-none-any.whl.metadata (2.8 kB)
Downloading langchain_huggingface-1.1.0-py3-none-any.whl (29 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-1.1.0
Collecting pypdf
  Downloading pypdf-6.4.0-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-6.4.0-py3-none-any.whl (329 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.5/329.5 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-6.4.0


In [25]:
import re
from typing import List
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import FAISS

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

1. Document Loading

In [26]:
# ---------------------------
# Document Loading
# ---------------------------
def load_document(file_path: str) -> str:
    """Load document based on file extension using LangChain loaders"""
    try:
        if file_path.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif file_path.endswith(".docx"):
            loader = Docx2txtLoader(file_path)
        elif file_path.endswith(".txt"):
            loader = TextLoader(file_path, encoding="utf-8")
        else:
            raise ValueError(f"Unsupported file type: {file_path}")
        
        documents = loader.load()
        text = "\n".join([doc.page_content for doc in documents])
        return re.sub(r'\s+', ' ', text).strip()
    except Exception as e:
        print(f"Error loading document: {e}")
        raise

# Test the function
# Load PDF document
print("\n=== Loading Document ===")
file_path = "pdfs/2.pdf"
print("\n=== Loading Document ===")
text = load_document(file_path)
print(f"Loaded text (first 300 chars): {text[:300]}...")


=== Loading Document ===

=== Loading Document ===
Loaded text (first 300 chars): NEW SOFTWARE TECHNOLOGIES IN SOFTWARE ENGINEERING Instructor's Name: Khoan Nguyen Contents Course Introduction and Overview .......................................................................................... 4 Course Objectives ....................................................................


2. Text Chunking

In [27]:
# ---------------------------
# Text Chunking
# ---------------------------
def create_chunks(text: str, chunk_size: int = 500, chunk_overlap: int = 50) -> List[str]:
    """Split text into chunks using LangChain's RecursiveCharacterTextSplitter"""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", ". ", " ", ""]
    )
    chunks = text_splitter.split_text(text)
    return chunks

# Test the function
# Create text chunks
print("\n=== Creating Text Chunks ===")
chunk_size = 500
chunk_overlap = 50
chunks = create_chunks(text, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
print(f"Created {len(chunks)} chunks")
print(f"Sample chunk: {chunks[0][:200]}...")


=== Creating Text Chunks ===
Created 294 chunks
Sample chunk: NEW SOFTWARE TECHNOLOGIES IN SOFTWARE ENGINEERING Instructor's Name: Khoan Nguyen Contents Course Introduction and Overview ...............................................................................


3. Vector Store

In [30]:
class VectorStoreService:
    """Handle embeddings and vector storage using LangChain + FAISS"""
    
    def __init__(self, model_name: str = "Qwen/Qwen3-Embedding-0.6B"):
        print(f"Loading embedding model: {model_name}")
        self.embeddings = HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs={'device': 'cuda'},
            encode_kwargs={'normalize_embeddings': True}
        )
        self.vector_store = None
        print("Embedding model loaded successfully")
    
    def create_vector_store(self, chunks: List[str]):
        """Create FAISS vector store from text chunks"""
        print(f"Creating vector store from {len(chunks)} chunks...")
        self.vector_store = FAISS.from_texts(
            texts=chunks,
            embedding=self.embeddings
        )
        print("Vector store created successfully")
    
    def save_vector_store(self, path: str = "faiss_index"):
        """Save vector store to disk"""
        if self.vector_store:
            self.vector_store.save_local(path)
            print(f"Vector store saved to {path}")
    
    def load_vector_store(self, path: str = "faiss_index"):
        """Load vector store from disk"""
        self.vector_store = FAISS.load_local(
            path, 
            self.embeddings,
            allow_dangerous_deserialization=True
        )
        print(f"Vector store loaded from {path}")
    
    def get_retriever(self, k: int = 5):
        """Get retriever for similarity search"""
        if not self.vector_store:
            raise ValueError("Vector store not initialized")
        return self.vector_store.as_retriever(search_kwargs={"k": k})
    
    def similarity_search(self, query: str, k: int = 5):
        """Direct similarity search returning documents"""
        if not self.vector_store:
            raise ValueError("Vector store not initialized")
        return self.vector_store.similarity_search(query, k=k)

# Test the VectorStoreService
print("\n=== Creating Vector Store ===")
vector_service = VectorStoreService()
vector_service.create_vector_store(chunks)


=== Creating Vector Store ===
Loading embedding model: Qwen/Qwen3-Embedding-0.6B
Embedding model loaded successfully
Creating vector store from 294 chunks...
Vector store created successfully


4. RAG Service

In [31]:
class RAGService:
    """RAG service using LangChain chains"""
    
    def __init__(self, vector_store_service: VectorStoreService, model_name: str = "Qwen/Qwen3-0.6B"):
        self.vector_store_service = vector_store_service
        self.llm = None
        
        try:
            print(f"Loading LLM: {model_name}")
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype="auto",
                device_map="auto"
            )
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            
            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.95,
                repetition_penalty=1.15
            )
            
            self.llm = HuggingFacePipeline(pipeline=pipe)
            print("LLM loaded successfully")
        except Exception as e:
            print(f"Warning: Failed to load LLM ({e}). Using mock responses.")
    
    def answer_query(self, query: str, k: int = 5):
        """Answer a query using RAG - simplified without chains"""
        # Use direct similarity search instead of retriever
        docs = self.vector_store_service.similarity_search(query, k=k)
        
        # Combine context from retrieved documents
        context = "\n\n".join([doc.page_content for doc in docs])
        
        if self.llm:
            try:
                # Create prompt
                prompt = f"""Use the following context to answer the question. If you don't know the answer, say so.

Context: {context}

Question: {query}

Answer:"""
                
                # Generate answer
                answer = self.llm.invoke(prompt)
                
                return {
                    "answer": answer,
                    "source_documents": docs,
                    "context": context
                }
            except Exception as e:
                print(f"Error during query: {e}")
                return self._mock_answer(query, k, docs, context)
        else:
            return self._mock_answer(query, k, docs, context)
    
    def _mock_answer(self, query: str, k: int, docs=None, context=None):
        """Mock answer when LLM is not available"""
        if docs is None or context is None:
            docs = self.vector_store_service.similarity_search(query, k=k)
            context = "\n\n".join([doc.page_content for doc in docs])
        
        return {
            "answer": f"[Mock Mode] Based on the context, here are the relevant sections for your query: '{query}'",
            "source_documents": docs,
            "context": context
        }

# Test the RAGService
print("\n=== Initializing RAG Service ===")
rag_service = RAGService(vector_service)


=== Initializing RAG Service ===
Loading LLM: Qwen/Qwen3-0.6B


config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

Device set to use cuda:0


LLM loaded successfully


5. Query

In [33]:
# Test queries  
top_k = 5

print("\n=== Testing Queries ===")
test_queries = [
    "What are new technologies mentioned in the course?",
    "What databases are mentioned?",
    "What is the main objective of the course?"
]

# Execute test queries
for query in test_queries:
    result = rag_service.answer_query(query, k=top_k)
    print(f"Answer: {result['answer']}")
    print(f"\nRelevant sources ({len(result['source_documents'])} chunks):")
    for i, doc in enumerate(result['source_documents'][:2]):  
        print(f"  [{i+1}] {doc.page_content[:150]}...")



=== Testing Queries ===
Answer: Use the following context to answer the question. If you don't know the answer, say so.

Context: . 75 Course Introduction and Overview Welcome and Introduction Imagine stepping into a world where software drives everything around you—your phone, your car, even your home appliances. In this course, we'll journey through the technologies that power these innovations. But first, let's get to know each other and set the stage for what lies ahead. Course Objectives Think about the last time you used a cutting-edge app or saw AI in action

. These experiences are shaped by the very technologies we will explore in this course. By the end of this semester, you’ll not only understand how these technologies work but also how to harness them in your own projects. 1. Introduce students to emerging technologies in software engineering with a focus on web, mobile, and AI-driven development. 2. Develop an understanding of modern tools and practices that are reshaping

In [34]:
result

{'answer': 'Use the following context to answer the question. If you don\'t know the answer, say so.\n\nContext: . 75 Course Introduction and Overview Welcome and Introduction Imagine stepping into a world where software drives everything around you—your phone, your car, even your home appliances. In this course, we\'ll journey through the technologies that power these innovations. But first, let\'s get to know each other and set the stage for what lies ahead. Course Objectives Think about the last time you used a cutting-edge app or saw AI in action\n\n. o Development: Teams work on their projects, applying the concepts and tools learned throughout the course. o Presentations: Teams present their projects, demonstrating functionality, discussing challenges faced, and explaining the technologies used. • Course Review and Q&A: o Recap of Key Concepts: Review of major topics covered in the course. o Open Discussion: Q&A session, discussion on the future of software engineering, and feedb