# CTSE Lecture Notes Chatbot using Gemini API
# SE4010 - Current Trends in Software Engineering Assignment

## 1. Install Required Dependencies

# Run this cell to install all necessary packages - remove the comments
# !pip install langchain langchain_community python-pptx sentence-transformers faiss-cpu
# !pip install google-generativeai langchain-google-genai

## 2. Import Libraries and Setup

In [4]:
import os
import glob
from pptx import Presentation
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai

## 3. Configure Gemini API

In [5]:
# Set your Gemini API key (you'll need to get this from Google AI Studio)
GOOGLE_API_KEY = "AIzaSyAKvcPDDIQUJW6qLZDoGWU4PJgJX76eOOM"  # Replace with your actual API key
genai.configure(api_key=GOOGLE_API_KEY)
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

## 4./5.Function to Extract Text from PowerPoint Files & Load PowerPoint Files from a Directory

In [6]:
## 4. Function to Extract Text from PowerPoint Files

def extract_text_from_pptx(pptx_path):
    """Extract text from a PowerPoint file."""
    prs = Presentation(pptx_path)
    text_content = []
    
    # Extract slide number for context
    slide_number = 1
    
    for slide in prs.slides:
        slide_text = f"Slide {slide_number}: "
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                slide_text += shape.text + " "
        text_content.append(slide_text.strip())
        slide_number += 1
    
    return "\n\n".join(text_content)

## 5. Load PowerPoint Files from a Directory

def load_pptx_files(directory_path):
    """Load all PowerPoint files from a directory and extract their text."""
    pptx_files = glob.glob(os.path.join(directory_path, "*.pptx"))
    all_text = []
    
    for file_path in pptx_files:
        file_name = os.path.basename(file_path)
        print(f"Processing: {file_name}")
        text = extract_text_from_pptx(file_path)
        # Add file source for better context
        text = f"Source: {file_name}\n\n{text}"
        all_text.append(text)
    
    return all_text

# Set the path to your folder containing PPTX lecture notes
pptx_directory = "./lectures"  # Change this to your actual directory path

# Uncomment the following line when ready to process your files
all_lecture_notes = load_pptx_files(pptx_directory)
print(f"Loaded {len(all_lecture_notes)} PowerPoint files")

Processing: AWS User Groups Colombo - Introduction to AWS Cloud Platform.pptx
Processing: CAP Theorem.pptx
Processing: Cloud Computing 101.pptx
Processing: Cloud Design Patterns - 1.pptx
Processing: Cloud Design Patterns - 2.pptx
Processing: Containers 101.pptx
Processing: Intro to DevOps and Beyond.pptx
Processing: Introduction to Microservices.pptx
Processing: Key Essentials for Building Application in Cloud.pptx
Processing: Lecture 2 - Part 1.pptx
Processing: Lecture 2 - Part 2.pptx
Processing: Microservice Design Patterns.pptx
Loaded 12 PowerPoint files


## 6. Process and Split the Text

In [7]:
def process_text(texts):
    """Split the text into smaller chunks for better retrieval."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,  # Adjust based on your content
        chunk_overlap=200,
        length_function=len,
    )
    
    all_splits = []
    for text in texts:
        splits = text_splitter.split_text(text)
        all_splits.extend(splits)
    
    return all_splits

# Uncomment when ready to process your files
text_chunks = process_text(all_lecture_notes)
print(f"Created {len(text_chunks)} text chunks")

Created 113 text chunks


## 7. Create Vector Store for Similarity Search

In [10]:
def create_vector_store(text_chunks):
    """Create a vector store using FAISS for efficient similarity search."""
    # Using a free, locally-running embedding model
    embeddings = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",  # A lightweight model that works well for semantic search
        model_kwargs={'device': 'cpu'}
    )
    
    vector_store = FAISS.from_texts(text_chunks, embeddings)
    return vector_store

# Uncomment when ready to create your vector store
vector_store = create_vector_store(text_chunks)
print("Vector store created successfully")

Vector store created successfully


## 8. Set Up Gemini LLM

In [14]:
def setup_gemini_llm():
    """Set up the Gemini Pro LLM via API."""
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0.7,
        top_p=0.85,
        max_output_tokens=1024,
        convert_system_message_to_human=True
    )
    
    return llm

# Setup memory to maintain conversation history
memory = ConversationBufferMemory(
    memory_key="chat_history",
    output_key="answer",  # Add this line to specify which output to store
    return_messages=True
)

## 9. Create the Conversational Retrieval Chain

In [15]:
def create_chatbot(vector_store, llm):
    """Create the conversational retrieval chain."""
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        return_source_documents=True  # To show where the information comes from
    )
    
    return qa_chain

# Uncomment when ready to set up your chatbot
llm = setup_gemini_llm()
qa_chain = create_chatbot(vector_store, llm)
print("Chatbot is ready!")

Chatbot is ready!


## 10. Run the Chatbot

In [16]:
def ask_question(qa_chain, question):
    """Ask a question to the chatbot."""
    try:
        result = qa_chain({"question": question})
        
        print("Question:", question)
        print("\nAnswer:", result["answer"])
        print("\nSources:")
        for doc in result["source_documents"]:
            print("-", doc.metadata.get("source", "Unknown source"))
        
        return result
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None

# Example usage:
ask_question(qa_chain, "What are the main topics covered in CTSE?")



Question: What are the main topics covered in CTSE?

Answer: I don't see an answer to the question "What are the main topics covered in CTSE?" in the given context.

Sources:
- Unknown source
- Unknown source
- Unknown source


{'question': 'What are the main topics covered in CTSE?',
 'chat_history': [HumanMessage(content='What are the main topics covered in CTSE?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='I don\'t see an answer to the question "What are the main topics covered in CTSE?" in the given context.', additional_kwargs={}, response_metadata={})],
 'answer': 'I don\'t see an answer to the question "What are the main topics covered in CTSE?" in the given context.',
 'source_documents': [Document(id='7948a28e-b058-4319-8b6c-ddff45a9853d', metadata={}, page_content='Slide 21: References  https://www.cloudflare.com/learning/cloud/what-is-the-cloud/ \nhttps://www.redhat.com/en/topics/cloud-computing/what-is-iaas \nhttps://www.redhat.com/en/topics/cloud-computing/what-is-paas \nhttps://www.redhat.com/en/topics/cloud-computing/what-is-saas\nCloud Computing: Concepts, Technology & Architecture, Thomas Erl, et al., Prentice‐Hall, 2013,\nThe Datacenter as a Computer – Designing Wareho

## 11. Interactive Chat Interface


In [None]:
def interactive_chat():
    """Run an interactive chat session."""
    print("Welcome to the CTSE Lecture Notes Chatbot!")
    print("Type 'exit' to end the conversation.")
    
    while True:
        user_input = input("\nAsk a question: ")
        if user_input.lower() == 'exit':
            break
        
        try:
            ask_question(qa_chain, user_input)
        except Exception as e:
            print(f"Error: {e}")
            print("Please try another question.")

# Start the interactive chat when ready
interactive_chat()


Welcome to the CTSE Lecture Notes Chatbot!
Type 'exit' to end the conversation.
