In [10]:
import os
import pickle

import faiss
import google.generativeai as genai
import numpy as np

In [None]:
# Read the key from the environment so a real secret never has to be saved
# inside the notebook. The placeholder is kept as the fallback, so behaviour
# is unchanged when the GEMINI_API_KEY environment variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", 'YOUR_API_KEY')

In [12]:
# Model identifiers.
EMBEDDING_MODEL_NAME = "models/text-embedding-004"  # Gemini embedding model
GENERATION_MODEL_NAME = "gemini-2.0-flash-exp"  # model that writes the answers

# On-disk artifacts: the FAISS index file and the pickled chunks file.
FAISS_INDEX_PATH = "school_handbook.faiss"
CHUNKS_DATA_PATH = "school_handbook_chunks.pkl"

# Register the API key with the Gemini client library.
genai.configure(api_key=GEMINI_API_KEY)

In [13]:
# --- 1. Load the chunks and the embeddings (FAISS index) ---
# Reads the FAISS index from FAISS_INDEX_PATH and the pickled chunk list from
# CHUNKS_DATA_PATH into `loaded_index` / `loaded_chunks`.
print("\n1. Loading FAISS index and chunks data...")
loaded_index = None
loaded_chunks = []
try:
    loaded_index = faiss.read_index(FAISS_INDEX_PATH)
    print(f"FAISS index loaded from {FAISS_INDEX_PATH}. Total vectors: {loaded_index.ntotal}")

    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only load chunk files that you produced yourself or fully trust.
    with open(CHUNKS_DATA_PATH, "rb") as f:
        loaded_chunks = pickle.load(f)
    print(f"Chunks data loaded from {CHUNKS_DATA_PATH}. Total chunks: {len(loaded_chunks)}")
except Exception as e:
    print(f"Error loading data: {e}")
    # Re-raise instead of calling exit(): exit() is an interactive-shell helper
    # that shuts the kernel down in a notebook and ends a script with status 0,
    # hiding the failure. Raising stops execution here with a real traceback.
    raise

# Retrieval maps FAISS positions back to chunks by list index, so the two
# collections are expected to have the same length.
if loaded_index.ntotal != len(loaded_chunks):
    print(
        f"Warning: FAISS index holds {loaded_index.ntotal} vectors but "
        f"{len(loaded_chunks)} chunks were loaded; retrieval results may be misaligned."
    )
print("------------------------------------")


1. Loading FAISS index and chunks data...
FAISS index loaded from school_handbook.faiss. Total vectors: 70
Chunks data loaded from school_handbook_chunks.pkl. Total chunks: 70
------------------------------------


In [14]:
# --- 2. Make a retrieval function ---
print("\n2. Defining retrieval function...")
def retrieve_relevant_chunks(user_prompt: str, faiss_index, text_chunks: list, top_k: int = 3) -> list[str]:
    """
    Retrieve the chunks whose embeddings are closest to the user prompt.

    The prompt is embedded with ``genai.embed_content`` (model
    ``EMBEDDING_MODEL_NAME``), the vector is searched against ``faiss_index``,
    and every returned position is mapped back to an entry of ``text_chunks``.
    A short preview of each match is printed as a side effect.

    Args:
        user_prompt: The question or search text to embed.
        faiss_index: Object exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``, each with one row per query vector.
        text_chunks: Chunk texts, positioned so that ``text_chunks[i]``
            corresponds to vector ``i`` of the index.
        top_k: Number of nearest neighbours to request from the index.

    Returns:
        The matched chunks in the order returned by the search. An empty list
        is returned when the index or chunks are missing, or when any error
        occurs during embedding or search (the error is printed, not raised).
    """
    if faiss_index is None or not text_chunks:
        print("FAISS index or text_chunks not available for retrieval.")
        return []

    try:
        # Embed the user prompt (query)
        # `task_type="RETRIEVAL_QUERY"` is important for search queries
        query_embedding_response = genai.embed_content(
            model=EMBEDDING_MODEL_NAME,
            content=user_prompt,
            task_type="RETRIEVAL_QUERY"
        )
        query_embedding = query_embedding_response['embedding']
        query_vector_np = np.array([query_embedding]).astype('float32')

        # Search the FAISS index
        distances, indices = faiss_index.search(query_vector_np, top_k)

        relevant_chunks = []
        print(f"\nSearching for: '{user_prompt}'")
        print("Top matches:")
        for rank, (raw_idx, dist) in enumerate(zip(indices[0], distances[0]), start=1):
            idx = int(raw_idx)
            if idx < 0:
                # FAISS pads the result with -1 when it finds fewer than top_k
                # neighbours. Without this guard, -1 would silently select the
                # LAST chunk through Python's negative indexing.
                continue
            if idx >= len(text_chunks):
                print(f"  Warning: FAISS index {idx} out of bounds for loaded chunks ({len(text_chunks)}).")
                continue

            chunk = text_chunks[idx]
            relevant_chunks.append(chunk)
            # Build the preview outside the f-string: a backslash inside an
            # f-string expression is a SyntaxError before Python 3.12, and the
            # replacement must target real newlines ('\n'), not the two
            # characters backslash + 'n'.
            preview = chunk[:150].replace('\n', ' ')
            print(f"  Match {rank} (Index: {idx}, Distance: {dist:.4f}):")
            print(f'    "{preview}..."') # Preview
        return relevant_chunks

    except Exception as e:
        print(f"Error during retrieval: {e}")
        return []


2. Defining retrieval function...


In [16]:
# --- Example Usage of Retrieval Function ---
# Runs three sample queries through the retriever. The prompts and their
# results are kept in numbered variables so other cells can reuse them.
if not (loaded_index and loaded_chunks):
    print("Skipping retrieval example as data was not loaded properly.")
else:
    print("\n--- Retrieval Example ---")
    sample_prompt_1 = "What is the school's mission?"
    sample_prompt_2 = "Tell me about the dress code."
    sample_prompt_3 = "Emergency procedures for fire"

    retrieved_1 = retrieve_relevant_chunks(sample_prompt_1, loaded_index, loaded_chunks, top_k=2)
    retrieved_2 = retrieve_relevant_chunks(sample_prompt_2, loaded_index, loaded_chunks, top_k=3)
    retrieved_3 = retrieve_relevant_chunks(sample_prompt_3, loaded_index, loaded_chunks, top_k=1)

print("\n--- Script Finished ---")


--- Retrieval Example ---

Searching for: 'What is the school's mission?'
Top matches:
  Match 1 (Index: 5, Distance: 0.6486):
    "**1.2. Our Mission**

The mission of Pathways Academy is to provide a safe, supportive, and stimulating learning environment where all students can ac..."
  Match 2 (Index: 6, Distance: 0.7698):
    "**1.3. Our Vision**

Pathways Academy aspires to be a leading educational institution recognized for its innovative teaching practices, commitment to ..."

Searching for: 'Tell me about the dress code.'
Top matches:
  Match 1 (Index: 29, Distance: 0.6979):
    "**4.4. Dress Code**
The purpose of the Pathways Academy dress code is to promote a positive, safe, and respectful learning environment, minimize distr..."
  Match 2 (Index: 25, Distance: 0.9523):
    "**4. CODE OF CONDUCT**

The Pathways Academy Code of Conduct is established to create and maintain a safe, orderly, respectful, and positive learning ..."
  Match 3 (Index: 28, Distance: 0.9639):
    "**4

In [17]:

def generate_response(user_question: str, relevant_chunks: list):
    """
    Answer a question with Gemini, grounded in the retrieved handbook chunks.

    Args:
        user_question: The question to answer.
        relevant_chunks: Handbook excerpts to place in the prompt as context.

    Returns:
        The model's answer text. A fixed fallback message is returned when no
        chunks are supplied, and a fixed apology message is returned (after
        printing the error) when the model call fails.
    """
    # Nothing retrieved: skip the model call entirely.
    if not relevant_chunks:
        return "I couldn't find relevant information in the school handbook to answer your question. Please try rephrasing your question or contact the school administration directly."

    # Label each excerpt "Source N:" (1-based) and separate them with blank lines.
    handbook_context = "\n\n".join(
        f"Source {number}:\n{excerpt}"
        for number, excerpt in enumerate(relevant_chunks, start=1)
    )

    # Full instruction prompt sent to the generation model.
    full_prompt = f"""You are a helpful assistant for Pathways Academy. Use the following information from the school handbook to answer the student's question accurately and helpfully.

CONTEXT FROM SCHOOL HANDBOOK:
{handbook_context}

STUDENT QUESTION: {user_question}

INSTRUCTIONS:
- Answer based primarily on the provided context from the school handbook
- Be clear, helpful, and student-friendly
- If the context doesn't fully answer the question, mention what information is available and suggest contacting school administration for additional details
- Use a warm, supportive tone appropriate for students and parents
- Structure your response clearly with headings or bullet points when appropriate

ANSWER:"""

    try:
        llm = genai.GenerativeModel(GENERATION_MODEL_NAME)
        reply = llm.generate_content(full_prompt)
        answer = reply.text
    except Exception as err:
        print(f"Error generating response: {err}")
        answer = "I apologize, but I'm having trouble generating a response right now. Please try again later or contact the school directly."
    return answer
    

# Answer the first sample question using the chunks retrieved for it.
generated_response = generate_response(
    user_question=sample_prompt_1,
    relevant_chunks=retrieved_1,
)

In [21]:
# Show the answer text produced for the first sample question.
print(generated_response)

Hey there! I can definitely help you with that!

Based on the Pathways Academy handbook, here's the school's mission:

**Pathways Academy's Mission**

The mission of Pathways Academy is to provide a safe, supportive, and stimulating learning environment where all students can:

*   Achieve academic excellence
*   Develop critical thinking skills
*   Become responsible, compassionate, and contributing members of a diverse global society

Hope this helps you understand what Pathways Academy is all about! If you have any other questions, feel free to ask! You can also contact the school administration for more details.

