In [1]:
import os
from dotenv import load_dotenv
from groq import Groq


# Load environment variables from .env file
load_dotenv()

def process_with_groq_clip(state):
    # Get API key from environment
    api_key = os.getenv("GROQ_API_KEY")
    
    # Debug: Check if API key is loaded
    if not api_key:
        print("❌ GROQ_API_KEY not found in environment variables")
        return {**state, "llm_response": "API key not configured"}
    
    print(f"✅ API key loaded: {api_key[:6]}...")

In [6]:
from langgraph.graph import StateGraph, START, END
from typing import TypedDict, Dict, Any, List
import os
import markdown
import re
from PIL import Image
import torch
import clip
import numpy as np
from groq import Groq
from pathlib import Path

# Set your Groq API key here or use environment variable
# # GROQ_API_KEY = "gsk_your_actual_groq_api_key_here"  # Replace with your key
# os.environ["GROQ_API_KEY"] = GROQ_API_KEY

class QuestionState(TypedDict):
    folder_path: str
    documents: List[Dict[str, Any]]
    image_paths: List[str]
    image_embeddings: List[List[float]]
    text_embeddings: List[List[float]]
    question: str
    user_answer: str
    feedback: str

def extract_image_paths(md_text: str, folder_path: str) -> List[str]:
    img_pattern = r'!\[.*?\]\((.*?)\)'
    relative_paths = re.findall(img_pattern, md_text)
    absolute_paths = []
    
    for img_path in relative_paths:
        if not os.path.isabs(img_path):
            abs_path = os.path.join(folder_path, img_path)
            if os.path.exists(abs_path):
                absolute_paths.append(abs_path)
        else:
            if os.path.exists(img_path):
                absolute_paths.append(img_path)
    
    return absolute_paths

def get_clip_embeddings(image_paths: List[str]) -> List[List[float]]:
    if not image_paths:
        return []
    
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load("ViT-B/32", device=device)
    model.eval()
    embeddings = []
    
    print(f"🎨 Processing {len(image_paths)} images with CLIP...")
    
    for img_path in image_paths:
        try:
            image = preprocess(Image.open(img_path)).unsqueeze(0).to(device)
            with torch.no_grad():
                embedding = model.encode_image(image)
                embedding = embedding.cpu().numpy().flatten().tolist()
            embeddings.append(embedding)
            print(f"✅ {os.path.basename(img_path)}")
        except Exception as e:
            print(f"❌ Error with {img_path}: {str(e)}")
            embeddings.append([0.0] * 512)
    
    return embeddings

def get_text_clip_embeddings(texts: List[str]) -> List[List[float]]:
    if not texts:
        return []
    
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, _ = clip.load("ViT-B/32", device=device)
    model.eval()
    embeddings = []
    
    print(f"📝 Processing {len(texts)} texts with CLIP...")
    
    for text in texts:
        try:
            text_tokens = clip.tokenize([text[:77]]).to(device)
            with torch.no_grad():
                embedding = model.encode_text(text_tokens)
                embedding = embedding.cpu().numpy().flatten().tolist()
            embeddings.append(embedding)
        except Exception as e:
            print(f"❌ Error processing text: {str(e)}")
            embeddings.append([0.0] * 512)
    
    return embeddings

# Node 1: Load documents and images
def load_content(state: QuestionState) -> QuestionState:
    folder_path = state["folder_path"]
    
    if not os.path.isdir(folder_path):
        raise ValueError(f"Invalid folder path: {folder_path}")
    
    documents = []
    all_image_paths = []
    
    print(f"📁 Loading content from: {folder_path}")
    
    # Load markdown files
    for file_path in Path(folder_path).glob("*.md"):
        with open(file_path, 'r', encoding='utf-8') as f:
            md_content = f.read()
        
        image_paths = extract_image_paths(md_content, folder_path)
        all_image_paths.extend(image_paths)
        
        documents.append({
            "filename": file_path.name,
            "markdown": md_content,
            "word_count": len(md_content.split())
        })
    
    # Find additional images
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'}
    for file_path in Path(folder_path).rglob("*"):
        if file_path.suffix.lower() in image_extensions:
            abs_path = str(file_path.absolute())
            if abs_path not in all_image_paths:
                all_image_paths.append(abs_path)
    
    print(f"✅ Found {len(documents)} documents and {len(all_image_paths)} images")
    
    return {
        **state,
        "documents": documents,
        "image_paths": all_image_paths
    }

# Node 2: Generate CLIP embeddings
def generate_embeddings(state: QuestionState) -> QuestionState:
    documents = state["documents"]
    image_paths = state["image_paths"]
    
    image_embeddings = get_clip_embeddings(image_paths)
    texts = [doc["markdown"] for doc in documents]
    text_embeddings = get_text_clip_embeddings(texts)
    
    print(f"🔗 Generated embeddings: {len(image_embeddings)} images, {len(text_embeddings)} texts")
    
    return {
        **state,
        "image_embeddings": image_embeddings,
        "text_embeddings": text_embeddings
    }

# Node 3: Generate question using Groq
def generate_question(state: QuestionState) -> QuestionState:
    documents = state["documents"]
    image_paths = state["image_paths"]
    
    if not documents:
        question = "❌ No documents found. Please add markdown files to the folder."
        return {**state, "question": question}
    
    # Combine content for question generation
    combined_content = "\n\n".join([doc["markdown"] for doc in documents])
    
    # Create prompt for Groq
    prompt = f"""
    Based on the following content, generate ONE thoughtful question that tests understanding of the key concepts. 
    The question should be clear, specific, and require the person to demonstrate comprehension.
    
    Content ({len(documents)} documents, {len(image_paths)} images):
    {combined_content[:2000]}...
    
    Generate only the question, nothing else.
    """
    
    try:
        client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            max_tokens=200,
            temperature=0.7
        )
        
        question = response.choices[0].message.content.strip()
        print(f"\n🤔 QUESTION GENERATED:")
        print(f"─" * 50)
        print(question)
        print(f"─" * 50)
        
    except Exception as e:
        question = f"❌ Error generating question: {str(e)}\n\nFallback question: What are the main topics covered in the documents?"
        print(question)
    
    return {**state, "question": question}

# Node 4: Collect user answer
def collect_answer(state: QuestionState) -> QuestionState:
    question = state["question"]
    
    print(f"\nYour turn! Please answer the question above:")
    user_answer = input("Your answer: ").strip()
    
    if not user_answer:
        user_answer = "No answer provided."
    
    print(f"\n✅ Answer recorded: {len(user_answer)} characters")
    
    return {**state, "user_answer": user_answer}

# Node 5: Provide feedback
def provide_feedback(state: QuestionState) -> QuestionState:
    question = state["question"]
    user_answer = state["user_answer"]
    documents = state["documents"]
    
    # Create context for feedback
    content_summary = "\n".join([doc["markdown"][:300] for doc in documents])
    
    feedback_prompt = f"""
    Question: {question}
    
    User's Answer: {user_answer}
    
    Content Context: {content_summary}
    
    Provide brief, constructive feedback on the user's answer. Be encouraging but honest about accuracy.
    """
    
    try:
        client = Groq(api_key=os.getenv("GROQ_API_KEY"))
        
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": feedback_prompt}],
            model="meta-llama/llama-4-scout-17b-16e-instruct",
            max_tokens=300,
            temperature=0.5
        )
        
        feedback = response.choices[0].message.content.strip()
        
    except Exception as e:
        feedback = f"Thank you for your answer! Due to a technical issue, I can't provide detailed feedback right now, but your response has been recorded."
    
    print(f"\n💭 FEEDBACK:")
    print(f"─" * 50)
    print(feedback)
    print(f"─" * 50)
    
    return {**state, "feedback": feedback}

# Build the workflow
def create_qa_workflow():
    workflow = StateGraph(QuestionState)
    
    workflow.add_node("load_content", load_content)
    workflow.add_node("generate_embeddings", generate_embeddings)
    workflow.add_node("generate_question", generate_question)
    workflow.add_node("collect_answer", collect_answer)
    workflow.add_node("provide_feedback", provide_feedback)
    
    workflow.set_entry_point("load_content")
    workflow.add_edge("load_content", "generate_embeddings")
    workflow.add_edge("generate_embeddings", "generate_question")
    workflow.add_edge("generate_question", "collect_answer")
    workflow.add_edge("collect_answer", "provide_feedback")
    workflow.add_edge("provide_feedback", END)
    
    return workflow.compile()

# Main execution
def run_qa_session():
    print("🎯 Interactive Q&A with CLIP Embeddings")
    print("═" * 40)
    
    # Get folder path
    folder_path = input("Enter folder path with markdown files and images: ").strip()
    if not folder_path:
        folder_path = "."
    
    # Initialize state
    initial_state = QuestionState(
        folder_path=folder_path,
        documents=[],
        image_paths=[],
        image_embeddings=[],
        text_embeddings=[],
        question="",
        user_answer="",
        feedback=""
    )
    
    # Run workflow
    try:
        workflow = create_qa_workflow()
        result = workflow.invoke(initial_state)
        
        print(f"\n🎉 SESSION COMPLETE!")
        print(f"📊 Summary:")
        print(f"  • Documents: {len(result['documents'])}")
        print(f"  • Images: {len(result['image_paths'])}")
        print(f"  • Embeddings generated: ✅")
        print(f"  • Question asked: ✅")
        print(f"  • Answer collected: ✅")
        print(f"  • Feedback provided: ✅")
        
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        print("Please check your folder path and Groq API key.")

if __name__ == "__main__":
    run_qa_session()


🎯 Interactive Q&A with CLIP Embeddings
════════════════════════════════════════


📁 Loading content from: E:\7. Projects From Sem 3\RAG\data\rag_1_Intro_20250906_032403\images
✅ Found 0 documents and 2 images
🎨 Processing 2 images with CLIP...
✅ Pasted image 20250714150825.png
✅ Pasted image 20250714163303.png
🔗 Generated embeddings: 2 images, 0 texts

Your turn! Please answer the question above:

✅ Answer recorded: 19 characters

💭 FEEDBACK:
──────────────────────────────────────────────────
It seems like you were expecting to find or provide some documents, but unfortunately, none were found. That's okay! It's an easy fix. 

To resolve this, you can try adding some markdown files to the specified folder. This should help you get started. If you're unsure about how to do this or need more guidance, feel free to ask, and I'll be happy to help. 

Keep going, and I'm sure you'll get everything sorted out!
──────────────────────────────────────────────────

🎉 SESSION COMPLETE!
📊 Summary:
  • Documents: 0
  • Images: 2
  • Embeddings generated: ✅
  • Question asked: ✅
 