# Agentic RAG with Local Ollama Model
This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) agent using LangGraph, LangChain, and a local model run via Ollama.

Adapted from: https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_agentic_rag/

## Materials
This notebook and all materials referenced here can be found on Sol at `/data/sse/ai-accelerated-spark`.

## 1. Import libraries

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
from langchain_core.tools import Tool
from langgraph.graph import Graph
from langchain.text_splitter import CharacterTextSplitter
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import torch
from typing import List
from pydantic import BaseModel, Field
import os

# Set a browser-like User-Agent in the environment before any pages are fetched.
# NOTE(review): presumably consumed by WebBaseLoader in section 2.1 — confirm.
os.environ["USER_AGENT"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"

## 2. Preprocess documents
### 2.1. Fetch documents

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Articles that make up the knowledge base for the retriever.
urls = [
    "https://medium.com/cupy-team/announcing-cupy-v13-66979ee7fab0",
    "https://www.unum.cloud/blog/2022-01-26-cupy",
    "https://medium.com/rapids-ai/easy-cpu-gpu-arrays-and-dataframes-run-your-dask-code-where-youd-like-e349d92351d",
]

# One loader per URL, loaded in order; `docs` ends up as one entry per URL,
# each entry being whatever that loader's .load() returned.
docs = list(map(lambda page_url: WebBaseLoader(page_url).load(), urls))

In [None]:
# Sanity check: show the first 1000 characters of the first document loaded from the first URL.
docs[0][0].page_content.strip()[:1000]

### 2.2. Split the fetched documents into smaller chunks for indexing into the vectorstore

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Flatten the per-URL lists into a single flat list of documents.
docs_list = sum(docs, [])

# Token-based splitter: 100-token chunks with a 50-token overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=100,
)
doc_splits = text_splitter.split_documents(docs_list)

In [None]:
# Inspect the text of the first chunk produced by the splitter.
doc_splits[0].page_content

## 3. Create a retriever tool
### 3.1. Use an in-memory vector store and all-MiniLM-L6-V2 embeddings model

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-embedding model used to index the chunks.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build the in-memory index from the chunks, then expose it through a retriever.
vectorstore = InMemoryVectorStore.from_documents(doc_splits, embedding_model)
retriever = vectorstore.as_retriever()

In [None]:
# TODO: Use ChromaDB for persistent vectorstore
# https://python.langchain.com/docs/integrations/vectorstores/

### 3.2. Create a retriever tool using LangChain's prebuilt `create_retriever_tool`

In [None]:
from langchain.tools.retriever import create_retriever_tool

# Wrap the retriever as a tool; the name and description below are the strings
# given to the tool and are reused verbatim in the fixture messages later on.
retriever_tool = create_retriever_tool(
    retriever,
    name="retrieve_python_gpu_acceleration",
    description="Search and return information about accelerating Python code using the GPU with RAPIDS and CuPy.",
)

### 3.3. Test the tool

In [None]:
# Call the tool directly with a sample query to check that retrieval returns something.
retriever_tool.invoke({"query": "How can I create a CuPy-backed Dask array for random data?"})

## 4. Generate query
### 4.1. Load local LLM

Start ollama using the terminal:
```bash
module load ollama/0.9.0
export OLLAMA_MODELS=/data/datasets/community/ollama
ollama-start
```

Check the available list of models using `ollama list`. Let me know via Slack if you would like to use and test other models.

In [None]:
from langchain_ollama import ChatOllama
import getpass
import socket
from langchain_ollama.llms import OllamaLLM
from langchain.chat_models import init_chat_model

# The Ollama server is addressed by this node's hostname on port 11434
# (assumes it was started on the same node as the notebook, as described above).
host_node = socket.gethostname()

# Use the login name of whoever runs the notebook instead of a hard-coded
# personal account, so the cell works unchanged for other users.
# NOTE(review): whether the Ollama endpoint on Sol actually uses the user part
# of the URL is not visible from this notebook — confirm.
ollama_user = getpass.getuser()

llm_model = init_chat_model(
    "ollama:qwen3:14b",
    temperature=0,
    base_url=f"http://{ollama_user}@{host_node}:11434/",
)

### 4.2. Build a `generate_query_or_respond` node

In [None]:
from langgraph.graph import MessagesState
import re

def generate_query_or_respond(state: MessagesState):
    """Call the model on the current conversation with the retriever tool bound.

    Given the messages so far, the model decides whether to issue a call to
    ``retriever_tool`` or to answer the user directly.

    Args:
        state: Graph state; ``state["messages"]`` is the conversation passed to the model.

    Returns:
        A state update ``{"messages": [response]}`` holding the model's reply
        with any ``<think>...</think>`` sections removed from its text.
    """
    tool_aware_llm = llm_model.bind_tools([retriever_tool])
    response = tool_aware_llm.invoke(state["messages"])

    # Remove thinking text. The quantifier is non-greedy so that, if the reply
    # contains more than one <think> block, the text between blocks is kept.
    # Non-string content is left untouched rather than crashing re.sub.
    if isinstance(response.content, str):
        response.content = re.sub(
            r"<think>.*?</think>", "", response.content, flags=re.DOTALL
        ).strip()
    return {"messages": [response]}

### 4.3. Try a random input

In [None]:
# A question unrelated to the indexed articles.
# NOTE: the name `input` shadows Python's builtin `input()` from here on.
input = dict(
    messages=[dict(role="user", content="Hello! What is the color of the sky?")]
)
generate_query_or_respond(input)["messages"][-1].pretty_print()

### 4.4. Try semantic search question

In [None]:
# A question about the indexed articles.
# NOTE: the name `input` shadows Python's builtin `input()` from here on.
input = dict(
    messages=[
        dict(
            role="user",
            content="How can I create a CuPy-backed Dask array for random data?",
        )
    ]
)
generate_query_or_respond(input)["messages"][-1].pretty_print()

## 5. Grade documents
### 5.1. Add conditional edge `grade_documents` to determine the relevance of retrieved documents

In [None]:
from pydantic import BaseModel, Field
from typing import Literal

# Prompt template for the relevance grader. The {context} and {question}
# placeholders are filled with str.format() inside grade_documents.
GRADE_PROMPT = (
    "You are a grader assessing relevance of a retrieved document to a user question. \n "
    "Here is the retrieved document: \n\n {context} \n\n"
    "Here is the user question: {question} \n"
    "If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n"
    "Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."
)

# Schema passed to `with_structured_output` in grade_documents.
# NOTE(review): the docstring and the Field description below presumably reach
# the model as part of the schema, so treat them as prompt text — confirm
# before rewording either.
class GradeDocuments(BaseModel):
    """Grade documents using a binary score for relevance check."""

    # Expected to be the string 'yes' or 'no'; declared as a plain str, so no
    # other value is rejected at validation time.
    binary_score: str = Field(
        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
    )


def grade_documents(
    state: MessagesState,
) -> Literal["generate_answer", "rewrite_question"]:
    """Decide the next node based on whether the retrieved text answers the question.

    The first message is taken as the user question and the last message as
    the retrieved context. The model grades relevance through the
    ``GradeDocuments`` structured output.

    Args:
        state: Graph state; reads ``state["messages"][0]`` and ``state["messages"][-1]``.

    Returns:
        ``"generate_answer"`` when the grade is "yes" (ignoring case and
        surrounding whitespace), otherwise ``"rewrite_question"``.
    """
    question = state["messages"][0].content
    context = state["messages"][-1].content

    prompt = GRADE_PROMPT.format(question=question, context=context)
    grader = llm_model.with_structured_output(GradeDocuments)
    response = grader.invoke([{"role": "user", "content": prompt}])

    # Be tolerant of what comes back: a missing/unparsed response or a
    # non-string score counts as "not relevant" instead of raising, and
    # variants such as "Yes" or "yes " are still accepted as relevant.
    score = getattr(response, "binary_score", None)
    verdict = score.strip().lower() if isinstance(score, str) else ""

    return "generate_answer" if verdict == "yes" else "rewrite_question"

### 5.2. Try with irrelevant documents in the tool response

In [None]:
from langchain_core.messages import convert_to_messages

# Hand-built conversation: user question -> assistant tool call -> tool reply.
# The tool reply is deliberately unrelated text ("meow").
# NOTE: the name `input` shadows Python's builtin `input()` from here on.
input = dict(
    messages=convert_to_messages(
        [
            dict(
                role="user",
                content="How can I create a CuPy-backed Dask array for random data?",
            ),
            dict(
                role="assistant",
                content="",
                tool_calls=[
                    dict(
                        id="1",
                        name="retrieve_python_gpu_acceleration",
                        args=dict(query="creating CuPy-backed Dask arrays for random data"),
                    )
                ],
            ),
            dict(role="tool", content="meow", tool_call_id="1"),
        ]
    )
)
grade_documents(input)

### 5.3. Try with relevant documents

In [None]:
# Same conversation shape as the previous test, but the tool reply now
# contains text taken from the indexed articles.
# NOTE: the name `input` shadows Python's builtin `input()` from here on.
input = dict(
    messages=convert_to_messages(
        [
            dict(
                role="user",
                content="How can I create a CuPy-backed Dask array for random data?",
            ),
            dict(
                role="assistant",
                content="",
                tool_calls=[
                    dict(
                        id="1",
                        name="retrieve_python_gpu_acceleration",
                        args=dict(query="creating CuPy-backed Dask arrays for random data"),
                    )
                ],
            ),
            dict(
                role="tool",
                content='Now, we can leverage the array.backend configuration to create a CuPy-backed Dask array for random data:>>> with dask.config.set({“array.backend”: “cupy”}):…    darr = da.random.randint(0, 3, size=(10, 20), chunks=(2, 5)) #\n\n= rs.randint(0, 3, size=(10, 20), chunks=(2, 5))>>> darrdask.array<randint, shape=(10, 20), dtype=int64, chunksize=(2, 5), \\chunktype=cupy.ndarray>Now, we can leverage the array.backend configuration to create a CuPy-backed Dask array for random data:>>> with\n\nfor random array creation.',
                tool_call_id="1",
            ),
        ]
    )
)
grade_documents(input)

## 6. Rewrite question
### 6.1. Build `rewrite_question` node

In [None]:
# Prompt template asking the model to reformulate the user's question.
# The {question} placeholder is filled with str.format() inside rewrite_question.
REWRITE_PROMPT = (
    "Look at the input and try to reason about the underlying semantic intent / meaning.\n"
    "Here is the initial question:"
    "\n ------- \n"
    "{question}"
    "\n ------- \n"
    "Formulate an improved question:"
)


def rewrite_question(state: MessagesState):
    """Ask the model for an improved version of the original user question.

    Args:
        state: Graph state; the first entry of ``state["messages"]`` is taken
            as the original question.

    Returns:
        A state update ``{"messages": [...]}`` containing one new user message
        whose content is the rewritten question, with any
        ``<think>...</think>`` sections removed.
    """
    messages = state["messages"]
    question = messages[0].content
    prompt = REWRITE_PROMPT.format(question=question)
    response = llm_model.invoke([{"role": "user", "content": prompt}])

    # Remove thinking text. Non-greedy so that text between two separate
    # <think> blocks survives; non-string content is passed through as is.
    content = response.content
    if isinstance(content, str):
        content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()

    return {"messages": [{"role": "user", "content": content}]}

### 6.2 Test

In [None]:
# Conversation whose tool reply is irrelevant, i.e. the situation in which the
# graph routes to rewrite_question.
# NOTE: the name `input` shadows Python's builtin `input()` from here on.
input = dict(
    messages=convert_to_messages(
        [
            dict(
                role="user",
                content="How can I create a CuPy-backed Dask array for random data?",
            ),
            dict(
                role="assistant",
                content="",
                tool_calls=[
                    dict(
                        id="1",
                        name="retrieve_python_gpu_acceleration",
                        args=dict(query="creating CuPy-backed Dask arrays for random data"),
                    )
                ],
            ),
            dict(role="tool", content="meow", tool_call_id="1"),
        ]
    )
)

# rewrite_question returns plain dict messages, hence the ["content"] lookup.
response = rewrite_question(input)
print(response["messages"][-1]["content"])

## 7. Generate an answer
### 7.1. Build `generate_answer` node

In [None]:
# Prompt template for the final answer. The {question} and {context}
# placeholders are filled with str.format() inside generate_answer.
GENERATE_PROMPT = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context}"
)


def generate_answer(state: MessagesState):
    """Answer the user question from the retrieved context.

    The first message is taken as the question and the last message as the
    retrieved context; both are inserted into ``GENERATE_PROMPT``.

    Args:
        state: Graph state; reads ``state["messages"][0]`` and ``state["messages"][-1]``.

    Returns:
        A state update ``{"messages": [response]}`` holding the model's reply
        with any ``<think>...</think>`` sections removed from its text.
    """
    question = state["messages"][0].content
    context = state["messages"][-1].content
    prompt = GENERATE_PROMPT.format(question=question, context=context)
    response = llm_model.invoke([{"role": "user", "content": prompt}])

    # Remove thinking text. Non-greedy so that text between two separate
    # <think> blocks survives; non-string content is left untouched.
    if isinstance(response.content, str):
        response.content = re.sub(
            r"<think>.*?</think>", "", response.content, flags=re.DOTALL
        ).strip()
    return {"messages": [response]}

### 7.2. Test

In [None]:
# Conversation whose tool reply contains relevant text, i.e. the situation in
# which the graph routes to generate_answer.
# NOTE: the name `input` shadows Python's builtin `input()` from here on.
input = dict(
    messages=convert_to_messages(
        [
            dict(
                role="user",
                content="How can I create a CuPy-backed Dask array for random data?",
            ),
            dict(
                role="assistant",
                content="",
                tool_calls=[
                    dict(
                        id="1",
                        name="retrieve_python_gpu_acceleration",
                        args=dict(query="creating CuPy-backed Dask arrays for random data"),
                    )
                ],
            ),
            dict(
                role="tool",
                content='Now, we can leverage the array.backend configuration to create a CuPy-backed Dask array for random data:>>> with dask.config.set({“array.backend”: “cupy”}):…    darr = da.random.randint(0, 3, size=(10, 20), chunks=(2, 5)) #\n\n= rs.randint(0, 3, size=(10, 20), chunks=(2, 5))>>> darrdask.array<randint, shape=(10, 20), dtype=int64, chunksize=(2, 5), \\chunktype=cupy.ndarray>Now, we can leverage the array.backend configuration to create a CuPy-backed Dask array for random data:>>> with\n\nfor random array creation.',
                tool_call_id="1",
            ),
        ]
    )
)

response = generate_answer(input)
response["messages"][-1].pretty_print()

## 8. Assemble the graph

In [None]:
from langgraph.graph import StateGraph, START, END
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition

# Build the agent graph over MessagesState.
workflow = StateGraph(MessagesState)

# Define the nodes we will cycle between.
# Nodes added without an explicit name are referred to below by their function
# name; the retriever tool node is registered as "retrieve".
workflow.add_node(generate_query_or_respond)
workflow.add_node("retrieve", ToolNode([retriever_tool]))
workflow.add_node(rewrite_question)
workflow.add_node(generate_answer)

# Every run starts by letting the model decide between retrieving and answering.
workflow.add_edge(START, "generate_query_or_respond")

# Decide whether to retrieve
workflow.add_conditional_edges(
    "generate_query_or_respond",
    # Assess LLM decision (call `retriever_tool` tool or respond to the user)
    tools_condition,
    {
        # Translate the condition outputs to nodes in our graph
        "tools": "retrieve",
        END: END,
    },
)

# Edges taken after the `retrieve` node is called: grade_documents returns the
# name of the next node ("generate_answer" or "rewrite_question").
workflow.add_conditional_edges(
    "retrieve",
    # Assess agent decision
    grade_documents,
)
workflow.add_edge("generate_answer", END)
# A rewritten question goes back to the model, closing the loop.
# NOTE(review): nothing in this cell bounds how many times that loop can repeat.
workflow.add_edge("rewrite_question", "generate_query_or_respond")

# Compile
graph = workflow.compile()

In [None]:
from IPython.display import Image, display

# Render the compiled graph as a Mermaid PNG and show it inline.
# NOTE(review): PNG rendering may depend on an external service or extra packages — confirm it works on compute nodes.
display(Image(graph.get_graph().draw_mermaid_png()))

## 9. Run the agentic RAG

In [None]:
# Stream the graph for one question and print the newest message of every
# node update as it arrives.
for chunk in graph.stream(
    {
        "messages": [
            {
                "role": "user",
                "content": "How can I create a CuPy-backed Dask array for random data?",
            }
        ]
    }
):
    # Each chunk is iterated as {node_name: state_update}.
    for node, update in chunk.items():
        print("Update from node", node)
        update["messages"][-1].pretty_print()
        print("\n\n")

## 10. Graphical User Interface using Gradio

In [None]:
import gradio as gr

def ask_graph(user_input, chat_history):
    """Run one question through the graph and extend the chat transcript.

    Only ``user_input`` is sent to the graph; ``chat_history`` is used solely
    to build the transcript that is displayed.

    Args:
        user_input: Text typed by the user.
        chat_history: Messages shown so far (may be empty or None).

    Returns:
        A pair ``("", history)``: an empty string for the text box and the
        transcript with the new user/assistant messages appended.
    """
    final_state = graph.invoke(
        {"messages": [{"role": "user", "content": user_input}]}
    )
    answer = final_state["messages"][-1].content

    new_turn = [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": answer},
    ]
    # Start a fresh transcript when there is no history yet.
    transcript = chat_history + new_turn if chat_history else new_turn

    return "", transcript

def clear_conversation():
    """Reset the UI: empty text for the input box and an empty message list for the chat.

    Returns:
        A pair ``("", [])`` matching the ``[query_input, chatbot]`` outputs.
        The chatbot is created with ``type="messages"``, whose value is a list
        of message dicts, so the cleared value is an empty list rather than a
        string.
    """
    return "", []

# Chat UI: a transcript on top, and below it a text box next to the
# submit / clear buttons.
with gr.Blocks(fill_height=True, fill_width=True) as demo:
    gr.Markdown("### Agentic RAG")

    with gr.Column():

        with gr.Row():
            # type="messages": the transcript is a list of {"role", "content"} dicts,
            # which is the shape ask_graph returns.
            chatbot = gr.Chatbot(height=350, type="messages")

        with gr.Row():
            with gr.Column(scale=4):
                query_input = gr.Textbox(
                    label="Enter text here", placeholder="Ask something...", lines=1
                    )
            with gr.Column(scale=1):
                with gr.Row():
                    submit_btn = gr.Button("⬆")
                # 🧹 Clear button
                with gr.Row():
                    clear_btn = gr.Button("🧹 Clear Conversation")

        # Clicking the button and pressing Enter in the text box both call
        # ask_graph with the same inputs and outputs.
        submit_btn.click(
            fn=ask_graph,
            inputs=[query_input, chatbot],
            outputs=[query_input, chatbot],
        )

        query_input.submit(
            fn=ask_graph,
            inputs=[query_input, chatbot],
            outputs=[query_input, chatbot],
        )

        # Resets both the text box and the transcript.
        clear_btn.click(
            fn=clear_conversation,
            outputs=[query_input, chatbot],
        )

# NOTE(review): share=True requests a public Gradio share link; confirm that
# exposing this app (and the local model behind it) outside the cluster is intended.
demo.launch(share=True)

In [None]:
# Check dependencies for Enhanced GPU Mentor
import sys

# Maps the name to report / pip-install to the module name used for the import test.
required_packages = dict(
    plotly='plotly',
    gradio='gradio',
    pandas='pandas',
    numpy='numpy',
)

missing_packages = []

# Try each import; collect the names of the ones that fail.
for package, import_name in required_packages.items():
    try:
        __import__(import_name)
    except ImportError:
        print(f"❌ {package} - Missing")
        missing_packages.append(package)
    else:
        print(f"✅ {package} - Available")

if not missing_packages:
    print("\n🎉 All required packages are available!")
else:
    print("\nInstall missing packages with:")
    print("pip install " + ' '.join(missing_packages))

# Check Sol-specific modules (these should be available when running on Sol).
# The names listed here are imported by the GPU Mentor cells further down.
print("\n--- Sol-specific checks ---")
sol_modules = ['subprocess', 'uuid', 'pathlib', 'json', 'tempfile']
for module in sol_modules:
    try:
        __import__(module)
    except ImportError:
        print(f"❌ {module} - Missing (this should not happen)")
    else:
        print(f"✅ {module} - Available")

# GPU Mentor: Enhanced RAG with Code Execution & Benchmarking

This enhanced version of the Agentic RAG system includes:
- **Code Execution on Sol**: Submit and execute user code on Sol's GPU nodes
- **Performance Benchmarking**: Compare CPU vs GPU performance with RAPIDS libraries
- **Code Optimization**: Automatically suggest GPU-accelerated alternatives
- **Interactive Learning**: Socratic questioning to guide learning

## Architecture Overview
1. **RAG Agent**: Existing system for answering questions about GPU acceleration
2. **Code Executor**: Submits jobs to Sol via SLURM
3. **Benchmark Engine**: Measures and compares CPU/GPU performance
4. **Code Optimizer**: Suggests RAPIDS/CuPy alternatives
5. **Enhanced UI**: Comprehensive interface for code playground and visualization

In [None]:
# Enhanced imports for GPU Mentor
import subprocess
import tempfile
import time
import json
import uuid
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import ast
import inspect

## 11. Sol Code Executor - SLURM Integration

In [None]:
class SolCodeExecutor:
    """
    Executes code on Sol supercomputer via SLURM job submission.
    Handles both CPU and GPU benchmarking jobs.

    All job files (batch script, stdout/stderr, generated benchmark script and
    its JSON result) are expected in ``base_work_dir``: scripts are written
    there and ``sbatch`` is run with that directory as its working directory.

    NOTE(review): the default work dir is under /tmp; whether that path is
    visible to the compute nodes that run the job is not checked here — confirm.
    """
    
    def __init__(self, base_work_dir="/tmp/gpu_mentor"):
        """Remember the work directory and create it (including parents) if missing."""
        self.base_work_dir = Path(base_work_dir)
        # parents=True so a nested, not-yet-existing work dir can be used too.
        self.base_work_dir.mkdir(parents=True, exist_ok=True)
        
    def create_slurm_script(self, code: str, job_type: str = "cpu", 
                           time_limit: str = "00:15:00", 
                           memory: str = "32G") -> Tuple[str, str]:
        """Create SLURM batch script for code execution.

        The batch script writes a timing wrapper around ``code`` to a Python
        file via a quoted heredoc and then runs it. The wrapper stores its
        measurements in ``<job_type>_benchmark_<job_id>.json``.

        Args:
            code: User code to embed (indented into the wrapper's ``try`` block).
            job_type: ``"cpu"`` selects the CPU template; any other value
                selects the GPU template.
            time_limit: Value for ``#SBATCH --time``.
            memory: Value for ``#SBATCH --mem``.

        Returns:
            ``(script_content, job_id)`` where ``job_id`` is a fresh 8-character
            identifier used in all file names of this job.
        """
        
        job_id = str(uuid.uuid4())[:8]
        script_content = ""
        
        if job_type == "cpu":
            script_content = f"""#!/bin/bash
#SBATCH --job-name=gpu_mentor_cpu_{job_id}
#SBATCH --partition=general
#SBATCH --qos=public
#SBATCH --time={time_limit}
#SBATCH --cpus-per-task=8
#SBATCH --mem={memory}
#SBATCH --output=cpu_output_{job_id}.out
#SBATCH --error=cpu_error_{job_id}.err

# Load necessary modules
module load python/3.11
module load anaconda3

# Activate conda environment with CPU libraries
source activate base

# Create timing wrapper
cat > benchmark_cpu_{job_id}.py << 'SCRIPT_EOF'
import time
import sys
import traceback
import json

start_time = time.perf_counter()
try:
{self._indent_code(code)}
    execution_status = "success"
    error_message = ""
except Exception as e:
    execution_status = "error"
    error_message = str(e)
    traceback.print_exc()

end_time = time.perf_counter()
execution_time = end_time - start_time

# Save benchmark results
results = {{
    "execution_time": execution_time,
    "job_type": "cpu",
    "job_id": "{job_id}",
    "status": execution_status,
    "error": error_message
}}

with open("cpu_benchmark_{job_id}.json", "w") as f:
    json.dump(results, f)

print(f"CPU Execution time: {{execution_time:.4f}} seconds")
SCRIPT_EOF

# Execute the benchmark script
python benchmark_cpu_{job_id}.py
"""
        else:  # GPU job
            script_content = f"""#!/bin/bash
#SBATCH --job-name=gpu_mentor_gpu_{job_id}
#SBATCH --partition=general
#SBATCH --qos=public
#SBATCH --time={time_limit}
#SBATCH --cpus-per-task=8
#SBATCH --mem={memory}
#SBATCH --gres=gpu:1
#SBATCH --output=gpu_output_{job_id}.out
#SBATCH --error=gpu_error_{job_id}.err

# Load necessary modules
module load python/3.11
module load anaconda3
module load cuda/12.1

# Activate conda environment with GPU libraries
source activate rapids-23.08

# Create timing wrapper
cat > benchmark_gpu_{job_id}.py << 'SCRIPT_EOF'
import time
import sys
import traceback
import json

# Import GPU libraries
try:
    import cupy as cp
    import cudf
    import cuml
    gpu_available = True
except ImportError as e:
    print(f"GPU libraries not available: {{e}}")
    gpu_available = False

start_time = time.perf_counter()
try:
{self._indent_code(code)}
    execution_status = "success"
    error_message = ""
except Exception as e:
    execution_status = "error"
    error_message = str(e)
    traceback.print_exc()

end_time = time.perf_counter()
execution_time = end_time - start_time

# Save benchmark results
results = {{
    "execution_time": execution_time,
    "job_type": "gpu",
    "job_id": "{job_id}",
    "status": execution_status,
    "error": error_message,
    "gpu_available": gpu_available
}}

with open("gpu_benchmark_{job_id}.json", "w") as f:
    json.dump(results, f)

print(f"GPU Execution time: {{execution_time:.4f}} seconds")
SCRIPT_EOF

# Execute the benchmark script
python benchmark_gpu_{job_id}.py
"""
        
        return script_content, job_id
    
    def _indent_code(self, code: str, indent: str = "    ") -> str:
        """Prefix every line of ``code`` with ``indent`` so it fits inside the wrapper's ``try`` block."""
        return "\n".join(indent + line for line in code.split("\n"))
    
    def submit_job(self, script_content: str, job_id: str) -> Optional[str]:
        """Write the batch script to the work dir and submit it with ``sbatch``.

        Args:
            script_content: Full text of the batch script.
            job_id: Identifier returned by ``create_slurm_script``; used for the file name.

        Returns:
            The last whitespace-separated token of ``sbatch``'s stdout (taken
            as the SLURM job id), or ``None`` if submission failed. Failures
            are printed, not raised.
        """
        script_path = self.base_work_dir / f"job_{job_id}.sh"
        script_path.write_text(script_content)
        
        try:
            # Run from the work dir so the relative file names used inside the
            # script resolve there.
            result = subprocess.run(
                ["sbatch", str(script_path)],
                capture_output=True,
                text=True,
                cwd=self.base_work_dir
            )
            
            if result.returncode != 0:
                raise Exception(f"Job submission failed: {result.stderr}")

            # Extract SLURM job ID from output
            return result.stdout.strip().split()[-1]
                
        except Exception as e:
            # Covers a missing sbatch binary, a non-zero exit, and empty output.
            print(f"Error submitting job: {e}")
            return None
    
    def check_job_status(self, slurm_job_id: str) -> str:
        """Check the status of a SLURM job.

        Asks ``squeue`` first; if that gives no output, falls back to ``sacct``.

        Returns:
            The state string reported by SLURM, ``"UNKNOWN"`` when neither
            command reports one, or ``"ERROR"`` if running them raised.
        """
        try:
            result = subprocess.run(
                ["squeue", "-j", slurm_job_id, "-h", "-o", "%T"],
                capture_output=True,
                text=True
            )
            
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout.strip()

            # Job might be completed, check sacct
            result = subprocess.run(
                ["sacct", "-j", slurm_job_id, "-n", "-o", "State"],
                capture_output=True,
                text=True
            )
            if result.returncode == 0 and result.stdout.strip():
                # Only the first token of the sacct output is used.
                return result.stdout.strip().split()[0]
            return "UNKNOWN"
        except Exception as e:
            print(f"Error checking job status: {e}")
            return "ERROR"
    
    def get_job_results(self, job_id: str, job_type: str) -> Dict:
        """Retrieve benchmark results from completed job.

        Returns:
            The parsed content of ``<job_type>_benchmark_<job_id>.json`` from
            the work dir, or a dict with an ``"error"`` key when the file is
            missing or cannot be read/parsed.
        """
        result_file = self.base_work_dir / f"{job_type}_benchmark_{job_id}.json"
        
        if not result_file.exists():
            return {"error": "Results file not found"}

        try:
            with open(result_file, 'r') as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError) as e:
            # e.g. the job was killed while the file was being written.
            return {"error": f"Could not read results file: {e}"}
    
    def cleanup_job_files(self, job_id: str):
        """Delete every file in the work dir that belongs to ``job_id``.

        Deletion errors are printed and do not stop the cleanup.
        """
        patterns = [
            f"job_{job_id}.sh",
            f"*_output_{job_id}.out",
            f"*_error_{job_id}.err",
            # The generated scripts are named benchmark_cpu_<id>.py and
            # benchmark_gpu_<id>.py (see create_slurm_script).
            f"benchmark_*_{job_id}.py",
            f"*_benchmark_{job_id}.json"
        ]
        
        for pattern in patterns:
            for file_path in self.base_work_dir.glob(pattern):
                try:
                    file_path.unlink()
                except Exception as e:
                    print(f"Error cleaning up {file_path}: {e}")

# Initialize the Sol executor with its default work directory (/tmp/gpu_mentor).
# Constructing it creates that directory if it does not exist yet.
sol_executor = SolCodeExecutor()

## 12. Code Optimizer - GPU Acceleration Suggestions

In [None]:
class CodeOptimizer:
    """
    Analyzes user code and suggests GPU-accelerated alternatives using RAPIDS and CuPy.

    Everything here works on plain text: libraries are detected and rewritten
    by substring search/replace, not by parsing the code.
    """
    
    def __init__(self):
        """Set up the substring replacement table, grouped by source library."""
        self.optimization_patterns = {
            # NumPy to CuPy optimizations
            'numpy': {
                'import numpy as np': 'import cupy as np',
                'np.array(': 'cp.array(',
                'np.random.': 'cp.random.',
                'np.linalg.': 'cp.linalg.',
                'np.fft.': 'cp.fft.',
                '.cpu()': '',  # Remove .cpu() calls
            },
            
            # Pandas to cuDF optimizations
            'pandas': {
                'import pandas as pd': 'import cudf as pd',
                'pd.DataFrame(': 'cudf.DataFrame(',
                'pd.Series(': 'cudf.Series(',
                'pd.read_csv(': 'cudf.read_csv(',
                'pd.read_parquet(': 'cudf.read_parquet(',
                '.to_pandas()': '',  # Remove .to_pandas() calls
            },
            
            # Scikit-learn to cuML optimizations
            'sklearn': {
                'from sklearn.': 'from cuml.',
                'sklearn.': 'cuml.',
            },
            
            # Dask optimizations
            'dask': {
                # Real newlines here: the replacement is several lines of code.
                'import dask.array as da': 'import dask.array as da\n# Configure Dask to use CuPy backend\nimport dask\ndask.config.set({"array.backend": "cupy"})',
                'import dask.dataframe as dd': 'import dask_cudf as dd',
            }
        }
    
    def analyze_code(self, code: str) -> Dict[str, object]:
        """Analyze code for optimization opportunities.

        Args:
            code: Source text to inspect.

        Returns:
            A dict with the keys ``libraries_detected`` (pattern groups with at
            least one match), ``optimization_opportunities`` (always empty
            here), ``estimated_speedup`` (largest heuristic factor that
            matched, 1.0 if none), ``gpu_compatible`` (always True here) and
            ``warnings``.
        """
        analysis = {
            'libraries_detected': [],
            'optimization_opportunities': [],
            'estimated_speedup': 1.0,
            'gpu_compatible': True,
            'warnings': []
        }
        
        # Detect libraries used: one hit per group is enough.
        for lib_type, patterns in self.optimization_patterns.items():
            if any(pattern in code for pattern in patterns):
                analysis['libraries_detected'].append(lib_type)
        
        # Check for GPU incompatible operations
        incompatible_patterns = [
            'matplotlib.pyplot',  # Plotting might need CPU arrays
            'pickle.dump',        # Serialization issues
            'multiprocessing',    # GPU memory management conflicts
        ]
        
        for pattern in incompatible_patterns:
            if pattern in code:
                analysis['warnings'].append(f"Detected {pattern} - may require CPU data conversion")
        
        # Estimate potential speedup based on operations (heuristic factors).
        op_speedups = (
            ('np.dot', 10.0), ('np.matmul', 10.0), ('@', 10.0),  # Matrix operations
            ('np.fft', 15.0), ('scipy.fft', 15.0),               # FFT operations
            ('.groupby(', 3.0), ('.agg(', 3.0),                  # Aggregation operations
        )
        speedup_factors = [factor for op, factor in op_speedups if op in code]

        # Loops that could be vectorized
        if 'for ' in code and 'range(' in code:
            speedup_factors.append(5.0)
        
        if speedup_factors:
            analysis['estimated_speedup'] = max(speedup_factors)
        
        return analysis
    
    def suggest_optimizations(self, code: str) -> str:
        """Generate GPU-optimized version of the code.

        Applies every replacement from ``optimization_patterns``, then makes
        sure the names those replacements introduce (``cp`` and ``cudf``) are
        actually imported, and finally prepends a CuPy memory-pool preamble
        when the result mentions ``cupy``.

        Args:
            code: Original source text.

        Returns:
            The rewritten source text.
        """
        optimized_code = code
        
        # Apply optimization patterns
        for patterns in self.optimization_patterns.values():
            for old_pattern, new_pattern in patterns.items():
                optimized_code = optimized_code.replace(old_pattern, new_pattern)
        
        # The replacements rewrite calls to `cp.` / `cudf.` but the rewritten
        # import lines bind the old aliases (`np`, `pd`). Add the missing
        # imports so the suggested code can run on its own.
        import_lines = {line.strip() for line in optimized_code.splitlines()}
        needed_imports = []
        if 'cp.' in optimized_code and 'import cupy as cp' not in import_lines:
            needed_imports.append('import cupy as cp')
        if 'cudf.' in optimized_code and 'import cudf' not in import_lines:
            needed_imports.append('import cudf')
        if needed_imports:
            optimized_code = '\n'.join(needed_imports) + '\n' + optimized_code
        
        # Add memory pool for better performance
        if 'cupy' in optimized_code:
            memory_pool_code = """
# Enable CuPy memory pool for better performance
import cupy
mempool = cupy.get_default_memory_pool()
pinned_mempool = cupy.get_default_pinned_memory_pool()
"""
            optimized_code = memory_pool_code + optimized_code
        
        return optimized_code
    
    def create_benchmark_code(self, original_code: str, optimized_code: str) -> Tuple[str, str]:
        """Create side-by-side benchmark versions.

        Returns:
            ``(cpu_benchmark, gpu_benchmark)``: the two code versions, each
            prefixed with a short import header.
        """
        
        cpu_benchmark = f"""
# CPU Version Benchmark
import time
import numpy as np
import pandas as pd

{original_code}
"""
        
        gpu_benchmark = f"""
# GPU Version Benchmark  
import time
import cupy as cp
import cudf as pd

{optimized_code}

# Convert final results back to CPU for comparison if needed
# result = cp.asnumpy(result) if hasattr(result, 'get') else result
"""
        
        return cpu_benchmark, gpu_benchmark

# Initialize the code optimizer (builds its replacement table; no other setup).
code_optimizer = CodeOptimizer()

## 13. Benchmark Engine - Performance Comparison

In [None]:
class BenchmarkEngine:
    """
    Coordinates CPU vs GPU benchmarking using Sol's compute resources.

    The engine asks the code optimizer for an analysis and a GPU rewrite of the
    user's code, submits one CPU and one GPU job through the Sol executor,
    polls until both jobs reach a final state (or the timeout expires), and
    turns the two job results into timing metrics, recommendations and a
    Plotly figure.
    """

    # Job states that end polling. Recognising only COMPLETED/FAILED would keep
    # a cancelled or timed-out job polling until the full timeout elapsed.
    # NOTE(review): assumes check_job_status reports Slurm state names — confirm.
    TERMINAL_STATES = frozenset({
        "COMPLETED", "FAILED", "CANCELLED", "TIMEOUT", "OUT_OF_MEMORY",
        "NODE_FAIL", "BOOT_FAIL", "DEADLINE", "PREEMPTED",
    })

    def __init__(self, sol_executor: "SolCodeExecutor", code_optimizer: "CodeOptimizer"):
        self.sol_executor = sol_executor
        self.code_optimizer = code_optimizer
        # One {"timestamp", "results"} entry per completed benchmark run.
        self.benchmark_history = []

    @classmethod
    def _is_terminal(cls, status) -> bool:
        """Return True when ``status`` names a state in ``TERMINAL_STATES``."""
        if not status:
            return False
        # Tolerate decorated states such as "CANCELLED by 1234" or "CANCELLED+".
        words = str(status).split()
        return bool(words) and words[0].rstrip("+").upper() in cls.TERMINAL_STATES

    def run_comprehensive_benchmark(self, user_code: str, timeout: int = 300,
                                    poll_interval: float = 10.0) -> Dict:
        """
        Run comprehensive CPU vs GPU benchmark.

        Args:
            user_code: Original user code to benchmark
            timeout: Maximum wait time for jobs to complete (seconds)
            poll_interval: Seconds to wait between job status checks

        Returns:
            Dictionary with benchmark results (see ``_process_results``) plus a
            ``job_status`` entry holding the last observed CPU/GPU states and a
            ``timed_out`` flag, or ``{"error": ...}`` when either job could not
            be submitted.
        """

        print("🔍 Analyzing code for optimization opportunities...")
        analysis = self.code_optimizer.analyze_code(user_code)

        print("⚡ Generating GPU-optimized version...")
        optimized_code = self.code_optimizer.suggest_optimizations(user_code)

        # Create benchmark versions
        cpu_code, gpu_code = self.code_optimizer.create_benchmark_code(user_code, optimized_code)

        print("🚀 Submitting jobs to Sol...")

        # Submit CPU job
        cpu_script, cpu_job_id = self.sol_executor.create_slurm_script(
            cpu_code, job_type="cpu", time_limit="00:15:00"
        )
        cpu_slurm_id = self.sol_executor.submit_job(cpu_script, cpu_job_id)

        # Submit GPU job
        gpu_script, gpu_job_id = self.sol_executor.create_slurm_script(
            gpu_code, job_type="gpu", time_limit="00:15:00"
        )
        gpu_slurm_id = self.sol_executor.submit_job(gpu_script, gpu_job_id)

        if not cpu_slurm_id or not gpu_slurm_id:
            return {"error": "Failed to submit jobs to Sol"}

        print(f"✅ Jobs submitted: CPU ({cpu_slurm_id}), GPU ({gpu_slurm_id})")
        print("⏳ Waiting for jobs to complete...")

        # Wait for jobs to complete
        deadline = time.time() + timeout
        cpu_status = gpu_status = "PENDING"
        timed_out = True

        while time.time() < deadline:
            cpu_status = self.sol_executor.check_job_status(cpu_slurm_id)
            gpu_status = self.sol_executor.check_job_status(gpu_slurm_id)

            print(f"📊 Status - CPU: {cpu_status}, GPU: {gpu_status}")

            if self._is_terminal(cpu_status) and self._is_terminal(gpu_status):
                timed_out = False
                break

            # Never sleep past the deadline.
            time.sleep(max(0.0, min(poll_interval, deadline - time.time())))

        if timed_out:
            print(f"⚠️ Timed out after {timeout}s waiting for jobs; results may be incomplete.")

        # Collect results
        print("📈 Collecting benchmark results...")
        try:
            cpu_results = self.sol_executor.get_job_results(cpu_job_id, "cpu")
            gpu_results = self.sol_executor.get_job_results(gpu_job_id, "gpu")

            # Calculate performance metrics
            benchmark_results = self._process_results(
                cpu_results, gpu_results, analysis, user_code, optimized_code
            )
        finally:
            # Cleanup runs even when collecting or processing results fails,
            # so job files are not left behind.
            self.sol_executor.cleanup_job_files(cpu_job_id)
            self.sol_executor.cleanup_job_files(gpu_job_id)

        benchmark_results["job_status"] = {
            "cpu": cpu_status,
            "gpu": gpu_status,
            "timed_out": timed_out,
        }

        # Store in history
        self.benchmark_history.append({
            "timestamp": datetime.now().isoformat(),
            "results": benchmark_results
        })

        return benchmark_results

    def _process_results(self, cpu_results: Dict, gpu_results: Dict,
                        analysis: Dict, original_code: str, optimized_code: str) -> Dict:
        """Process and format benchmark results.

        ``performance_metrics`` is filled only when both jobs report
        ``status == "success"`` and both execution times are positive;
        otherwise it stays an empty dict.
        """

        results = {
            "analysis": analysis,
            "original_code": original_code,
            "optimized_code": optimized_code,
            "cpu_results": cpu_results,
            "gpu_results": gpu_results,
            "performance_metrics": {},
            "recommendations": []
        }

        # Calculate performance metrics
        both_succeeded = (cpu_results.get("status") == "success" and
                          gpu_results.get("status") == "success")

        if both_succeeded:
            # `or 0` also covers an explicit None execution time.
            cpu_time = cpu_results.get("execution_time") or 0
            gpu_time = gpu_results.get("execution_time") or 0

            if cpu_time > 0 and gpu_time > 0:
                speedup = cpu_time / gpu_time
                # A missing or zero estimate falls back to 1.0 so the ratio
                # below cannot divide by zero.
                estimated = analysis.get("estimated_speedup") or 1.0
                efficiency = (speedup / estimated) * 100

                results["performance_metrics"] = {
                    "cpu_execution_time": cpu_time,
                    "gpu_execution_time": gpu_time,
                    "speedup_factor": speedup,
                    "efficiency_percent": efficiency,
                    "time_saved": cpu_time - gpu_time,
                    "percent_improvement": ((cpu_time - gpu_time) / cpu_time) * 100
                }

        # Generate recommendations
        results["recommendations"] = self._generate_recommendations(results)

        return results

    def _generate_recommendations(self, results: Dict) -> List[str]:
        """Generate educational recommendations based on benchmark results."""
        recommendations = []

        metrics = results.get("performance_metrics", {})

        if not metrics:
            # Without timings there is no speedup to judge; do not report a
            # made-up "limited benefit" verdict.
            recommendations.append("⚠️ No timing data was collected, so GPU speedup could not be measured. Check the job output for errors.")
        else:
            speedup = metrics.get("speedup_factor", 1.0)

            if speedup > 5:
                recommendations.append("🎉 Excellent GPU acceleration! This workload benefits significantly from parallel processing.")
            elif speedup > 2:
                recommendations.append("✅ Good GPU speedup achieved. Consider optimizing memory access patterns for even better performance.")
            elif speedup > 1.1:
                recommendations.append("📈 Modest improvement with GPU. This workload may be memory-bound or have limited parallelism.")
            else:
                recommendations.append("⚠️ Limited GPU benefit. Consider if this workload has sufficient computational complexity.")

        # Check for optimization opportunities
        analysis = results.get("analysis", {})
        libraries = analysis.get("libraries_detected", [])
        if "numpy" in libraries:
            recommendations.append("💡 Consider using CuPy's memory pool for better performance with repeated operations.")

        if "pandas" in libraries:
            recommendations.append("📊 cuDF provides GPU-accelerated dataframe operations similar to pandas.")

        if analysis.get("warnings"):
            recommendations.append("⚠️ Some operations may require CPU-GPU memory transfers. Profile memory usage.")

        return recommendations

    def create_visualization(self, benchmark_results: Dict) -> "go.Figure":
        """Create interactive visualization of benchmark results.

        Returns a grouped bar chart of CPU vs GPU execution time with a
        speedup annotation, or a figure carrying only a "not available"
        annotation when ``performance_metrics`` is empty.
        """

        metrics = benchmark_results.get("performance_metrics", {})

        if not metrics:
            # Create error visualization
            fig = go.Figure()
            fig.add_annotation(
                text="Benchmark data not available",
                xref="paper", yref="paper",
                x=0.5, y=0.5, showarrow=False,
                font=dict(size=20)
            )
            return fig

        # Create comparison chart
        fig = go.Figure()

        # Execution time comparison
        fig.add_trace(go.Bar(
            name='CPU',
            x=['Execution Time'],
            y=[metrics["cpu_execution_time"]],
            marker_color='lightcoral',
            text=[f"{metrics['cpu_execution_time']:.3f}s"],
            textposition='auto'
        ))

        fig.add_trace(go.Bar(
            name='GPU',
            x=['Execution Time'],
            y=[metrics["gpu_execution_time"]],
            marker_color='lightblue',
            text=[f"{metrics['gpu_execution_time']:.3f}s"],
            textposition='auto'
        ))

        # Add speedup annotation
        speedup = metrics.get("speedup_factor", 1.0)
        fig.add_annotation(
            text=f"🚀 {speedup:.1f}x Speedup",
            xref="paper", yref="paper",
            x=0.7, y=0.9,
            showarrow=False,
            font=dict(size=16, color="green"),
            bgcolor="lightyellow",
            bordercolor="orange",
            borderwidth=2
        )

        fig.update_layout(
            title="CPU vs GPU Performance Comparison",
            yaxis_title="Execution Time (seconds)",
            barmode='group',
            template="plotly_white"
        )

        return fig

# Module-level BenchmarkEngine wired to the existing `sol_executor` and
# `code_optimizer` instances; the mentor and the Gradio handlers below use it.
benchmark_engine = BenchmarkEngine(sol_executor, code_optimizer)

## 14. Enhanced GPU Mentor Agent

In [None]:
class EnhancedGPUMentor:
    """
    Enhanced GPU Mentor that combines RAG capabilities with code execution and benchmarking.
    Provides comprehensive GPU acceleration tutoring with hands-on experimentation.

    Each call to ``process_user_input`` is recorded in ``conversation_history``;
    every benchmark result it obtains is recorded in ``benchmark_results``.
    """

    def __init__(self, rag_graph, benchmark_engine: "BenchmarkEngine", code_optimizer: "CodeOptimizer"):
        self.rag_graph = rag_graph
        self.benchmark_engine = benchmark_engine
        self.code_optimizer = code_optimizer
        self.conversation_history = []
        self.benchmark_results = []

    def process_user_input(self, user_input: str, code: str = None) -> Dict:
        """
        Process user input and provide comprehensive response with optional code execution.

        Args:
            user_input: The user's question; it is sent to the RAG graph and
                also scanned for benchmark-related keywords.
            code: Optional Python source. ``None`` or whitespace-only skips
                analysis and benchmarking.

        Returns:
            Dict with ``text_response``, ``code_analysis``, ``optimized_code``,
            ``benchmark_results``, ``visualization``, ``socratic_questions``
            and ``learning_objectives``. Entries that do not apply keep their
            empty defaults.
        """

        response = {
            "text_response": "",
            "code_analysis": None,
            "optimized_code": None,
            "benchmark_results": None,
            "visualization": None,
            "socratic_questions": [],
            "learning_objectives": []
        }

        # Get RAG response for the text query
        rag_result = self.rag_graph.invoke({
            "messages": [{"role": "user", "content": user_input}]
        })
        response["text_response"] = rag_result["messages"][-1].content

        # If code is provided, analyze and benchmark it
        if code and code.strip():
            print("🔍 Analyzing provided code...")

            # Analyze code for optimization opportunities
            analysis = self.code_optimizer.analyze_code(code)
            response["code_analysis"] = analysis

            # Generate optimized version and hand it back to the caller
            # instead of discarding it.
            response["optimized_code"] = self.code_optimizer.suggest_optimizations(code)

            # Check if user wants to run benchmark
            benchmark_keywords = ["benchmark", "compare", "test", "performance", "speed", "faster"]
            lowered_input = user_input.lower()
            if any(keyword in lowered_input for keyword in benchmark_keywords):
                print("🚀 Running comprehensive benchmark...")

                try:
                    benchmark_results = self.benchmark_engine.run_comprehensive_benchmark(code)
                    response["benchmark_results"] = benchmark_results

                    # Create visualization
                    if benchmark_results.get("performance_metrics"):
                        response["visualization"] = self.benchmark_engine.create_visualization(benchmark_results)

                    # Store results
                    self.benchmark_results.append(benchmark_results)

                except Exception as e:
                    # A failed benchmark must not discard the text answer and analysis.
                    response["benchmark_results"] = {"error": f"Benchmark failed: {str(e)}"}

            # Generate Socratic questions for learning
            response["socratic_questions"] = self._generate_socratic_questions(analysis, user_input)
            response["learning_objectives"] = self._generate_learning_objectives(analysis)

        # Store conversation
        self.conversation_history.append({
            "user_input": user_input,
            "code": code,
            "response": response,
            "timestamp": datetime.now().isoformat()
        })

        return response

    def _generate_socratic_questions(self, analysis: Dict, user_context: str) -> List[str]:
        """Generate Socratic questions to guide learning based on code analysis.

        Returns at most three questions, in the order: library-specific,
        speedup-specific, then context-specific.
        """
        import re  # local import: only this method needs it

        questions = []

        libraries = analysis.get("libraries_detected", [])
        estimated_speedup = analysis.get("estimated_speedup", 1.0)

        if "numpy" in libraries:
            questions.extend([
                "What types of NumPy operations do you think benefit most from GPU acceleration?",
                "How might memory layout (row-major vs column-major) affect GPU performance?",
                "When would you choose CuPy over NumPy for a specific computation?"
            ])

        if "pandas" in libraries:
            questions.extend([
                "Which pandas operations are most computationally expensive in your code?",
                "How does cuDF handle string operations compared to pandas?",
                "What considerations should you make when transferring data between CPU and GPU?"
            ])

        if estimated_speedup > 5:
            questions.append("Your code has high parallelization potential. What characteristics make it suitable for GPU acceleration?")
        elif estimated_speedup < 2:
            questions.append("This code may not benefit much from GPU acceleration. Can you identify why?")

        # Context-specific questions
        context = user_context.lower()
        if "loop" in context:
            questions.append("How could you vectorize this loop to take advantage of GPU parallel processing?")

        # "ml" must match as a whole word; a plain substring test also fires on
        # words such as "html" or "calmly".
        if "machine learning" in context or re.search(r"\bml\b", context):
            questions.append("How do GPU memory patterns differ between training and inference workloads?")

        return questions[:3]  # Limit to 3 questions to avoid overwhelming

    def _generate_learning_objectives(self, analysis: Dict) -> List[str]:
        """Generate specific learning objectives based on the code analysis."""
        objectives = []

        libraries = analysis.get("libraries_detected", [])

        if "numpy" in libraries:
            objectives.extend([
                "Understand when to use CuPy vs NumPy",
                "Learn about GPU memory management with CuPy",
                "Master array broadcasting on GPU"
            ])

        if "pandas" in libraries:
            objectives.extend([
                "Compare cuDF vs pandas performance characteristics",
                "Learn efficient data transfer patterns",
                "Understand GPU-accelerated groupby operations"
            ])

        if "sklearn" in libraries:
            objectives.extend([
                "Explore cuML algorithms and their CPU equivalents",
                "Understand distributed GPU machine learning",
                "Learn about GPU memory requirements for ML models"
            ])

        return objectives

    def generate_tutorial_content(self, topic: str) -> str:
        """Generate comprehensive tutorial content on specific GPU acceleration topics.

        The topic is embedded in a fixed prompt, sent to the RAG graph, and the
        content of the last returned message is used as the tutorial text.
        """

        tutorial_prompt = f"""
        Create a comprehensive tutorial on {topic} for GPU acceleration. Include:
        1. Conceptual explanation
        2. Code examples comparing CPU vs GPU approaches
        3. Performance considerations
        4. Best practices
        5. Common pitfalls to avoid
        
        Focus on practical, hands-on learning with RAPIDS and CuPy libraries.
        """

        result = self.rag_graph.invoke({
            "messages": [{"role": "user", "content": tutorial_prompt}]
        })

        return result["messages"][-1].content

    def get_benchmark_summary(self) -> Dict:
        """Get summary of all benchmark results for learning analysis.

        Returns:
            ``{"message": ...}`` when nothing has been benchmarked; otherwise a
            dict with ``total_benchmarks`` and average/best/worst speedup over
            the runs that produced metrics. All three speedup values stay 0
            when no run produced metrics, so the summary is always valid JSON.
        """
        if not self.benchmark_results:
            return {"message": "No benchmarks run yet"}

        summary = {
            "total_benchmarks": len(self.benchmark_results),
            "average_speedup": 0,
            "best_speedup": 0,
            # 0 rather than float('inf'): infinity cannot be encoded as JSON.
            "worst_speedup": 0,
            "common_patterns": [],
            "recommendations": []
        }

        speedups = []
        for result in self.benchmark_results:
            metrics = result.get("performance_metrics", {})
            if metrics:
                speedups.append(metrics.get("speedup_factor", 1.0))

        if speedups:
            summary["average_speedup"] = sum(speedups) / len(speedups)
            summary["best_speedup"] = max(speedups)
            summary["worst_speedup"] = min(speedups)

        return summary

# Module-level mentor used by the Gradio handlers below. `graph` is passed in
# as the mentor's `rag_graph`, i.e. the object whose `.invoke()` answers text
# queries; it is defined in an earlier cell.
gpu_mentor = EnhancedGPUMentor(graph, benchmark_engine, code_optimizer)

## 15. Enhanced Gradio Interface - GPU Mentor Playground

In [None]:
import gradio as gr
import json

def chat_with_mentor(message, code, chat_history):
    """Handle one chat turn with the GPU Mentor.

    Args:
        message: The user's question or comment.
        code: Optional Python source; ``None`` or whitespace-only means no
            code was submitted.
        chat_history: Existing list of ``{"role", "content"}`` messages, or
            ``None`` for a fresh conversation.

    Returns:
        A 5-tuple ``("", "", chat_history, visualization, benchmark_results)``.
        The two empty strings reset the message and code inputs. On failure
        the error text becomes the assistant reply and the last two items are
        ``None``.
    """
    if chat_history is None:
        chat_history = []

    # Treat a missing code value like an empty one, so that `.strip()` cannot
    # raise after the mentor has already produced its answer.
    has_code = bool(code and code.strip())

    try:
        # Process user input through the enhanced mentor
        response = gpu_mentor.process_user_input(message, code)

        # Format response for chat
        formatted_response = response["text_response"]

        # Add code analysis if available
        analysis = response["code_analysis"]
        if analysis:
            formatted_response += "\n\n**Code Analysis:**\n"
            formatted_response += f"- Libraries detected: {', '.join(analysis['libraries_detected'])}\n"
            formatted_response += f"- Estimated speedup: {analysis['estimated_speedup']:.1f}x\n"
            formatted_response += f"- GPU compatible: {'✅' if analysis['gpu_compatible'] else '❌'}\n"

            if analysis['warnings']:
                formatted_response += f"- Warnings: {'; '.join(analysis['warnings'])}\n"

        # Add Socratic questions
        if response["socratic_questions"]:
            formatted_response += "\n\n**Think About This:**\n"
            for i, question in enumerate(response["socratic_questions"], 1):
                formatted_response += f"{i}. {question}\n"

        # Update chat history; show the submitted code under the question.
        user_content = f"{message}\n\n```python\n{code}\n```" if has_code else message
        chat_history.append({"role": "user", "content": user_content})
        chat_history.append({"role": "assistant", "content": formatted_response})

        return "", "", chat_history, response.get("visualization"), response.get("benchmark_results")

    except Exception as e:
        # Surface the failure inside the conversation instead of raising.
        error_msg = f"Error: {str(e)}"
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": error_msg})
        return "", "", chat_history, None, None

def run_benchmark_only(code):
    """Run benchmark on code without chat interaction.

    Args:
        code: Python source to benchmark; ``None`` or whitespace-only is
            rejected with a prompt message.

    Returns:
        ``(result_text, visualization, benchmark_results)``. The last two are
        ``None`` when no code was given, when the engine reports an error, or
        when an exception is raised.
    """

    # Guard against None as well as blank input; this check sits outside the
    # try block, so an unguarded `.strip()` would propagate to the caller.
    if not code or not code.strip():
        return "Please provide code to benchmark.", None, None

    try:
        # Run benchmark (the engine performs its own code analysis)
        benchmark_results = benchmark_engine.run_comprehensive_benchmark(code)

        if benchmark_results.get("error"):
            return f"Benchmark failed: {benchmark_results['error']}", None, None

        metrics = benchmark_results.get("performance_metrics", {})
        if not metrics:
            return "Benchmark completed but no performance metrics available.", None, benchmark_results

        # Create visualization
        viz = benchmark_engine.create_visualization(benchmark_results)

        # Format results
        result_text = f"""
**Benchmark Results:**
- CPU Time: {metrics['cpu_execution_time']:.4f} seconds
- GPU Time: {metrics['gpu_execution_time']:.4f} seconds  
- Speedup: {metrics['speedup_factor']:.2f}x
- Time Saved: {metrics['time_saved']:.4f} seconds
- Improvement: {metrics['percent_improvement']:.1f}%

**Recommendations:**
{chr(10).join('• ' + rec for rec in benchmark_results.get('recommendations', []))}
"""

        return result_text, viz, benchmark_results

    except Exception as e:
        return f"Error running benchmark: {str(e)}", None, None

def analyze_code_only(code):
    """Analyze code for optimization opportunities.

    Args:
        code: Python source to analyze; ``None`` or whitespace-only is
            rejected with a prompt message.

    Returns:
        ``(analysis_text, optimized_code)``. ``optimized_code`` is an empty
        string when no code was given or when an exception is raised.
    """

    # Guard against None as well as blank input; this check sits outside the
    # try block, so an unguarded `.strip()` would propagate to the caller.
    if not code or not code.strip():
        return "Please provide code to analyze.", ""

    try:
        analysis = code_optimizer.analyze_code(code)
        optimized_code = code_optimizer.suggest_optimizations(code)

        analysis_text = f"""
**Code Analysis:**
- Libraries detected: {', '.join(analysis['libraries_detected'])}
- Estimated speedup potential: {analysis['estimated_speedup']:.1f}x
- GPU compatible: {'✅ Yes' if analysis['gpu_compatible'] else '❌ No'}

**Optimization Opportunities:**
- Matrix operations: {'✅ Detected' if any(op in code for op in ['np.dot', 'np.matmul', '@']) else '❌ None'}
- Array operations: {'✅ Detected' if 'numpy' in analysis['libraries_detected'] else '❌ None'}
- DataFrame operations: {'✅ Detected' if 'pandas' in analysis['libraries_detected'] else '❌ None'}

**Warnings:**
{chr(10).join('• ' + warning for warning in analysis['warnings']) if analysis['warnings'] else '• None'}
"""

        return analysis_text, optimized_code

    except Exception as e:
        return f"Error analyzing code: {str(e)}", ""

def get_tutorial(topic):
    """Generate tutorial content for specific topics.

    Args:
        topic: Subject of the tutorial; ``None`` or whitespace-only is
            rejected with a prompt message.

    Returns:
        The tutorial text from the mentor, or an error/prompt message string.
    """

    # Guard against None as well as blank input, matching the other handlers.
    if not topic or not topic.strip():
        return "Please specify a topic for the tutorial."

    try:
        return gpu_mentor.generate_tutorial_content(topic)
    except Exception as e:
        return f"Error generating tutorial: {str(e)}"

def clear_chat():
    """Reset the chat view: one ``None`` for each of the three wired outputs."""
    cleared_outputs = (None,) * 3
    return cleared_outputs

# Sample code examples for quick testing.
# Maps a display name (offered in the "Load Sample Code" dropdown) to a CPU
# snippet that is copied into the code editor when that name is selected.
sample_codes = {
    "Matrix Multiplication": '''import numpy as np

# Create large matrices
n = 2000
A = np.random.rand(n, n)
B = np.random.rand(n, n)

# Matrix multiplication
C = np.dot(A, B)
print(f"Result shape: {C.shape}")''',
    
    "DataFrame Operations": '''import pandas as pd
import numpy as np

# Create large dataset
n = 1000000
df = pd.DataFrame({
    'x': np.random.randn(n),
    'y': np.random.randn(n),
    'group': np.random.choice(['A', 'B', 'C'], n)
})

# Compute grouped statistics
result = df.groupby('group').agg({
    'x': ['mean', 'std'],
    'y': ['sum', 'count']
})
print(result)''',
    
    "FFT Operations": '''import numpy as np

# Generate signal
n = 1000000
t = np.linspace(0, 1, n)
signal = np.sin(2 * np.pi * 50 * t) + np.sin(2 * np.pi * 120 * t)

# Compute FFT
fft_result = np.fft.fft(signal)
frequencies = np.fft.fftfreq(n)
print(f"FFT computed for {n} points")'''
}

# Create the Gradio interface
# Layout: one Blocks app with five tabs (chat playground, benchmarking, code
# analysis, tutorials, progress). Components are declared first; the event
# handlers defined above are attached to them at the end of this block.
with gr.Blocks(title="GPU Mentor - AI Tutor for GPU Acceleration", theme=gr.themes.Soft()) as demo:
    
    gr.Markdown("""
    # 🚀 GPU Mentor: AI Tutor for GPU Acceleration
    
    Learn GPU acceleration with hands-on experimentation! This AI tutor helps you:
    - **Understand** GPU acceleration concepts through conversation
    - **Experiment** with your code in a safe playground 
    - **Benchmark** CPU vs GPU performance on Sol supercomputer
    - **Learn** through Socratic questioning and personalized tutorials
    """)
    
    with gr.Tabs():
        
        # Main Chat & Code Playground Tab
        with gr.Tab("💬 Chat & Code Playground"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Ask Questions & Submit Code")
                    
                    message_input = gr.Textbox(
                        label="Your Question or Comment",
                        placeholder="Ask about GPU acceleration, RAPIDS, CuPy, or describe what you want to learn...",
                        lines=2
                    )
                    
                    code_input = gr.Code(
                        label="Your Python Code (Optional)",
                        language="python",
                        lines=10
                    )
                    
                    with gr.Row():
                        submit_btn = gr.Button("🚀 Submit", variant="primary")
                        clear_btn = gr.Button("🧹 Clear Chat")
                    
                    # Sample code selector
                    gr.Markdown("### Quick Examples")
                    sample_dropdown = gr.Dropdown(
                        choices=list(sample_codes.keys()),
                        label="Load Sample Code",
                        value=None
                    )
                
                with gr.Column(scale=1):
                    gr.Markdown("### AI Mentor Response")
                    # type="messages" matches the {"role", "content"} dicts
                    # that chat_with_mentor appends to the history.
                    chatbot = gr.Chatbot(
                        label="Conversation",
                        height=400,
                        type="messages"
                    )
                    
                    # Benchmark visualization
                    benchmark_plot = gr.Plot(label="Performance Comparison")
        
        # Dedicated Benchmark Tab
        with gr.Tab("📊 Performance Benchmarking"):
            with gr.Row():
                with gr.Column():
                    benchmark_code = gr.Code(
                        label="Code to Benchmark",
                        language="python", 
                        lines=15
                    )
                    
                    run_benchmark_btn = gr.Button("⚡ Run Benchmark on Sol", variant="primary")
                
                with gr.Column():
                    benchmark_results = gr.Textbox(
                        label="Benchmark Results", 
                        lines=15
                    )
                    
                    benchmark_viz = gr.Plot(label="Performance Visualization")
        
        # Code Analysis Tab
        with gr.Tab("🔍 Code Analysis & Optimization"):
            with gr.Row():
                with gr.Column():
                    analyze_code = gr.Code(
                        label="Code to Analyze",
                        language="python",
                        lines=15
                    )
                    
                    analyze_btn = gr.Button("🔍 Analyze Code", variant="primary")
                    
                    analysis_results = gr.Textbox(
                        label="Analysis Results",
                        lines=10
                    )
                
                with gr.Column():
                    optimized_code = gr.Code(
                        label="GPU-Optimized Version",
                        language="python",
                        lines=20
                    )
        
        # Tutorial Generator Tab  
        with gr.Tab("📚 Personalized Tutorials"):
            with gr.Column():
                tutorial_topic = gr.Textbox(
                    label="Tutorial Topic",
                    placeholder="e.g., 'CuPy memory management', 'cuDF vs pandas performance', 'RAPIDS machine learning'...",
                    lines=1
                )
                
                generate_tutorial_btn = gr.Button("📝 Generate Tutorial", variant="primary")
                
                tutorial_content = gr.Markdown(
                    label="Tutorial Content",
                    value="Enter a topic above to generate a personalized tutorial."
                )
        
        # Learning Progress Tab
        with gr.Tab("📈 Learning Progress"):
            with gr.Column():
                gr.Markdown("### Your GPU Acceleration Learning Journey")
                
                progress_btn = gr.Button("📊 View Progress Summary")
                progress_summary = gr.JSON(label="Learning Summary")
    
    # Event handlers
    def load_sample_code(sample_name):
        """Return the sample snippet for `sample_name`, or "" when it is unset or unknown."""
        if sample_name and sample_name in sample_codes:
            return sample_codes[sample_name]
        return ""
    
    # Wire up the interface
    sample_dropdown.change(load_sample_code, inputs=[sample_dropdown], outputs=[code_input])
    
    # chat_with_mentor returns five values; the fifth (raw benchmark results)
    # goes to a gr.State() created inline that no other handler reads.
    submit_btn.click(
        chat_with_mentor,
        inputs=[message_input, code_input, chatbot],
        outputs=[message_input, code_input, chatbot, benchmark_plot, gr.State()]
    )
    
    # clear_chat returns three Nones, one per output listed here.
    clear_btn.click(clear_chat, outputs=[chatbot, benchmark_plot, gr.State()])
    
    # run_benchmark_only returns (text, figure, raw results); the raw results
    # again go to a throwaway gr.State().
    run_benchmark_btn.click(
        run_benchmark_only,
        inputs=[benchmark_code],
        outputs=[benchmark_results, benchmark_viz, gr.State()]
    )
    
    analyze_btn.click(
        analyze_code_only,
        inputs=[analyze_code],
        outputs=[analysis_results, optimized_code]
    )
    
    generate_tutorial_btn.click(
        get_tutorial,
        inputs=[tutorial_topic],
        outputs=[tutorial_content]
    )
    
    # No inputs: the summary is read from the module-level gpu_mentor.
    progress_btn.click(
        lambda: gpu_mentor.get_benchmark_summary(),
        outputs=[progress_summary]
    )

# Launch the interface
# NOTE(review): share=True asks Gradio for a public share link and
# server_name="0.0.0.0" listens on all network interfaces. This UI forwards
# user-supplied code to the benchmark engine, which submits it as jobs, and no
# authentication is configured here — confirm this exposure is intended.
if __name__ == "__main__":
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True  # show handler errors in the UI
    )

## 16. Example Usage & Testing

Let's test the GPU Mentor system with some example interactions:

In [None]:
# Test the code optimizer
# Smoke test: runs analysis and text-based rewriting on a small NumPy matrix
# multiplication. The sample is only passed around as a string here; nothing
# in this cell executes it.
sample_numpy_code = """
import numpy as np

# Create large matrices
n = 1000
A = np.random.rand(n, n)
B = np.random.rand(n, n)

# Matrix multiplication
C = np.dot(A, B)
print(f"Result shape: {C.shape}")
"""

print("=== Testing Code Optimizer ===")
analysis = code_optimizer.analyze_code(sample_numpy_code)
print("Analysis:", analysis)

optimized = code_optimizer.suggest_optimizations(sample_numpy_code)
print("\nOptimized code:")
print(optimized)

In [None]:
# Test the enhanced GPU Mentor (without actual Sol execution for demo)
# The mentor's collaborators are called directly instead of going through
# process_user_input, so no benchmark job can be submitted from this cell.
print("\n=== Testing Enhanced GPU Mentor ===")

# Simulate a user interaction
user_question = "How can I accelerate matrix multiplication with CuPy?"
sample_code = """
import numpy as np
A = np.random.rand(500, 500)
B = np.random.rand(500, 500)
C = np.dot(A, B)
"""

# Test just the RAG response and code analysis (skip actual benchmarking)
try:
    # Get RAG response (only the first 200 characters are printed)
    rag_result = gpu_mentor.rag_graph.invoke({
        "messages": [{"role": "user", "content": user_question}]
    })
    print("RAG Response:", rag_result["messages"][-1].content[:200] + "...")
    
    # Analyze code
    analysis = gpu_mentor.code_optimizer.analyze_code(sample_code)
    print("\nCode Analysis:", analysis)
    
    # Generate Socratic questions (calls the private helper directly)
    questions = gpu_mentor._generate_socratic_questions(analysis, user_question)
    print("\nSocratic Questions:")
    for i, q in enumerate(questions, 1):
        print(f"{i}. {q}")
        
except Exception as e:
    # Report the failure instead of raising so the notebook keeps running.
    print(f"Error testing GPU Mentor: {e}")