In [1]:
# üîó Connect to Existing Plant Disease Knowledge Base & Test RAG System
import os
import warnings
from typing import List
from dotenv import load_dotenv

# LangChain components
from langchain_cohere import CohereEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
from langchain.schema import Document

# Pinecone
from pinecone import Pinecone

# LangGraph
from langgraph.graph import StateGraph, END
from typing_extensions import TypedDict

# Silence all warnings so library notices do not clutter the cell output.
warnings.filterwarnings(action='ignore')

# Banner: title line followed by a 60-character rule.
print("üå± CONNECTING TO EXISTING PLANT DISEASE KNOWLEDGE BASE", "=" * 60, sep="\n")

  from .autonotebook import tqdm as notebook_tqdm


üå± CONNECTING TO EXISTING PLANT DISEASE KNOWLEDGE BASE


In [2]:
# üîë Setup API Keys and Environment
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Credentials are read from the environment only. A literal key must never be used
# as a fallback: it leaks through version control and every shared copy of the notebook.
# NOTE(review): the Pinecone key that used to be embedded here should be treated as
# compromised and rotated.
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Report presence only; the key values themselves are never printed.
print("üîë API Keys Status:")
print(f"   Cohere: {'‚úÖ' if COHERE_API_KEY else '‚ùå'}")
print(f"   Pinecone: {'‚úÖ' if PINECONE_API_KEY else '‚ùå'}")
print(f"   Groq: {'‚úÖ' if GROQ_API_KEY else '‚ùå'}")

üîë API Keys Status:
   Cohere: ‚úÖ
   Pinecone: ‚úÖ
   Groq: ‚úÖ


In [3]:
# üß† Initialize Components (Connect to Existing)
print("\nüîß Connecting to existing components...")

# Embedding client handed to the vector store below.
embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=COHERE_API_KEY)

# Handle to the already-existing Pinecone index named "hi".
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("hi")

# LangChain vector-store wrapper around that index.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Groq-hosted chat model used for answer generation (temperature 0.1).
llm = ChatGroq(temperature=0.1, model_name="llama3-70b-8192", groq_api_key=GROQ_API_KEY)

print("‚úÖ Connected to existing components!")


üîß Connecting to existing components...
‚úÖ Connected to existing components!


In [4]:
# üìä Check Existing Knowledge Base
print("\nüìä Checking existing knowledge base...")

# Two probes: index statistics, then one sample retrieval. Any failure is
# reported as a message instead of stopping the notebook.
try:
    stats = index.describe_index_stats()
    print("‚úÖ Knowledge Base Status:")
    print(f"   ‚Ä¢ Total vectors: {stats.total_vector_count}")
    print(f"   ‚Ä¢ Dimension: {stats.dimension}")

    # Sample query to confirm that retrieval returns documents.
    test_docs = vector_store.similarity_search("plant disease", k=3)
    print(f"   ‚Ä¢ Sample documents found: {len(test_docs)}")

    if not test_docs:
        print("‚ö†Ô∏è No documents found in knowledge base")
    else:
        print(f"   ‚Ä¢ Sample content preview: {test_docs[0].page_content[:100]}...")
        print("‚úÖ Knowledge base is ready and contains data!")

except Exception as err:
    print(f"‚ùå Error checking knowledge base: {err}")


üìä Checking existing knowledge base...
‚úÖ Knowledge Base Status:
   ‚Ä¢ Total vectors: 2682
   ‚Ä¢ Dimension: 1024
   ‚Ä¢ Sample documents found: 3
   ‚Ä¢ Sample content preview: edge. 
 
1. Introduction: The Challenge of Plant Diseases 
Plant diseases pose a signiÔ¨Åcant and pers...
‚úÖ Knowledge base is ready and contains data!


In [5]:
# üîó Setup RAG System (Connect to Existing Knowledge)
print("\nüîó Setting up RAG system...")

# State schema shared by the nodes of the RAG graph built further down in this cell.
class PlantRAGState(TypedDict):
    """State dictionary passed between the retrieve and generate steps."""
    question: str  # the user's question
    documents: List[Document]  # documents returned by the similarity search
    answer: str  # generated answer text; the retrieve step sets it to ""

def retrieve_from_existing_kb(state: PlantRAGState) -> PlantRAGState:
    """Look up the three documents most similar to the question in ``state``.

    Returns a new state holding the original question, the retrieved
    documents, and an empty ``answer`` placeholder.
    """
    matches = vector_store.similarity_search(state["question"], k=3)
    return dict(question=state["question"], documents=matches, answer="")

def generate_expert_answer(state: PlantRAGState) -> PlantRAGState:
    """Ask the LLM to answer the question using the retrieved documents as context.

    The page contents of ``state["documents"]`` are joined with blank lines and
    embedded in the prompt together with the question. Returns a new state whose
    ``answer`` is the content of the LLM response.
    """
    question, documents = state["question"], state["documents"]

    # Blank-line-separated concatenation of every retrieved passage.
    context = "\n\n".join(doc.page_content for doc in documents)

    prompt = f"""
You are a world-class plant pathologist with expertise in disease diagnosis and management.
Answer the question based on the research context from the knowledge base.

Research Context:
{context}

Question: {question}

Provide a detailed, scientific answer with specific symptoms, pathogens, and management strategies when relevant.

Answer:"""

    reply = llm.invoke(prompt)
    return dict(question=question, documents=documents, answer=reply.content)

# Wire the two-step graph: retrieve -> generate -> END.
workflow = StateGraph(PlantRAGState)
for node_name, node_fn in (
    ("retrieve", retrieve_from_existing_kb),
    ("generate", generate_expert_answer),
):
    workflow.add_node(node_name, node_fn)
workflow.set_entry_point("retrieve")
for src, dst in (("retrieve", "generate"), ("generate", END)):
    workflow.add_edge(src, dst)

# Compile the graph into the runnable used by the query helpers.
rag_system = workflow.compile()

print("‚úÖ RAG system connected to existing knowledge base!")


üîó Setting up RAG system...
‚úÖ RAG system connected to existing knowledge base!


In [6]:
# üß™ Test Functions for Existing Knowledge Base
def ask_plant_expert(question: str):
    """Run one question through the RAG graph, printing progress and the answer.

    Returns the answer, or None when any step raises (the exception message
    is printed instead of propagating).
    """
    print(f"‚ùì Question: {question}")
    print("üîç Searching existing knowledge base...")

    try:
        answer = rag_system.invoke({"question": question})['answer']
        print("\nüå± Expert Answer:")
        print(f"{answer}")
        print("\n" + "=" * 80)
        return answer
    except Exception as err:
        print(f"‚ùå Error: {err}")
        return None

def quick_test(question: str):
    """Shorthand: forward ``question`` to ``ask_plant_expert`` and return its result."""
    answer = ask_plant_expert(question)
    return answer

def detailed_query(question: str, show_sources: bool = True):
    """Answer a question and optionally list the documents the answer was built from.

    Args:
        question: natural-language question sent through the RAG graph.
        show_sources: when True, print a 120-character preview of each source document.

    Returns:
        The generated answer, or None if any step raised (the error message is printed).
    """
    print(f"‚ùì Detailed Query: {question}")
    print("üîç Analyzing knowledge base...")
    
    try:
        # A single graph run performs both retrieval and generation.
        result = rag_system.invoke({"question": question})
        
        # Take the sources from the graph's final state instead of issuing a second
        # similarity search: this lists exactly the documents that were given to the
        # LLM and avoids a duplicate embedding + vector-store round trip.
        docs = result.get("documents") or []
        
        print("\nüå± Expert Analysis:")
        print(f"{result['answer']}")
        
        if show_sources and docs:
            print(f"\nüìö Sources Used ({len(docs)} documents):")
            for i, doc in enumerate(docs, 1):
                print(f"   {i}. {doc.page_content[:120]}...")
        
        print("\n" + "=" * 80)
        return result['answer']
        
    except Exception as e:
        # Best-effort helper for interactive use: report the failure, do not raise.
        print(f"‚ùå Error: {e}")
        return None

# Reached only after every helper definition in this cell executed without error.
print("‚úÖ Test functions ready!")

‚úÖ Test functions ready!


In [7]:
# üöÄ Test Your Existing Knowledge Base
print("\nüöÄ TESTING EXISTING PLANT DISEASE KNOWLEDGE BASE", "=" * 60, sep="\n")

# (heading, question) pairs: symptoms, management, detection technology.
smoke_tests = [
    ("\nüß™ Test 1: Basic Disease Symptoms",
     "What are the symptoms of bacterial wilt in potato plants?"),
    ("\nüß™ Test 2: Disease Management",
     "What biological control methods are effective against plant diseases?"),
    ("\nüß™ Test 3: Detection Technology",
     "What is the accuracy of automated disease detection systems?"),
]

for heading, query in smoke_tests:
    print(heading)
    ask_plant_expert(query)

print("\nüéâ Testing completed! Your existing knowledge base is working perfectly!")


üöÄ TESTING EXISTING PLANT DISEASE KNOWLEDGE BASE

üß™ Test 1: Basic Disease Symptoms
‚ùì Question: What are the symptoms of bacterial wilt in potato plants?
üîç Searching existing knowledge base...


Found document with no `text` key. Skipping.
Found document with no `text` key. Skipping.



üå± Expert Answer:
The symptoms of bacterial wilt in potato plants, caused by the pathogen Ralstonia solanacearum, are characterized by rapid wilting of the plant without initial yellowing, stunting of growth, and yellowing of the lower leaves. A definitive sign of bacterial wilt is the presence of a creamy white bacterial ooze from the cut stem when placed in water, accompanied by browning of the internal vascular tissue.

These symptoms are indicative of a severe and destructive soil-borne disease that can have devastating effects on potato crops. The rapid wilting of the plant is a result of the bacterium's ability to colonize the xylem vessels, causing a blockage that prevents water and nutrient uptake, ultimately leading to plant death.

In addition to these symptoms, bacterial wilt can also cause a range of other effects on potato plants, including reduced tuber yield and quality, and increased susceptibility to other diseases.

Fortunately, research has identified effective ma

Found document with no `text` key. Skipping.
Found document with no `text` key. Skipping.



üå± Expert Answer:
An excellent question! As a world-class plant pathologist, I'm delighted to share my expertise on effective biological control methods against plant diseases.

Biological control methods involve the use of living organisms or their products to manage plant diseases. Two promising approaches are the employment of beneficial biocontrol agents and the leveraging of plant defense mechanisms through advanced genetic tools.

**Beneficial Biocontrol Agents:**

1. **Bacillus and Paenibacillus species:** These bacteria have been shown to be effective biocontrol agents against various plant diseases. For instance, Aliye et al. (2008) demonstrated that certain rhizosphere bacterial antagonists, such as Bacillus and Paenibacillus species, can bioprotect potato plants against bacterial wilt caused by Ralstonia solanacearum. These beneficial bacteria can outcompete the pathogen for resources, produce antibiotics, and induce systemic resistance in plants.

**Symptoms of bacterial

Found document with no `text` key. Skipping.



üå± Expert Answer:
According to the research context, the automated image-based detection system developed by Arivazhagan et al. (2013) achieved an accuracy of over 94% in detecting and classifying leaf diseases using computer vision. This system employed a processing pipeline that involved image acquisition, color space transformation, masking and segmentation, and texture feature extraction to identify diseased regions on plant leaves.

The high accuracy of this system can be attributed to the use of the HSI color space, which separates color information from brightness, making it more robust for identifying disease spots under varying light conditions. The system's ability to extract statistical texture features from the diseased spots using a Color Co-occurrence Matrix (CCM) also contributed to its high accuracy.

It is worth noting that the accuracy of automated disease detection systems can vary depending on the specific disease, plant species, and environmental conditions. The

In [None]:
# üí¨ Interactive System for Existing Knowledge Base
print("\nüí¨ INTERACTIVE PLANT DISEASE EXPERT SYSTEM", "=" * 50, sep="\n")
print("üå± Connected to your existing knowledge base!")

# The three helpers defined earlier in the notebook.
print(
    "\nüéØ Available Functions:",
    "   ‚Ä¢ ask_plant_expert('your question')",
    "   ‚Ä¢ quick_test('your question')",
    "   ‚Ä¢ detailed_query('your question', show_sources=True)",
    sep="\n",
)

print("\nüå± Example Questions You Can Ask:")
example_questions = [
    "What causes potato late blight?",
    "How to control bacterial wilt?",
    "What are the symptoms of early blight?",
    "How effective is biological control?",
    "What is PTGS in plant virus resistance?",
    "How do giant cells form in nematode infections?",
    "What are the main plant pathogens?",
    "How does automated disease detection work?",
    "What are the symptoms of viral diseases in plants?",
    "What biological agents are used for disease control?"
]

# Numbered listing with right-aligned two-character indices.
for i, q in enumerate(example_questions, start=1):
    print(f"   {i:2d}. '{q}'")

# `stats` exists only if the knowledge-base check cell ran; otherwise print 'many'.
print(f"\nüöÄ Your system is ready! Knowledge base contains {stats.total_vector_count if 'stats' in globals() else 'many'} vectors.")
print("üìã No need to upload files again - using existing knowledge base!")

# Example usage (uncomment to test):
# ask_plant_expert("What are the main plant diseases in the knowledge base?")
# detailed_query("How does late blight spread?", show_sources=True)

In [None]:
# üéØ Quick Demo of Your Existing System
print("\nüéØ QUICK DEMO - Testing Your Existing Knowledge Base", "=" * 55, sep="\n")

demo_questions = [
    "What are the main symptoms of late blight?",
    "How accurate is automated disease detection?",
    "What biological control methods are mentioned?"
]

# Run each demo question through the shorthand helper under a numbered heading.
for i, question in enumerate(demo_questions, start=1):
    print(f"\nüî¨ Demo {i}:")
    quick_test(question)

print(
    "\n‚úÖ Demo completed! Your existing plant disease knowledge base is fully functional!",
    "üéâ You can now ask any plant disease questions without re-uploading files!",
    sep="\n",
)

In [None]:
# Plain question: prints the answer and returns it.
ask_plant_expert(question="What causes bacterial wilt?")

# Same flow, additionally printing a preview of the source documents
# (show_sources defaults to True).
detailed_query("How does late blight spread?")

# Shorthand wrapper around ask_plant_expert; as the last expression of the cell,
# its return value is also displayed.
quick_test(question="What are plant disease symptoms?")

In [None]:
# Setup (run once)
import os
import warnings
from typing import List
from dotenv import load_dotenv
from langchain_cohere import CohereEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
from langchain.schema import Document
from pinecone import Pinecone
from langgraph.graph import StateGraph, END
from typing_extensions import TypedDict

warnings.filterwarnings('ignore')
load_dotenv()

# API keys come from the environment (or .env) only. A literal key must never be
# used as a fallback: it leaks through version control and shared notebook copies.
# NOTE(review): the Pinecone key previously embedded here should be rotated.
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Fail fast with a readable message rather than an opaque client error later on.
_missing_keys = [
    name
    for name, value in (
        ("COHERE_API_KEY", COHERE_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GROQ_API_KEY", GROQ_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

# Initialize components
embeddings = CohereEmbeddings(cohere_api_key=COHERE_API_KEY, model="embed-english-v3.0")
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("hi")
vector_store = PineconeVectorStore(embedding=embeddings, index=index)
llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="llama3-70b-8192", temperature=0.1)

# RAG System: state schema shared by the retrieve and generate steps below.
class RAGState(TypedDict):
    """State dictionary passed between the nodes of the RAG graph."""
    question: str  # the user's question
    documents: List[Document]  # documents returned by the similarity search
    answer: str  # generated answer text; the retrieve step sets it to ""

def retrieve(state: RAGState) -> RAGState:
    """Fetch the three most similar documents for the question; the answer starts empty."""
    question = state["question"]
    hits = vector_store.similarity_search(question, k=3)
    return dict(question=question, documents=hits, answer="")

def generate(state: RAGState) -> RAGState:
    """Build a prompt from the retrieved documents and store the LLM reply as the answer."""
    question = state["question"]
    documents = state["documents"]
    # Retrieved passages, separated by blank lines.
    context = "\n\n".join(doc.page_content for doc in documents)
    prompt = f"""You are an expert plant pathologist. Answer based on the research context.

Context: {context}

Question: {question}

Answer:"""
    reply = llm.invoke(prompt)
    return dict(question=question, documents=documents, answer=reply.content)

# Two-node graph: retrieve -> generate -> END.
workflow = StateGraph(RAGState)
for node_name, node_fn in (("retrieve", retrieve), ("generate", generate)):
    workflow.add_node(node_name, node_fn)
workflow.set_entry_point("retrieve")
for src, dst in (("retrieve", "generate"), ("generate", END)):
    workflow.add_edge(src, dst)
rag = workflow.compile()

# Main entry point: question in, answer text out.
def ask(question: str) -> str:
    """
    Answer a plant disease question with the compiled RAG graph.

    Args:
        question (str): Your plant disease question

    Returns:
        str: The generated answer, or a string starting with "Error: " that
        carries the exception message when the pipeline raises.
    """
    try:
        final_state = rag.invoke({"question": question})
    except Exception as exc:
        return "Error: " + str(exc)
    try:
        return final_state["answer"]
    except Exception as exc:
        return "Error: " + str(exc)
    
# Usage example, executed when this code runs as the main module.
if __name__ == "__main__":

    # Example 1: ask one question and print the Q/A pair, then a blank line.
    question1 = "What are the symptoms of bacterial wilt in potato plants?"
    answer1 = ask(question1)
    print(f"Q: {question1}", f"A: {answer1}\n", sep="\n")
    

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Show only whether the key is configured. Evaluating the bare variable would store
# the secret itself in the notebook's saved output.
bool(COHERE_API_KEY)

---

# 📊 CHATBOT EVALUATION MATRIX

**Comprehensive evaluation of your Plant Disease RAG Chatbot**

This section will test your chatbot with various questions and provide detailed scoring.

---

In [None]:
# üìä Evaluation Setup and Test Dataset
import time
import json
from datetime import datetime

# Evaluation dataset with expected answers.
# Each entry is one test case:
#   category          - label used to group results in the report
#   question          - text sent to the chatbot
#   expected_keywords - phrases looked for (case-insensitively) in the answer
#   difficulty        - "Easy" / "Medium" / "Hard"; selects the score weighting
#   expected_score    - target score on the 10-point scale, used for the performance ratio
evaluation_questions = [
    {
        "category": "Bacterial Diseases",
        "question": "What are the symptoms of bacterial wilt in potato plants?",
        "expected_keywords": ["rapid wilting", "yellowing", "stunting", "bacterial ooze", "vascular tissue", "Ralstonia solanacearum"],
        "difficulty": "Easy",
        "expected_score": 9
    },
    {
        "category": "Fungal Diseases",
        "question": "How does late blight affect potato leaves?",
        "expected_keywords": ["water-soaked spots", "dark brown", "black", "white mold", "Phytophthora infestans"],
        "difficulty": "Easy",
        "expected_score": 9
    },
    {
        "category": "Disease Management",
        "question": "What biological control methods are effective against plant diseases?",
        "expected_keywords": ["Bacillus", "Paenibacillus", "biocontrol", "rhizosphere bacteria", "PGPR"],
        "difficulty": "Medium",
        "expected_score": 8
    },
    {
        "category": "Detection Systems",
        "question": "What is the accuracy of automated disease detection systems?",
        "expected_keywords": ["94%", "computer vision", "image analysis", "SVM", "texture features"],
        "difficulty": "Medium",
        "expected_score": 8
    },
    {
        "category": "Viral Diseases",
        "question": "What is PTGS in plant virus resistance?",
        "expected_keywords": ["Post-Transcriptional Gene Silencing", "PVX", "amplicon", "gene silencing"],
        "difficulty": "Hard",
        "expected_score": 7
    },
    {
        "category": "Disease Symptoms",
        "question": "What are the characteristics of early blight disease?",
        "expected_keywords": ["Alternaria solani", "bullseye pattern", "concentric rings", "dark lesions"],
        "difficulty": "Easy",
        "expected_score": 9
    }
]

print(f"üìã Evaluation dataset ready with {len(evaluation_questions)} test questions")
print("üìä Categories covered:")
# Tally questions per category in a single pass. A dict keeps first-seen order, so the
# listing is identical on every run (iterating a set of strings is not order-stable
# across interpreter sessions), and the list is no longer rescanned once per category.
category_counts = {}
for item in evaluation_questions:
    category_counts[item['category']] = category_counts.get(item['category'], 0) + 1
categories = set(category_counts)  # same name and type as before, kept for later cells
for cat, count in category_counts.items():
    print(f"   ‚Ä¢ {cat}: {count} questions")

In [None]:
# üßÆ Evaluation Scoring Functions

def calculate_keyword_score(answer: str, expected_keywords: list) -> tuple:
    """Score an answer by the fraction of expected keywords it contains.

    Matching is a case-insensitive substring test.

    Args:
        answer: chatbot answer text; None is treated as an empty answer.
        expected_keywords: phrases to look for; None is treated as an empty list.

    Returns:
        tuple: (score, found_keywords), where score is a float in [0, 1]
        (0.0 when there are no expected keywords) and found_keywords lists the
        matched keywords in their original spelling and order.
    """
    answer_lower = (answer or "").lower()
    expected_keywords = expected_keywords or []
    found_keywords = [kw for kw in expected_keywords if kw.lower() in answer_lower]
    # Guard against division by zero when no keywords are expected.
    score = len(found_keywords) / len(expected_keywords) if expected_keywords else 0.0
    return score, found_keywords

def calculate_response_quality_score(answer: str) -> dict:
    """Compute simple surface-level quality metrics for an answer.

    Args:
        answer: chatbot answer text; None is treated as an empty answer.

    Returns:
        dict with keys:
            'length_score'     - 1.0 for 50-300 words; proportionally lower below 50;
                                 decreasing above 300 but never below 0.5
            'scientific_score' - share of the listed domain terms present, capped at 1.0
                                 (five distinct terms give the full score)
            'structure_score'  - number of '.'-separated segments longer than 10
                                 characters, divided by 3 and capped at 1.0
            'word_count'       - number of whitespace-separated words
    """
    answer = answer or ""
    word_count = len(answer.split())
    
    # Length score (optimal: 50-300 words)
    if 50 <= word_count <= 300:
        length_score = 1.0
    elif word_count < 50:
        length_score = word_count / 50
    else:
        length_score = max(0.5, 1.0 - (word_count - 300) / 500)
    
    # Scientific terminology score (substring match, so "symptoms" counts for "symptom")
    scientific_terms = ["pathogen", "symptom", "disease", "infection", "management", 
                       "control", "diagnosis", "treatment", "prevention", "research"]
    answer_lower = answer.lower()
    found_terms = [term for term in scientific_terms if term in answer_lower]
    scientific_score = min(1.0, len(found_terms) / 5)
    
    # Structure score (check for organized response)
    sentences = answer.split('.')
    structure_score = min(1.0, len([s for s in sentences if len(s.strip()) > 10]) / 3)
    
    return {
        'length_score': length_score,
        'scientific_score': scientific_score,
        'structure_score': structure_score,
        'word_count': word_count
    }

def evaluate_response(question: str, answer: str, expected_keywords: list,
                      difficulty: str, expected_score: int) -> dict:
    """Score one chatbot answer on a 10-point scale and assign a letter grade.

    The keyword, length, scientific-term and structure sub-scores are combined
    with weights chosen by ``difficulty`` ("Easy", "Medium"; any other value
    uses the "Hard" weights).

    Returns:
        dict with the question, answer, sub-scores, 'overall_score',
        'expected_score', 'performance_ratio' (overall / expected, or 0 when
        expected_score is not positive), 'difficulty' and 'grade'.
    """
    keyword_score, found_keywords = calculate_keyword_score(answer, expected_keywords)
    quality_metrics = calculate_response_quality_score(answer)

    # Weight table keyed by difficulty; anything unlisted falls back to "Hard".
    hard_weights = {'keyword': 0.35, 'length': 0.3, 'scientific': 0.25, 'structure': 0.1}
    weight_table = {
        "Easy": {'keyword': 0.5, 'length': 0.2, 'scientific': 0.2, 'structure': 0.1},
        "Medium": {'keyword': 0.4, 'length': 0.25, 'scientific': 0.25, 'structure': 0.1},
    }
    weights = weight_table.get(difficulty, hard_weights)

    # Weighted sum of the four sub-scores, scaled from [0, 1] to [0, 10].
    overall_score = (
        keyword_score * weights['keyword'] +
        quality_metrics['length_score'] * weights['length'] +
        quality_metrics['scientific_score'] * weights['scientific'] +
        quality_metrics['structure_score'] * weights['structure']
    ) * 10

    # First cutoff reached (highest first) decides the grade; below 5 is a 'D'.
    for cutoff, letter in ((9, 'A+'), (8, 'A'), (7, 'B+'), (6, 'B'), (5, 'C')):
        if overall_score >= cutoff:
            grade = letter
            break
    else:
        grade = 'D'

    if expected_score > 0:
        performance_ratio = overall_score / expected_score
    else:
        performance_ratio = 0

    return {
        'question': question,
        'answer': answer,
        'keyword_score': keyword_score,
        'keywords_found': found_keywords,
        'quality_metrics': quality_metrics,
        'overall_score': overall_score,
        'expected_score': expected_score,
        'performance_ratio': performance_ratio,
        'difficulty': difficulty,
        'grade': grade
    }

# Reached only after the scoring functions in this cell were defined without error.
print("‚úÖ Evaluation functions ready!")

In [None]:
# üöÄ Run Complete Chatbot Evaluation

def run_chatbot_evaluation():
    """Ask every question in ``evaluation_questions`` and score each reply.

    Each test case is sent through ``ask_plant_expert`` and timed, the reply is
    scored with ``evaluate_response``, and a short summary is printed. A
    two-second pause follows every question.

    Returns:
        list: one evaluation dict per question, extended with 'category',
        'response_time' (seconds) and 'timestamp' (ISO-format string).
    """
    total = len(evaluation_questions)
    collected = []

    print("üîÑ RUNNING CHATBOT EVALUATION")
    print("=" * 50)
    print(f"üìù Testing {total} questions...\n")

    for number, case in enumerate(evaluation_questions, start=1):
        print(f"üìã Test {number}/{total}: {case['category']}")
        print(f"‚ùì Question: {case['question']}")

        # Time the chatbot call; a failure becomes an "Error: ..." answer that is
        # still scored like any other reply.
        started = time.time()
        try:
            reply = ask_plant_expert(case['question'])
            elapsed = time.time() - started
            # ask_plant_expert returns None when it caught an exception itself.
            reply = "Error: No response generated" if reply is None else reply
        except Exception as exc:
            reply = f"Error: {str(exc)}"
            elapsed = time.time() - started

        scored = evaluate_response(
            case['question'],
            reply,
            case['expected_keywords'],
            case['difficulty'],
            case['expected_score'],
        )

        # Attach run metadata to the scoring record.
        scored.update({
            'category': case['category'],
            'response_time': elapsed,
            'timestamp': datetime.now().isoformat(),
        })
        collected.append(scored)

        hits = scored['keywords_found']
        print(f"‚≠ê Score: {scored['overall_score']:.1f}/10 (Expected: {case['expected_score']}) - Grade: {scored['grade']}")
        print(f"üéØ Keywords found: {len(hits)}/{len(case['expected_keywords'])} - {hits}")
        print(f"‚è±Ô∏è Response time: {elapsed:.2f}s")
        print(f"üìù Word count: {scored['quality_metrics']['word_count']}")
        print("-" * 80)

        # Pause between questions to stay clear of API rate limits.
        time.sleep(2)

    return collected

# Run the evaluation and keep the per-question records in `evaluation_results`
# for the report and export cells.
print("üéØ Starting evaluation of your Plant Disease Chatbot...")
evaluation_results = run_chatbot_evaluation()

In [None]:
# üìä Generate Evaluation Report

def generate_evaluation_report(results):
    """Print a summary report for a list of evaluation records and return the aggregates.

    Args:
        results: list of dicts, each providing 'overall_score', 'response_time',
            'category' and 'grade'.

    Returns:
        tuple: (overall_metrics, category_averages, grades), three dicts. When
        ``results`` is empty, a message is printed and three empty dicts are
        returned so that callers unpacking the triple keep working.
    """
    
    if not results:
        print("‚ùå No evaluation results to analyze")
        # Return an empty triple rather than None: callers unpack three values.
        return {}, {}, {}
    
    total = len(results)
    
    # Calculate overall metrics
    scores = [r['overall_score'] for r in results]
    response_times = [r['response_time'] for r in results]
    
    overall_metrics = {
        'total_questions': total,
        'average_score': sum(scores) / len(scores),
        'min_score': min(scores),
        'max_score': max(scores),
        'average_response_time': sum(response_times) / len(response_times)
    }
    
    # Category performance: collect the scores per category, then average them
    categories = {}
    for result in results:
        categories.setdefault(result['category'], []).append(result['overall_score'])
    
    category_averages = {
        cat: sum(cat_scores) / len(cat_scores) for cat, cat_scores in categories.items()
    }
    
    # Grade distribution
    grades = {}
    for result in results:
        grade = result['grade']
        grades[grade] = grades.get(grade, 0) + 1
    
    # Performance analysis (score bands on the 10-point scale)
    excellent_count = len([r for r in results if r['overall_score'] >= 9])
    good_count = len([r for r in results if 7 <= r['overall_score'] < 9])
    needs_improvement = len([r for r in results if r['overall_score'] < 7])
    
    # Display report
    print("\nüìä CHATBOT EVALUATION REPORT")
    print("=" * 60)
    
    print("\nüéØ Overall Performance:")
    print(f"   ‚Ä¢ Total Questions: {overall_metrics['total_questions']}")
    print(f"   ‚Ä¢ Average Score: {overall_metrics['average_score']:.2f}/10")
    print(f"   ‚Ä¢ Score Range: {overall_metrics['min_score']:.1f} - {overall_metrics['max_score']:.1f}")
    print(f"   ‚Ä¢ Average Response Time: {overall_metrics['average_response_time']:.2f}s")
    
    print("\nüìö Grade Distribution:")
    for grade in ['A+', 'A', 'B+', 'B', 'C', 'D']:
        count = grades.get(grade, 0)
        percentage = (count / total) * 100
        if count > 0:
            print(f"   ‚Ä¢ {grade}: {count} questions ({percentage:.1f}%)")
    
    # Best-scoring category first
    print("\nüìã Category Performance:")
    for category, avg_score in sorted(category_averages.items(), key=lambda x: x[1], reverse=True):
        print(f"   ‚Ä¢ {category}: {avg_score:.1f}/10")
    
    print("\nüé≠ Performance Summary:")
    print(f"   ‚Ä¢ Excellent (‚â•9): {excellent_count} questions ({excellent_count/total*100:.1f}%)")
    print(f"   ‚Ä¢ Good (7-8.9): {good_count} questions ({good_count/total*100:.1f}%)")
    print(f"   ‚Ä¢ Needs Improvement (<7): {needs_improvement} questions ({needs_improvement/total*100:.1f}%)")
    
    # Recommendations
    print("\nüí° Recommendations:")
    if overall_metrics['average_score'] >= 8.5:
        print("   ‚úÖ Excellent performance! Your chatbot is working very well.")
    elif overall_metrics['average_score'] >= 7.5:
        print("   üëç Good performance! Minor improvements could enhance quality.")
    elif overall_metrics['average_score'] >= 6.5:
        print("   ‚ö†Ô∏è Moderate performance. Consider improving knowledge base coverage.")
    else:
        print("   üîß Significant improvements needed. Review knowledge base and prompting.")
    
    # Find weakest category; it is only called out when its average is below 7
    weakest_category = min(category_averages.items(), key=lambda x: x[1])
    if weakest_category[1] < 7:
        print(f"   üéØ Focus on improving: {weakest_category[0]} (score: {weakest_category[1]:.1f})")
    
    # Flag slow responses (average above 3 seconds)
    if overall_metrics['average_response_time'] > 3:
        print(f"   ‚ö° Consider optimizing response time (current: {overall_metrics['average_response_time']:.1f}s)")
    
    return overall_metrics, category_averages, grades

# Generate the report and keep its three aggregates (overall metrics, per-category
# averages, grade counts) for the export cell.
metrics, categories, grades = generate_evaluation_report(evaluation_results)

In [None]:
# üíæ Export Evaluation Results

def export_evaluation_results(results, metrics, categories, grades):
    """Write the evaluation to a text report and a JSON summary in the working directory.

    Both file names carry the same ``YYYYMMDD_HHMMSS`` timestamp taken at call time.

    Args:
        results: per-question evaluation dicts.
        metrics: overall metrics, stored under "overall_metrics".
        categories: per-category values, stored under "category_performance".
        grades: grade counts, stored under "grade_distribution".

    Returns:
        tuple: (detailed_filename, summary_filename)
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Plain-text report: header lines first, then one section per question.
    report_lines = [
        "Plant Disease Chatbot Evaluation Results",
        "=" * 50,
        f"Evaluation Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Total Questions: {len(results)}",
        "",
    ]
    for number, entry in enumerate(results, start=1):
        report_lines += [
            f"Question {number}: {entry['category']}",
            f"Q: {entry['question']}",
            f"Score: {entry['overall_score']:.1f}/10 (Grade: {entry['grade']})",
            f"Keywords Found: {entry['keywords_found']}",
            f"Response Time: {entry['response_time']:.2f}s",
            f"Word Count: {entry['quality_metrics']['word_count']}",
            "-" * 40,
            "",
        ]

    detailed_filename = f"chatbot_evaluation_detailed_{stamp}.txt"
    with open(detailed_filename, 'w', encoding='utf-8') as report_file:
        report_file.write("\n".join(report_lines))

    # JSON summary: fixed system description, the aggregates, and a slim per-question list.
    per_question = []
    for entry in results:
        per_question.append({
            "question": entry['question'],
            "category": entry['category'],
            "score": entry['overall_score'],
            "grade": entry['grade'],
            "keywords_found": entry['keywords_found'],
            "response_time": entry['response_time'],
        })

    summary_data = {
        "evaluation_timestamp": stamp,
        "system_info": {
            "model": "Groq Llama3-70B",
            "embeddings": "Cohere embed-english-v3.0",
            "vector_store": "Pinecone",
            "knowledge_base": "Plant Disease Research (leaf_train.pdf)",
        },
        "overall_metrics": metrics,
        "category_performance": categories,
        "grade_distribution": grades,
        "detailed_results": per_question,
    }

    summary_filename = f"chatbot_evaluation_summary_{stamp}.json"
    with open(summary_filename, 'w', encoding='utf-8') as summary_file:
        json.dump(summary_data, summary_file, indent=2, ensure_ascii=False)

    print("\nüíæ EVALUATION RESULTS EXPORTED")
    print("=" * 40)
    print(f"üìÑ Detailed Results: {detailed_filename}")
    print(f"üìã Summary Report: {summary_filename}")

    return detailed_filename, summary_filename

# Export only when an evaluation has produced results in this session.
if not ('evaluation_results' in locals() and evaluation_results):
    print("‚ùå No evaluation results found. Please run the evaluation first.")
else:
    detailed_file, summary_file = export_evaluation_results(evaluation_results, metrics, categories, grades)

    print("\nüéâ EVALUATION COMPLETED SUCCESSFULLY!", "=" * 50, "\nüìä Summary:", sep="\n")
    # The headline numbers are shown only when the metrics dict is non-empty.
    if metrics:
        print(
            f"   ‚Ä¢ Average Score: {metrics['average_score']:.2f}/10",
            f"   ‚Ä¢ Questions Tested: {metrics['total_questions']}",
            f"   ‚Ä¢ Average Response Time: {metrics['average_response_time']:.2f}s",
            sep="\n",
        )

    print(
        "\nüéØ Next Steps:",
        "   1. Review the detailed results file",
        "   2. Focus on improving low-scoring categories",
        "   3. Re-run evaluation after improvements",
        "   4. Track progress over time",
        sep="\n",
    )

    print("\n‚úÖ Your Plant Disease Chatbot evaluation is complete!")