In [1]:
# Base Imports
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv

# RAG Imports
import chromadb
import langchain
import langchainhub

In [2]:
import os
from dotenv import load_dotenv

current_folder = os.getcwd()
env_path = os.path.join(current_folder, '.env')

# Load the .env file using the explicit path
load_dotenv(dotenv_path=env_path, override=True)

print("API Key loaded successfully.")

API Key loaded successfully.


In [3]:
# Embedding Function Import
import chromadb.utils.embedding_functions as embedding_functions
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
                api_key_env_var="OPENAI_API_KEY",
                model_name="text-embedding-3-small"
            )
df_corpus = pd.read_csv("Data520Corpus.csv")
chroma_client = chromadb.PersistentClient("./localCorpus")

In [4]:
df_corpus = df_corpus.fillna("NaN")
df_corpus.to_csv("CleanCorpus.csv")

In [5]:
# Chroma Collection
collection1 = chroma_client.get_or_create_collection(name = "plantCorpus", embedding_function=openai_ef)

In [6]:
from langchain_community.document_loaders import CSVLoader
loader = CSVLoader(file_path="CleanCorpus.csv",
                   encoding="utf-8")
documents = loader.load()

documents_to_add = documents[1:]  # Skip row 0 (the headers in original csv)

all_contents = [doc.page_content for doc in documents_to_add]
all_metadatas = [doc.metadata for doc in documents_to_add]
all_ids = [f"plant_{i}" for i in range(len(documents_to_add))]

# Add to ChromaDB
collection1.add(
    documents=all_contents,
    metadatas=all_metadatas,
    ids=all_ids
)

In [7]:
collection1.peek()

{'ids': ['plant_0',
  'plant_1',
  'plant_2',
  'plant_3',
  'plant_4',
  'plant_5',
  'plant_6',
  'plant_7',
  'plant_8',
  'plant_9'],
 'embeddings': array([[ 0.03074766,  0.00283088,  0.06021107, ...,  0.00032994,
         -0.00185381,  0.01331164],
        [-0.00045501,  0.01059899,  0.05559633, ...,  0.00438245,
          0.02298214,  0.01746743],
        [ 0.00712388, -0.0019319 ,  0.02388312, ...,  0.01023907,
          0.04370925,  0.01342671],
        ...,
        [-0.0176999 , -0.01536726,  0.06013608, ..., -0.03762991,
          0.00680568,  0.04068029],
        [ 0.03344809, -0.0093409 ,  0.02935345, ...,  0.00417461,
          0.00749191,  0.02259729],
        [ 0.01529388,  0.03040319,  0.06286313, ..., -0.00324501,
          0.02502396,  0.02048853]]),
 'documents': [': 1\nPlant ID: 2\nCommon Name: Golden Leatherfern / Mangrove Fern\nScientific Name: Acrostichum aureum\nLocal Name (If Applicable): NaN\nRegion: NaN\nClimate Requirements: Prefers warm tropical climates; f

In [8]:
collection1.count()

14

In [9]:
# Testing collection querying

collection1.query(
    query_texts=["What needs does Soap pod have if i grow it?"],
    n_results= 2
)

{'ids': [['plant_6', 'plant_13']],
 'embeddings': None,
 'documents': [[': 7\nPlant ID: 8\nCommon Name: Club Moss\nScientific Name: Isoetes coromandelin\nLocal Name (If Applicable): NaN\nRegion: NaN\nClimate Requirements: Usually in aquatic environments and moist soils\nSoil Type: NaN\nSun Light Needs: NaN\nWater Needs: NaN\nGrowth Rate: Up to 3 feet\nEcological Role: Often locally dominant; can form extensive stands\nTraditional Uses: NaN',
   ': 14\nPlant ID: 15\nCommon Name: Ashoka\nScientific Name: Saraca indica\nLocal Name (If Applicable): NaN\nRegion: NaN\nClimate Requirements: Requires average rainfall; grows in regular soil\nSoil Type: Regular soil\nSun Light Needs: NaN\nWater Needs: Average\nGrowth Rate: NaN\nEcological Role: NaN\nTraditional Uses: flowers studied for anti-skin cancer properties']],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[{'source': 'CleanCorpus.csv', 'row': 7},
   {'row': 14, 'source': 'CleanCorpus.c

In [10]:
test_query = "Plants in india?"

results = collection1.query(
    query_texts=[test_query],
    n_results=3
)

In [11]:
retrieved_documents = results['documents'][0]

print("Query Results")
for doc in retrieved_documents:
    print(doc)
    print("\n") # Add a separator

Query Results
: 6
Plant ID: 7
Common Name: Hiptage, Helicopter flower
Scientific Name: Hiptage benghalensis
Local Name (If Applicable): NaN
Region: NaN
Climate Requirements: Grows in damp places; needs presence of other trees in vicinity
Soil Type: NaN
Sun Light Needs: NaN
Water Needs: Average
Growth Rate: 6–10 ft (vine)
Ecological Role: Forms woody creepers; spreads rapidly forming thickets and smothering vegetation; considered a weed in many regions
Traditional Uses: Medicinal: used for rheumatism, scabies, asthma, skin complaints & ulcers, inflammation, cough; systems: Ayurveda, Folk Medicine; parts used: root, bark, flower.


: 9
Plant ID: 10
Common Name: Mahua
Scientific Name: Madhuca longifolia
Local Name (If Applicable): NaN
Region: NaN
Climate Requirements: Tropical
Soil Type: NaN
Sun Light Needs: NaN
Water Needs: NaN
Growth Rate: Fast-growing; ~20 m tall (more than 10 ft)
Ecological Role: Spreading root system used to prevent soil erosion
Traditional Uses: Human consumption (e

# RAG Framework

### Retrieval Step - Intialize a vector_store from LangChain using Chroma

In [12]:
from langchain_community.document_loaders import CSVLoader
from langchain_openai import OpenAIEmbeddings 
from langchain_chroma import Chroma
import os
from dotenv import load_dotenv

load_dotenv()

openAI_embed = OpenAIEmbeddings(model = "text-embedding-3-large")
loader = CSVLoader(
    file_path="CleanCorpus.csv",
    encoding = "utf-8"
)
documents = loader.load()

vector_store = Chroma.from_documents(
    documents=documents, 
    embedding=openAI_embed,
    collection_name="plantCorpus_langchain"
)

### Dynamic prompt combining Retrieval 


In [13]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest
SYSTEM_PROMPT_TEMPLATE = """
## Role and Goal
You are an expert landscaping and agricultural assistant with deep, practical knowledge of the Indo-Gangetic plains. Your goal is to provide accurate, specific answers to user queries about plants.

## Source of Truth: Plant Corpus
You will be given context from a specialized `Plant Corpus`. This corpus is your **single source of truth** for all plant-specific data.

### Retrieved Context:
{corpus_content}

## Core Instructions and Rules

1.  **Prioritize Context:** You MUST base all plant facts, recommendations, and data **directly** on the provided `Plant Corpus` context. Do not use your general knowledge if it conflicts with the context.

2.  **Synthesize, Don't Just Repeat:** When answering, intelligently synthesize information from the context's fields. For example, to answer "what plant should I grow," you MUST combine `Climate Requirements`, `Soil Type`, and `Water Needs` to explain *why* a plant is suitable.

3.  **Handle Missing Information (Critical):** If the user asks about a plant, region, or topic that is **not** in the provided context, you MUST clearly state that the information is not available in your specialized corpus. However, you can use your general knowledge to invent an answer for a plant not in list.

4.  **Use Your Persona:** After you have provided the core facts *from the corpus*, you may add a brief, practical tip based on your persona (e.g., "In my experience on the plains, this plant also helps with soil erosion," or "Be sure to protect it from...").

5.  **Corpus Structure (For Your Reference):**
    `Plant ID`, `Common Name`, `Scientific Name`, `Local Name (If Applicable)`, `Region`, `Climate Requirements`, `Soil Type`, `Sun Light Needs`, `Water Needs`, `Growth Rate`, `Ecological Role`, `Traditional Uses`
"""
@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    user_query = request.state["messages"][-1].text
    corpus_context = vector_store.similarity_search(query=user_query)

    corpus_content = "\n".join(doc.page_content for doc in corpus_context)

    system_message = SYSTEM_PROMPT_TEMPLATE.format(corpus_content=corpus_content)
    return system_message

In [14]:
import os
from langchain.chat_models import init_chat_model
from dotenv import load_dotenv

current_folder = os.getcwd()
env_path = os.path.join(current_folder, '.env')
load_dotenv(dotenv_path=env_path, override=True)

if os.getenv("OPENAI_API_KEY"):
    print("API Key loaded successfully.")
else:
    print("ERROR: API Key was not found. Check your .env file.")
model = init_chat_model("gpt-4.1")

print("Chat model initialized.")

API Key loaded successfully.
Chat model initialized.


#### Create Agents

In [15]:
from langchain.agents import AgentState, create_agent

agent = create_agent(model, tools=[], middleware = [prompt_with_context]) 
noRAG_agent = create_agent(model, tools=[], middleware = []) 

### Compare Model + Rag with Model with no Rag


In [16]:
query = "What plant would be recommended to grow in my region (Indo-Gangetic Plain) where my conditions for soil are usually poor nutrients and not a lot of water?"
for step in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


What plant would be recommended to grow in my region (Indo-Gangetic Plain) where my conditions for soil are usually poor nutrients and not a lot of water?

Based on your conditions—**poor nutrient soil** and **limited water availability**—the most suitable plant from the provided corpus is the **Neem tree (Azadirachta indica)**.

### Why Neem is Suitable:
- **Soil Adaptability:** Neem prefers well-drained sandy to loamy soils but is *highly tolerant* of poor and dry soils, making it ideal for degraded or nutrient-deficient land.
- **Water Needs:** Neem requires *low water* and is *highly drought-resistant*, thriving even in areas with little irrigation.
- **Climate Compatibility:** It thrives in tropical to subtropical climates, which matches the Indo-Gangetic plain.
- **Additional Benefits:** Neem improves soil fertility, provides shade, natural pest control, and supports local biodiversity.

**Practical Tip:**  
In my experience on the plains, neem not only survives tough conditions

In [17]:
for step in noRAG_agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


What plant would be recommended to grow in my region (Indo-Gangetic Plain) where my conditions for soil are usually poor nutrients and not a lot of water?

Given your location in the Indo-Gangetic Plain, and the described soil constraints (poor nutrients and low water availability), you’ll want a plant that is both drought-tolerant and can thrive in less fertile soils. Here are some recommendations:

### 1. **Moringa (Drumstick Tree)**
- **Why?** Extremely hardy, tolerates drought, grows well in poor soils, provides edible leaves, pods, and flowers, and is known for improving soil fertility over time.
- **Uses:** Leaves and pods are highly nutritious, used in cooking and traditional medicine.

### 2. **Pearl Millet (Bajra)**
- **Why?** Adapted to dry, nutrient-poor soils and is a staple crop in arid and semi-arid regions of India.
- **Uses:** Grown for grain (food), fodder.

### 3. **Pigeon Pea (Arhar/Tur)**
- **Why?** Deep roots help access water lower down, tolerates poor soils, als

In [18]:
evaluator_personas = {
    "Evaluator_1_Landscape_Architect": (
        "You are a Senior Landscape Architect for US Embassies. "
        "You prioritize aesthetics, formal structure, and low maintenance. "
        "You are strict about visual appeal and infrastructure safety."
    ),
    "Evaluator_2_Local_Botanist": (
        "You are a PhD Botanist specializing in the Indo-Gangetic Plain. "
        "You care deeply about scientific accuracy, correct Latin names, "
        "and specific soil/climate requirements. You dislike vague answers."
    ),
    "Evaluator_3_Cultural_Historian": (
        "You are an expert in Indian Ethnobotany and Folklore. "
        "You focus on cultural relevance, traditional uses (Ayurveda), "
        "and local naming conventions. You want to see cultural depth."
    ),
    "Evaluator_4_Sustainability_Officer": (
        "You are a Sustainability Officer focused on water conservation. "
        "You heavily penalize plants that require too much water or fertilizer. "
        "You prioritize ecological suitability and native species."
    ),
    "Evaluator_5_General_Resident": (
        "You are a homeowner in the region with average gardening skills. "
        "You care about 'Overall Helpfulness' and simple, clear advice. "
        "You find overly technical jargon unhelpful."
    )
}

In [19]:
import json
with open("rag.json", 'r', encoding="utf-8") as rag_file:
    rag_data = json.load(rag_file)

with open("norag.json", 'r', encoding="utf-8") as norag_file:
    norag_data = json.load(norag_file)

print(f"Loaded {len(rag_data)} RAG responses and {len(norag_data)} NoRAG responses.")

Loaded 10 RAG responses and 10 NoRAG responses.


In [20]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage

def get_evaluation(judge_persona, user_query, model_response):
    system_prompt = f"""
    {judge_persona}
    
    You are evaluating an AI assistant's response to a user query about plants in the Indo-Gangetic Plains.
    
    Please rate the response on these 4 metrics using a 1-5 Likert Scale:
    1. Factual Accuracy (1=Hallucinated/Wrong, 5=Highly Accurate/Cited)
    2. Ecological Suitability (1=Invasive/Deadly, 5=Perfect for Region)
    3. Cultural Relevance (1=Generic/Ignorant, 5=Culturally Insightful/Local Context)
    4. Overall Helpfulness (1=Useless, 5=Very Helpful)
    
    RETURN JSON ONLY in this format:
    {{
        "Factual_Accuracy": int,
        "Ecological_Suitability": int,
        "Cultural_Relevance": int,
        "Overall_Helpfulness": int,
        "Reasoning": "Short explanation (max 1 sentence)"
    }}
    """
    user_content = f"""USER QUERY = {user_query}
                    AI RESPONSE = {model_response}"""
    
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_content)
    ]

    try:
        response = model.invoke(messages)
        # Clean string to ensure it is valid JSON
        content_str = response.content.replace("```json", "").replace("```", "").strip()
        return json.loads(content_str)
    except Exception as e:
        print(f"Error parsing JSON: {e}")
        return None
    

In [21]:
import time
results = []

for i, (rag_item, norag_item) in enumerate(zip(rag_data, norag_data)):

    query = rag_item['query']
    rag_response = rag_item['response']
    norag_response = norag_item['response']

    if rag_item['query'] != norag_item['query']:
        print(f"Warning: Mismatch in queries at index {i}. Skipping.")
        continue

    print(f"Evaluating {i+1}/{len(rag_data)}")

    for evaluator_name, persona_prompt in evaluator_personas.items():
        rag_grade = get_evaluation(persona_prompt, query, rag_response)
        if rag_grade:
            results.append({
                "Query_ID": i+1,
                "System": "RAG",
                "Evaluator": evaluator_name,
                **rag_grade # Unpacks the scores (Factual_Accuracy, etc.)
            })
            
        # --- Grade No-RAG System ---
        norag_grade = get_evaluation(persona_prompt, query, norag_response)
        if norag_grade:
            results.append({
                "Query_ID": i+1,
                "System": "No-RAG",
                "Evaluator": evaluator_name,
                **norag_grade # Unpacks the scores
            })
            
    # Sleep 1 second to avoid hitting rate limits
    time.sleep(1)

# Convert list of dicts to DataFrame
df_results = pd.DataFrame(results)
print("Evaluation Complete.")

Evaluating 1/10
Evaluating 2/10
Evaluating 3/10
Evaluating 4/10
Evaluating 5/10
Evaluating 6/10
Evaluating 7/10
Evaluating 8/10
Evaluating 9/10
Evaluating 10/10
Evaluation Complete.


In [22]:
# A. Calculate IRR using Kripendorff to measure agreement among raters

import krippendorff
if not df_results.empty:
    print("AVERAGE LIKERT SCORES (1-5)")
    summary = df_results.groupby("System")[["Factual_Accuracy", "Ecological_Suitability", "Cultural_Relevance", "Overall_Helpfulness"]].mean()
    print(summary)
    print("\n"+ "INTER-RATER RELIABILITY (Krippendorff's Alpha)")
    print("(Alpha > 0.7 indicates strong agreement among your simulated experts)")

    metrics = ["Factual_Accuracy", "Ecological_Suitability", "Cultural_Relevance", "Overall_Helpfulness"]
    df_results['Response_ID'] = df_results['Query_ID'].astype(str) + "_" + df_results['System']

    for metric in metrics:
        try:
            pivot_table = df_results.pivot(index='Response_ID', columns='Evaluator', values=metric)
            reliability_data = pivot_table.transpose().to_numpy()
            alpha = krippendorff.alpha(reliability_data=reliability_data, level_of_measurement='ordinal')
            print(f"{metric:<25}: {alpha:.4f}")
        except Exception as e:
                print(f"Could not calculate alpha for {metric}: {e}")

        df_results.to_csv("evaluation_results.csv", index=False)

else:
    print("No results generated. Check for errors.")


AVERAGE LIKERT SCORES (1-5)
        Factual_Accuracy  Ecological_Suitability  Cultural_Relevance  \
System                                                                 
No-RAG              4.06                    3.38                3.32   
RAG                 4.80                    4.68                4.62   

        Overall_Helpfulness  
System                       
No-RAG                 3.62  
RAG                    4.76  

INTER-RATER RELIABILITY (Krippendorff's Alpha)
(Alpha > 0.7 indicates strong agreement among your simulated experts)
Factual_Accuracy         : 0.8567
Ecological_Suitability   : 0.9451
Cultural_Relevance       : 0.8811
Overall_Helpfulness      : 0.8529


In [23]:
# B. Semantic Divergence (BERTScore)
# Checks: "Did the RAG system actually change the answer compared to No-RAG?"
# If the score is High (0.95+), your RAG system is redundant. The base model already knew the answer, so retrieving the documents didn't change anything.
# If the score is Lower (< 0.85), it means the RAG system successfully injected new, specific information that forced the model to change its answer.

from bert_score import score as bert_score

df_rag = pd.DataFrame(rag_data)
df_norag = pd.DataFrame(norag_data)

P, R, F1 = bert_score(df_rag['response'].to_list(),
                    df_norag['response'].to_list(),
                    lang ="eng",
                    verbose=False)

df_rag['BERTScore_Similarity'] = F1.numpy()
print(f"Avg Similarity to Base Model: {df_rag['BERTScore_Similarity'].mean():.3f}")





Avg Similarity to Base Model: 0.733


In [None]:
import os
# This creates a dummy environment variable that tells gitpython to shut up
# C. Answer Relevancy (RAGAS)
# This is an AI-driven metric. It takes the Answer generated by your system and asks an LLM to reverse-engineer a question for it. It then compares that reverse-engineered question to the original user query.
# High (>0.8). Ensure the bot isn't just hallucinating generic facts.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.metrics import answer_relevancy
from ragas import evaluate
from datasets import Dataset


os.environ["GIT_PYTHON_REFRESH"] = "quiet"
gpt4_llm = ChatOpenAI(model="gpt-4o", temperature=0)
ragas_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
ragas_dataset=Dataset.from_dict({
    'question': df_rag['query'].tolist(),
    'answer': df_rag['response'].tolist(),
    # RAGAS requires a 'contexts' column, even if empty for this specific metric
    'contexts': [[''] for _ in range(len(df_rag))]
})

ragas_results = evaluate(dataset=ragas_dataset, metrics=[answer_relevancy], llm=gpt4_llm, embeddings=ragas_embeddings)

df_rag['Ragas_Relevancy'] = ragas_results['answer_relevancy']
print(f"Avg Relevancy Score: {df_rag['Ragas_Relevancy'].mean():.3f}")

Evaluating:   0%|          | 0/10 [00:00<?, ?it/s]

Avg Relevancy Score: 0.664
