In [1]:
import mlflow
import pandas as pd
import os

# Example finance QA dataset
questions = ["What was the company’s revenue in Q2?"]
ground_truth = ["$25.3B"]
predicted_answers = ["$25B"]

# Prefer the MLFLOW_TRACKING_URI environment variable (the same setting the
# later cells read) and fall back to the local development server when it is
# unset or empty.
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI") or "http://0.0.0.0:5001/")
mlflow.set_experiment("QA_Evaluation")

with mlflow.start_run():
    data = pd.DataFrame(
        {
            "question": questions,
            "ground_truth": ground_truth,
            "predicted": predicted_answers,
        }
    )

    # Score the pre-computed predictions against the ground truth.
    # The "default" evaluator is requested explicitly and IS active here; it is
    # what produces the toxicity, readability and exact-match metrics printed
    # below.
    results = mlflow.evaluate(
        data=data,
        targets="ground_truth",
        predictions="predicted",
        model_type="question-answering",
        evaluators=["default"],
    )

print(results.metrics)


2025/09/19 15:14:54 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
  from .autonotebook import tqdm as notebook_tqdm
Using default facebook/roberta-hate-speech-dynabench-r4-target checkpoint
Device set to use mps:0


🏃 View run capable-trout-893 at: http://0.0.0.0:5001/#/experiments/627109935499678511/runs/9d743b2aad5f499c82cbb85c6c394098
🧪 View experiment at: http://0.0.0.0:5001/#/experiments/627109935499678511
{'toxicity/v1/mean': np.float64(0.026011325418949127), 'toxicity/v1/variance': np.float64(0.0), 'toxicity/v1/p90': np.float64(0.026011325418949127), 'toxicity/v1/ratio': 0.0, 'flesch_kincaid_grade_level/v1/mean': np.float64(-3.3999999999999986), 'flesch_kincaid_grade_level/v1/variance': np.float64(0.0), 'flesch_kincaid_grade_level/v1/p90': np.float64(-3.3999999999999986), 'ari_grade_level/v1/mean': np.float64(-2.09), 'ari_grade_level/v1/variance': np.float64(0.0), 'ari_grade_level/v1/p90': np.float64(-2.09), 'exact_match/v1': 0.0}


In [None]:
import os

import httpx
import mlflow
import pandas as pd
from dotenv import load_dotenv
from mlflow.genai.scorers import (
    Correctness,
    ExpectationsGuidelines,
    RelevanceToQuery,
    RetrievalGroundedness,
    RetrievalSufficiency,
)
from mlflow.metrics.genai import (
    answer_correctness,
    answer_relevance,
    answer_similarity,
    faithfulness,
    relevance,
)
from mlflow.tracking import MlflowClient
from openai import OpenAI

# Optional imports, only needed by the commented-out alternatives further down:
# from mlflow.metrics.genai.genai_metric import make_genai_metric
# from mlflow.genai.scorers import Guidelines, RetrievalRelevance, Safety, Scorer


In [None]:
# Load settings via python-dotenv so that the os.getenv / os.environ lookups in
# the following cells can see them.
load_dotenv()


In [None]:
def check_env_vars():
    """
    Verify that every environment variable this notebook relies on is configured.

    A variable counts as missing when it is absent or set to an empty string.
    All missing names are reported together instead of stopping at the first.

    Returns:
        True when every required variable is set.

    Raises:
        AssertionError: if one or more required variables are missing or empty.
    """
    required_env_vars = [
        "OPENAI_API_KEY",
        "MLFLOW_TRACKING_URI",
        "MLFLOW_EXPERIMENT_NAME",
        "OLLAMA_BASE_URL",
        "OLLAMA_MODEL",
        "OPENAI_MODEL",
    ]

    missing = [name for name in required_env_vars if not os.environ.get(name)]
    if missing:
        # Raised explicitly (not via `assert`) so the check still runs when
        # Python is started with -O; the exception type is kept for callers.
        raise AssertionError(f"{', '.join(missing)} environment variable must be set")

    print("Environment variables are set")
    return True


# Validate the configuration now, before any later cell reads these settings.
check_env_vars()


In [None]:
# Setup MLflow
def setup_mlflow(tracking_uri, mlflow_experiment_name, active_model_name=None):
    """
    Point MLflow at a tracking server and select the experiment to log to.

    Args:
        tracking_uri: URI of the MLflow tracking server; must be non-empty.
        mlflow_experiment_name: name of the experiment to activate. If an
            experiment with this name is in the "deleted" lifecycle stage it is
            restored before being activated.
        active_model_name: optional name passed to ``mlflow.set_active_model``.

    Raises:
        ValueError: if ``tracking_uri`` is empty or None.
    """
    if not tracking_uri:
        raise ValueError("MLFLOW_TRACKING_URI environment variable must be set")
    mlflow.set_tracking_uri(tracking_uri)

    tracking_client = MlflowClient()
    existing = tracking_client.get_experiment_by_name(mlflow_experiment_name)

    # Only a soft-deleted experiment needs extra work; in every case the
    # experiment is activated by name afterwards.
    if existing is not None and existing.lifecycle_stage == "deleted":
        print("The experiment is deleted. Restoring it...")
        tracking_client.restore_experiment(existing.experiment_id)
    mlflow.set_experiment(mlflow_experiment_name)

    if active_model_name:
        mlflow.set_active_model(name=active_model_name)

    # Echo the resulting configuration so the notebook output shows what is active.
    print(f"tracking uri: {mlflow.get_tracking_uri()}")
    print(f"experiment: {mlflow.get_experiment_by_name(mlflow_experiment_name)}")
    print(f"active model: {mlflow.get_active_model_id()}")


In [None]:
# Single-row finance QA sample: one question, its reference answer and the
# answer being scored.
questions = ["What was the company’s revenue in Q2?"]
ground_truth = ["$25.3B"]
predicted_answers = ["$25B"]

data = pd.DataFrame(
    {
        "question": questions,
        "ground_truth": ground_truth,
        "predicted": predicted_answers,
    }
)

# Tracking server comes from the environment; no active model is set here.
setup_mlflow(
    mlflow_experiment_name="QA_Evaluation",
    tracking_uri=os.getenv("MLFLOW_TRACKING_URI"),
)

# Evaluate the pre-computed predictions inside a dedicated run.
with mlflow.start_run():
    results = mlflow.evaluate(
        data=data,
        predictions="predicted",
        targets="ground_truth",
        model_type="question-answering",
    )
print(results.metrics)


In [None]:
# Switch MLflow to the tracing experiment and set the active model. The second
# argument of each os.getenv call is the fallback used when the variable is unset.
setup_mlflow(
    tracking_uri=os.getenv("MLFLOW_TRACKING_URI"),
    mlflow_experiment_name=os.getenv("MLFLOW_EXPERIMENT_NAME", "llm_tracing_ollama"),
    active_model_name=os.getenv("MLFLOW_ACTIVE_MODEL_NAME", "llama318b_model"),
)

# Enable MLflow automatic tracing for OpenAI
mlflow.openai.autolog()


In [None]:
# basic openai client request
def basic_query_to_openai(
    query: str = "In two sentences, describe the biggest loss caused by the crop diseases in India by the farmers",
):
    """
    Send a single prompt to the OpenAI Responses API and print the reply text.

    Intended as a connectivity smoke test for the OpenAI API.

    Args:
        query: prompt to send. Defaults to the sample agriculture question this
            function previously always sent, regardless of the argument.

    Returns:
        None. The response text is printed.
    """
    client = OpenAI()
    # Generous 900 s timeout for this one request only; the model name comes
    # from the OPENAI_MODEL environment variable.
    response = client.with_options(timeout=900.0).responses.create(
        model=os.getenv("OPENAI_MODEL"),
        input=query,
        # service_tier="flex",
    )
    print(response.output_text)


In [None]:
# OpenAI-compatible client pointed at the Ollama endpoint.
# Timeouts: 10 s by default, but allow up to 180 s while reading the response.
custom_timeout = httpx.Timeout(10.0, read=180.0)
client = OpenAI(
    base_url=os.getenv("OLLAMA_BASE_URL"),  # The local Ollama REST endpoint
    # Was "OPENAPI_KEY", a misspelling of the OPENAI_API_KEY variable that
    # check_env_vars() requires.
    api_key=os.getenv("OPENAI_API_KEY"),
    max_retries=0,  # surface failures immediately instead of retrying
    timeout=custom_timeout,
)

# response = client.chat.completions.create(
#     model=os.getenv("OLLAMA_MODEL"),
#     messages=[
#         {"role": "system", "content": "You are a science teacher."},
#         {"role": "user", "content": "Why is the sky blue?"},
#     ],
# )


In [None]:
@mlflow.trace
def chat_with_ollama(messages):
    """
    Send a chat conversation to the model named by OLLAMA_MODEL.

    Args:
        messages: list of chat message dicts, forwarded unchanged to the
            chat-completions endpoint of the module-level ``client``.

    Returns:
        The chat completion response object returned by the client.
    """
    model_name = os.getenv("OLLAMA_MODEL")
    return client.chat.completions.create(model=model_name, messages=messages)


In [None]:
def other_stats(response):
    """Print the complete response payload as returned by ``response.model_dump()``."""
    payload = response.model_dump()
    print(payload)


In [None]:
def get_trace():
    """
    Print token-usage details for the most recent MLflow trace.

    Looks up the last active trace id, fetches that trace and prints the
    aggregate token usage from ``trace.info`` followed by the usage recorded
    on each span under the ``mlflow.chat.tokenUsage`` attribute. Whenever a
    piece of information is unavailable, an explanatory message is printed
    instead. Returns None.
    """
    last_trace_id = mlflow.get_last_active_trace_id()
    print(f"Last trace ID: {last_trace_id}")

    # Guard clauses: stop as soon as there is nothing further to inspect.
    if last_trace_id is None:
        print("No trace found. Make sure MLflow tracing is enabled.")
        return

    trace = mlflow.get_trace(trace_id=last_trace_id)
    if trace is None:
        print(f"Could not retrieve trace with ID: {last_trace_id}")
        return
    if trace.info is None:
        print("Trace info is not available.")
        return

    # Aggregate usage for the whole trace (attribute may be absent or empty).
    total_usage = getattr(trace.info, "token_usage", None)
    if total_usage:
        print("== Total token usage: ==")
        print(f"  Input tokens: {total_usage['input_tokens']}")
        print(f"  Output tokens: {total_usage['output_tokens']}")
        print(f"  Total tokens: {total_usage['total_tokens']}")
    else:
        print("Token usage information not available in trace.info")

    # Per-span usage for each LLM call.
    trace_data = getattr(trace, "data", None)
    if not trace_data or not hasattr(trace_data, "spans"):
        print("Trace data or spans not available")
        return

    print("\n== Detailed usage for each LLM call: ==")
    found_usage = False
    for span in trace_data.spans:
        usage = span.get_attribute("mlflow.chat.tokenUsage")
        if not usage:
            continue
        found_usage = True
        print(f"{span.name}:")
        print(f"  Input tokens: {usage['input_tokens']}")
        print(f"  Output tokens: {usage['output_tokens']}")
        print(f"  Total tokens: {usage['total_tokens']}")

    if not found_usage:
        print("No token usage found in span attributes")


In [None]:
def test_trace():
    # Test the fix: Re-run the OpenAI call with tracing enabled
    print("Making a new OpenAI call with MLflow tracing enabled...")

    response2 = client.chat.completions.create(
        model=os.getenv("OLLAMA_MODEL"),
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What is machine learning?"},
        ],
    )
    print(response2.model_dump())
    print("Response generated. Checking for trace...")

    # Now try to get the trace again
    last_trace_id = mlflow.get_last_active_trace_id()
    print(f"New trace ID: {last_trace_id}")

    if last_trace_id:
        trace = mlflow.get_trace(trace_id=last_trace_id)
        if trace and trace.info:
            print("✅ Trace successfully retrieved!")
            print(f"Trace status: {trace.info.status}")
            if hasattr(trace.info, "token_usage") and trace.info.token_usage:
                print(f"Token usage available: {trace.info.token_usage}")
            else:
                print("Token usage not available in trace.info")
        else:
            print("❌ Trace or trace.info still None")
    else:
        print("❌ Still no trace ID found")


In [None]:
def get_trace_for_active_model(n: int = 1):
    """
    Return the traces MLflow has recorded for the currently active model.

    Args:
        n: currently unused; the search is not limited by it.
            NOTE(review): presumably meant as a result limit — confirm before
            wiring it in, since that would change what callers get back.

    Returns:
        Whatever ``mlflow.search_traces`` returns for the active model id.
    """
    return mlflow.search_traces(model_id=mlflow.get_active_model_id())


In [None]:
def to_single_line(s: str) -> str:
    """Collapse every run of whitespace (including newlines) in ``s`` to one space and trim the ends."""
    words = s.split()
    return " ".join(words)


In [None]:
# Prompts sent to the model, one per request cell; the replies are appended to
# `outputs` in the same order.
questions = [
    "What is the insecticide and fungicide chemicals used for leaf disease Black Rot in Tamil Nadu, India?",
    "What are my organic options to cure the disease Black Rot in Tamil Nadu?",
    "will this impact my paddy plants as well?",
]
# Filled by the request cells; running this cell again resets it to empty.
outputs = []


In [None]:
# Ask the first question; the reply text is collected in `outputs`.
messages = [
    {"role": "system", "content": "You are a agriculture expert."},
    {"role": "user", "content": questions[0]},
]
try:
    response = chat_with_ollama(messages)
    outputs.append(response.choices[0].message.content)
    other_stats(response)
except Exception as exc:
    # Best-effort: report the failure and carry on. If the request itself
    # fails, nothing is appended, so `outputs` may end up shorter than
    # `questions`.
    print(f"Error: {exc}")


In [None]:
# Ask the second question; the reply text is collected in `outputs`.
messages = [
    {"role": "system", "content": "You are a agriculture expert."},
    {"role": "user", "content": questions[1]},
]
try:
    response = chat_with_ollama(messages)
    outputs.append(response.choices[0].message.content)
    other_stats(response)
except Exception as exc:
    # Best-effort: report the failure and carry on. If the request itself
    # fails, nothing is appended, so `outputs` may end up shorter than
    # `questions`.
    print(f"Error: {exc}")


In [None]:
# Ask the third question; the reply text is collected in `outputs`.
messages = [
    {"role": "system", "content": "You are a agriculture expert."},
    {"role": "user", "content": questions[2]},
]
try:
    response = chat_with_ollama(messages)
    outputs.append(response.choices[0].message.content)
    other_stats(response)
except Exception as exc:
    # Best-effort: report the failure and carry on. If the request itself
    # fails, nothing is appended, so `outputs` may end up shorter than
    # `questions`.
    print(f"Error: {exc}")


In [None]:
# Reference answers, one per entry in `questions` and in the same order. They
# are kept as raw multi-line text here and flattened to single lines below.
expected_response_raw = [
    """
            Hey — to give good advice, can you tell me which crop the “black rot” is on (for example, cabbage/crucifers, grape, citrus, etc.)? Different crops + pathogens need different chemicals. Meanwhile, here’s a general idea plus what Tamil Nadu Agricultural University (TNAU) recommends (for crucifers like cabbage) for black rot and similar leaf diseases.

⸻

What is “Black Rot” generally
	•	In crucifers (cabbage, cauliflower etc.), “black rot” is caused by Xanthomonas campestris pv. campestris, a bacterial pathogen. So antibiotics or copper-based bactericides are used, not just fungicides.
	•	There are also fungal diseases called “leaf blight,” “leaf spot,” etc., so need to be clear which you mean.

⸻

TNAU recommendations (Cabbage / Crucifers in Tamil Nadu) for Black Rot (bacterial)

From TNAU’s crop protection guide for cabbage:  ￼
	•	Seed treatment: Dip seeds in streptomycin 100 parts per million (ppm) for 30 minutes.  ￼
	•	Foliar sprays: Two sprays after planting and during head formation with a mixture of copper oxychloride + streptomycin (100 ppm) each time.  ￼

So in this case, the chemical control is:
	•	Streptomycin (an antibiotic)
	•	Copper oxychloride (a copper-based bactericide/fungicide)

⸻

Other chemicals used for leaf disease control (fungal/bacterial) in Tamil Nadu or similar regions

Even though these are for leaf spot, blight, etc., many overlap with what could help prevent secondary fungal infections:
	•	Mancozeb — commonly used for leaf spot, blight, etc.  ￼
	•	Carbendazim — a systemic fungicide used in various leaf spot/blight control.  ￼
	•	Copper-based compounds — e.g. copper oxychloride, copper hydroxide. These are useful for bacterial as well as fungal diseases.  ￼

⸻

What to consider / use depending on the pathogen
	•	If it’s truly bacterial black rot (Xanthomonas in crucifers): use copper compounds + antibiotics (if allowed/legal).
	•	If fungal (leaf spot, anthracnose, etc.): use fungicides like Mancozeb, Carbendazim, or such, depending on fungicide registration and safety.
	•	Always follow label rates, waiting periods, resistance management (rotate chemicals), avoid spray drift, etc.

⸻

If you tell me the crop and symptoms (what the spots look like, if there’s a smell, whether the veins turn black, etc.), I can give you a more specific list of insecticides/fungicides/antibiotics approved in Tamil Nadu (with dosages). Do you want me to pull that together?
            
            """,
    """
            Got it 🌱 Thanks for clarifying — you’re asking about organic ways to manage black rot (most often a bacterial disease on crucifers like cabbage/cauliflower, caused by Xanthomonas campestris). Since this is bacterial, chemical fungicides don’t really help, and organic control is more about prevention + cultural practices with a few allowed sprays.

Here are your organic options, especially suited for Tamil Nadu conditions:

⸻

🌿 Organic & Eco-Friendly Management of Black Rot

1. Seed & Seedling Care
	•	Hot water seed treatment: Soak seeds in water at 50 °C for 30 minutes → kills bacteria hiding on the seed coat.
	•	Trichoderma / Pseudomonas seed treatment: Coat seeds with Pseudomonas fluorescens powder (10 g/kg seed) – this is widely used in Tamil Nadu organic farming.

2. Soil & Nursery Management
	•	Always raise seedlings in disease-free soil or trays (avoid reusing infected soil).
	•	Apply neem cake or Pseudomonas fluorescens in the nursery bed to reduce pathogen survival.

3. Field Practices
	•	Follow 3-year crop rotation (avoid planting crucifers like cabbage, cauliflower, mustard, radish in the same plot repeatedly).
	•	Keep good drainage and spacing – wet, crowded plants spread bacteria faster.
	•	Remove and burn infected leaves/plants as soon as you notice symptoms.

4. Organic Sprays (allowed in organic farming)
	•	Neem oil (2–3%): Has mild antibacterial effect and reduces pest stress (since insect wounds spread bacteria).
	•	Panchagavya or Jeevamrutham foliar sprays: Boosts plant immunity.
	•	Garlic or ginger extract sprays (10%) – traditional practice in TN, with antibacterial properties.
	•	Copper-based sprays (Bordeaux mixture / copper hydroxide): These are permitted in organic farming (under restricted use). They suppress bacterial population on leaves.

5. Biological Control
	•	Regular foliar sprays of Pseudomonas fluorescens @ 0.2% (available from TNAU and organic input shops in Tamil Nadu).
→ It competes with Xanthomonas and prevents spread.

⸻

✅ Best Integrated Organic Strategy in Tamil Nadu
	1.	Hot water treat seeds → coat with Pseudomonas fluorescens.
	2.	Apply neem cake + Pseudomonas in nursery.
	3.	Use wide spacing + good drainage.
	4.	Remove diseased plants early.
	5.	Foliar sprays of neem oil or Pseudomonas regularly, and copper hydroxide (if certified organic standards allow).

⸻

Would you like me to make a simple spray schedule (month-wise) for Tamil Nadu cabbage/cauliflower under organic management, so you can follow step by step?
            
            """,
    """
            Good question 👍 — I can see why you’re worried!

The black rot disease in cabbage/cauliflower (crucifers) is caused by the bacterium Xanthomonas campestris pv. campestris. This pathogen is host-specific — meaning it infects crucifer family plants (like cabbage, cauliflower, mustard, radish, knolkhol, etc.), but not rice (paddy).

⸻

🌾 What about Paddy?
	•	Paddy has its own bacterial diseases, like bacterial leaf blight (BLB) caused by Xanthomonas oryzae pv. oryzae.
	•	Even though both are Xanthomonas, they are different strains (pathovars) and don’t jump from cabbage to rice.

⸻

✅ So the answer is:
	•	No direct impact — your paddy plants won’t get infected from cabbage black rot.
	•	But, if you’re spraying copper-based sprays or neem oil in the same area, those won’t harm paddy (in fact, copper is sometimes used for BLB in rice too).
	•	Just be careful about crop rotation — don’t rotate crucifers with crucifers, but paddy after cabbage is totally safe.

⸻

👉 Do you want me to also share some organic tips for bacterial leaf blight in paddy (since it’s a common issue in Tamil Nadu)? That way, you’ll have both cabbage and rice covered.
            
            """,
]

# Flatten each reference answer: every run of whitespace (indentation, tabs,
# newlines) becomes a single space.
expected_response = [to_single_line(s) for s in expected_response_raw]

# Bare expression so the notebook displays the flattened answers.
expected_response


In [None]:
# The three columns must line up row for row. `outputs` is filled by the
# per-question request cells: it comes up short when a request failed (the
# error is only printed) and runs long when a request cell was executed more
# than once. Check up front and fail with an actionable message.
if not (len(questions) == len(expected_response) == len(outputs)):
    raise ValueError(
        "Cannot build eval_df: column lengths differ "
        f"(questions={len(questions)}, expected_response={len(expected_response)}, "
        f"outputs={len(outputs)}). Reset `outputs = []` and run each question cell exactly once."
    )

# One row per question: the prompt, the reference answer and the model's answer.
eval_df = pd.DataFrame(
    {
        "user_query": questions,
        "expected_response_from_judge": expected_response,
        "generated_response": outputs,
    }
)


In [None]:
# Scorer instances (names from mlflow.genai.scorers) intended for
# mlflow.genai.evaluate.
# NOTE(review): no live cell passes this list to an evaluation call yet; the
# only reference is in commented-out code.
# NOTE(review): the Retrieval* scorers are RAG-oriented, and this notebook has
# no retrieval step in view — confirm they apply.
scorers = [
    RelevanceToQuery(),  # LLM as judge - relevance
    RetrievalGroundedness(),  # For RAG - groundedness
    # RetrievalRelevance(),  # RAG: passage relevance
    RetrievalSufficiency(),
    # Safety(),  # Safety metric
    # Guidelines(),  # Customizable
    Correctness(),  # NOTE(review): MLflow documents this as an LLM judge, not a heuristic — confirm
    ExpectationsGuidelines(),  # NOTE(review): likewise documented as an LLM judge — confirm
]


In [None]:
# Metric objects (names from mlflow.metrics.genai), each configured with the
# same judge model URI "gateway:/openai-gpt-4o".
# NOTE(review): presumably meant for the `extra_metrics` argument of
# mlflow.evaluate; no live cell passes this list anywhere yet.
extra_metrics_to_track = [
    faithfulness(model="gateway:/openai-gpt-4o"),
    relevance(model="gateway:/openai-gpt-4o"),
    answer_correctness(model="gateway:/openai-gpt-4o"),
    answer_relevance(model="gateway:/openai-gpt-4o"),
    answer_similarity(model="gateway:/openai-gpt-4o"),
    # toxicity(),
    # flesch_kincaid_grade_level(),
]


In [None]:
def old_evaluation_methods(data, targets, predictions, model_type):
    """
    Thin wrapper around ``mlflow.evaluate`` for pre-computed predictions.

    Args:
        data: evaluation data, forwarded as ``data``.
        targets: forwarded as ``targets``.
        predictions: forwarded as ``predictions``.
        model_type: forwarded as ``model_type``.

    Returns:
        The object returned by ``mlflow.evaluate``.
    """
    evaluate_kwargs = {
        "data": data,
        "targets": targets,
        "predictions": predictions,
        "model_type": model_type,
    }
    return mlflow.evaluate(**evaluate_kwargs)


In [None]:
# Wrap eval_df as an MLflow dataset and record it as an input of a new run.
with mlflow.start_run() as evaluation_run:
    eval_dataset_new = mlflow.data.from_pandas(
        df=eval_df,
        name="eval_dataset",
        targets="expected_response_from_judge",
        predictions="generated_response",
    )
    mlflow.log_input(dataset=eval_dataset_new)
    # Only the dataset is logged in this run; no evaluation is executed here.
    # The evaluation call below is kept disabled because it was marked as not
    # working.

    # ######### NOT WORKING
    # result1 = old_evaluation_methods(
    #     data=eval_df,
    #     targets="expected_response_from_judge",
    #     predictions="generated_response",
    #     model_type="question-answering",
    # )
    # result1.tables["eval_results_table"]


In [None]:
# result1 = mlflow.genai.evaluate(
#     data=eval_dataset,
#     # scorers=scorers,
#     model_id=mlflow.get_active_model_id(),
#     # predict_fn=
# )

#### deprecated
# result2 = mlflow.evaluate(
#     data=eval_dataset,
#     # extra_metrics=[
#     #     mlflow.metrics.genai.answer_correctness(model="gateway:/llama3.1:8b"),
#     #     mlflow.metrics.genai.answer_relevance(model="gateway:/llama3.1:8b"),
#     # ],
#     # This is needed since answer_correctness looks for 'inputs' field
#     evaluator_config={"col_mapping": {"inputs": "messages"}},
#     model_type="question-answering",
# )

# result2.tables["eval_results_table"]
