In [1]:
!pip install git+https://github.com/ibm-granite-community/utils \
    "langchain_community<0.3.0" \
    replicate

Collecting git+https://github.com/ibm-granite-community/utils
  Cloning https://github.com/ibm-granite-community/utils to /tmp/pip-req-build-zh5jwe2_
  Running command git clone --filter=blob:none --quiet https://github.com/ibm-granite-community/utils /tmp/pip-req-build-zh5jwe2_
  Resolved https://github.com/ibm-granite-community/utils to commit 1514191fbbc4605ed4fdfdcb448f2ee41477058f
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
!pip install mlflow

In [None]:
import os
import replicate
import json
from ibm_granite_community.notebook_utils import get_env_var
from langchain_community.llms import Replicate

# Granite 3.3 8B Instruct client served through Replicate. The API token is
# obtained via get_env_var instead of being hard-coded in the notebook.
model = Replicate(
    replicate_api_token=get_env_var('REPLICATE_API_TOKEN'),
    model="ibm-granite/granite-3.3-8b-instruct",
    model_kwargs=dict(max_tokens=1024, temperature=0.2),
)

print('✅ Setup complete!')

In [None]:
from typing import Dict, List, Optional
import json

class SimpleAgent:
    """Toy shopping agent: four string-returning lookup tools over an in-memory catalog.

    Every tool takes a product id (e.g. ``"P045"``) and returns a human-readable
    string; an unknown id yields ``"Product not found."``.
    """

    def __init__(self):
        """Build ``self.products``, the list of catalog entries the tools read from."""
        catalog_rows = (
            # (id, name, category, description, price, stock,
            #  shipping_cost, free_shipping_threshold, estimated_days)
            ("P010", "Wireless Headphones", "Electronics",
             "High-quality wireless headphones with noise cancellation",
             149.99, 75, 4.99, 100, "2-3"),
            ("P022", "Running Shoes", "Sports",
             "Lightweight running shoes with cushioned soles",
             89.99, 120, 5.99, 100, "3-5"),
            ("P035", "Coffee Maker", "Kitchen",
             "Programmable coffee maker with 12-cup capacity",
             79.99, 45, 7.99, 100, "2-4"),
            ("P012", "Laptop Stand", "Electronics",
             "Adjustable aluminum laptop stand with cooling",
             29.99, 150, 3.99, 50, "1-2"),
            ("P007", "Yoga Mat", "Sports",
             "Non-slip yoga mat with carrying strap",
             24.99, 95, 4.99, 50, "2-3"),
            ("P045", "Wireless Mouse", "Electronics",
             "Ergonomic Wireless Mouse with Rechargable Battery",
             49.99, 18, 4.99, 50, "2-3"),
        )
        self.products = [
            {
                "id": pid,
                "name": name,
                "category": category,
                "description": description,
                "price": price,
                "stock": stock,
                "delivery": {
                    "shipping_cost": shipping_cost,
                    "free_shipping_threshold": threshold,
                    "estimated_days": eta,
                },
            }
            for (pid, name, category, description, price, stock,
                 shipping_cost, threshold, eta) in catalog_rows
        ]

    def get_product_info(self, product_id: str) -> str:
        """Tool 1: name, category and description of the product, one per line."""
        item = self._find_product(product_id)
        if item:
            return "\n".join((
                f"Product: {item['name']}",
                f"Category: {item['category']}",
                f"Description: {item['description']}",
            ))
        return "Product not found."

    def get_price(self, product_id: str) -> str:
        """Tool 2: the product's price formatted with two decimals."""
        item = self._find_product(product_id)
        return f"Price: ${item['price']:.2f}" if item else "Product not found."

    def get_delivery_info(self, product_id: str) -> str:
        """Tool 3: shipping cost, delivery estimate and free-shipping threshold."""
        item = self._find_product(product_id)
        if not item:
            return "Product not found."

        terms = item['delivery']
        report_lines = [
            f"Shipping Cost: ${terms['shipping_cost']:.2f}",
            f"Estimated Delivery: {terms['estimated_days']} business days",
            f"Free shipping on orders over ${terms['free_shipping_threshold']:.2f}",
        ]
        return "\n".join(report_lines)

    def check_stock(self, product_id: str) -> str:
        """Tool 4: stock level label plus the exact number of units available."""
        item = self._find_product(product_id)
        if not item:
            return "Product not found."

        units = item['stock']
        # Strictly-greater-than floors, checked from highest to lowest.
        for floor, label in ((100, "High Stock"), (50, "Good Stock"), (10, "Limited Stock")):
            if units > floor:
                break
        else:
            label = "Low Stock"

        return f"Stock Status: {label} ({units} units available)"

    def _find_product(self, product_id: str) -> Optional[Dict]:
        """Return the first catalog entry whose ``id`` equals ``product_id``, else ``None``."""
        return next(
            (entry for entry in self.products if entry['id'] == product_id),
            None,
        )

In [None]:
# Dependencies for the unitxt LLM-as-a-judge evaluation cells that follow.
%pip install replicate
%pip install unitxt
%pip install openai
%pip install litellm
%pip install diskcache
%pip install tenacity
%pip install tabulate
%pip install git+https://github.com/ibm-granite-community/utils


from unitxt.api import evaluate, create_dataset
from unitxt.inference import CrossProviderInferenceEngine
from unitxt.llm_as_judge import LLMJudgeDirect, DirectCriteriaCatalogEnum

from ibm_granite_community.notebook_utils import get_env_var


import nest_asyncio
# NOTE(review): presumably needed so the judge's inference engine can drive an
# event loop inside Jupyter's already-running one — confirm against unitxt docs.
nest_asyncio.apply()

In [None]:
# Hand-written evaluation fixtures: one user question per instance, and one
# candidate answer per question (same order as the questions).
data = [
    {"question": question}
    for question in (
        "I'm looking for an ergonomic Wireless Mouse",
        "Is the product available in stock?",
    )
]

predictions = [
    "Product P045 is a wireless mouse with ergonomic support and programmable buttons",
    "Product P045 is currently available in your selected region",
]

In [None]:
# LLM-as-a-judge metric: a Granite 3.3 8B model reached through Replicate rates
# each prediction against the catalog's ANSWER_RELEVANCE criterion.
metric = LLMJudgeDirect(
    evaluator_name="GRANITE3_3_8B",
    inference_engine=CrossProviderInferenceEngine(
        model="replicate/ibm-granite/granite-3.3-8b-instruct",
        provider="replicate",
        # Token is read through get_env_var rather than hard-coded.
        credentials={'api_token': get_env_var('REPLICATE_API_TOKEN')},
        # NOTE(review): presumably throttles judge calls to one per second — confirm.
        provider_specific_args={'max_requests_per_second': 1}
    ),
    criteria=DirectCriteriaCatalogEnum.ANSWER_RELEVANCE.value,
    # Each instance's "question" field is handed to the judge as context.
    context_fields=["question"],
    # NOTE(review): both `criteria` and `criteria_field` are set; which one wins
    # is not visible from this notebook — verify against the unitxt docs.
    criteria_field="criteria",
)

In [None]:
# Wrap the hand-written questions as the "test" split of the open-QA task and
# attach the LLM-judge metric defined above in this notebook's `metric` variable.
dataset = create_dataset(
    task="tasks.qa.open",
    test_set=data,
    metrics=[metric],
    split="test"
)

In [None]:
# Run the dataset's metrics over the hand-written predictions.
results = evaluate(predictions=predictions, data=dataset)

In [None]:
# Aggregate view first: the summary of the scores computed over the whole test set.
print("Global Scores:")
print(results.global_scores.summary)

# Then the per-instance scores held by the same results object.
print("Instance Scores:")
print(results.instance_scores)

In [None]:
def _extract_json_object(text: str) -> Optional[dict]:
    """Return the first JSON object embedded anywhere in ``text``, or ``None``."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # This "{" did not open valid JSON (e.g. an echoed format template).
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


def evaluate_response(query: str, tool_used: str, response: str) -> dict:
    """Ask the Granite ``model`` to grade one agent interaction.

    Args:
        query: The user's original question.
        tool_used: Name of the tool the agent invoked to answer.
        response: The agent's answer to grade.

    Returns:
        The JSON object found in the model's reply (the prompt asks for the keys
        ``"score"`` and ``"explanation"``). If the model call fails or the reply
        contains no JSON object, returns
        ``{"score": 0, "explanation": "Evaluation failed"}``.
    """

    prompt = f"""<think>
Please evaluate this AI agent interaction:

User Query: {query}
Tool Used: {tool_used}
Agent Response: {response}

Rate on a scale of 1-5 (5 being best) and provide a brief explanation.
Return your evaluation in this JSON format:
{{"score": <1-5>, "explanation": "<your brief explanation>"}}
</think>"""

    try:
        # Get evaluation from Granite
        raw_reply = model.predict(prompt, max_new_tokens=200)
        # Models often wrap the JSON in prose or a ``` fence, so search the
        # reply for the object instead of requiring the reply to be pure JSON.
        verdict = _extract_json_object(raw_reply)
    except Exception:
        # Best-effort grading: any model/transport error becomes the fallback
        # result below. KeyboardInterrupt and SystemExit are deliberately NOT
        # caught so the notebook can still be interrupted.
        verdict = None

    if verdict is None:
        return {"score": 0, "explanation": "Evaluation failed"}
    return verdict

In [None]:
# evaluate_response grades ONE interaction and needs (query, tool_used, response),
# so grade each question/prediction pair separately.
# NOTE(review): the tool names below are an assumption about which SimpleAgent
# tool would answer each question — confirm against the actual agent run.
tools_used = ["get_product_info", "check_stock"]
results = [
    evaluate_response(query=item["question"], tool_used=tool_name, response=prediction)
    for item, tool_name, prediction in zip(data, tools_used, predictions)
]

In [1]:
def try_agent(query: str, tool, example_name: str = "Agent Example"):
    """Run one agent example and record it as an MLflow run.

    Args:
        query: The user's question; logged as the ``user_input`` param.
        tool: Callable invoked as ``tool(query)`` to produce the agent's response.
        example_name: MLflow run name; also used (spaces replaced by ``_``) as
            the stem of the output text file written to the working directory.

    Returns:
        Whatever ``tool(query)`` returned, so the caller can inspect or grade it.
    """
    # Imported here because no module-level `import mlflow` is visible in the notebook.
    import mlflow

    with mlflow.start_run(run_name=example_name):
        mlflow.log_param("user_input", query)

        # Produce the agent response by delegating to the supplied tool.
        response = tool(query)

        mlflow.log_param("agent_response", response)
        print(f"\n{example_name}")
        print("Input:", query)
        print("Response:", response)

        # Also keep the exchange as a text artifact attached to the run.
        output_path = f"{example_name.replace(' ', '_')}_output.txt"
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(f"Input: {query}\nResponse: {response}")
        mlflow.log_artifact(output_path)

    return response

In [None]:
# Agent evaluation tests start here!
agent = SimpleAgent()


def agent_tool(query: str) -> str:
    """Stand-in tool: describe the first catalog product whose name appears in ``query``.

    The match is a case-insensitive substring test of each product name against
    the query; returns "Product not found." when no product name occurs in it.
    """
    lowered_query = query.lower()
    for product in agent.products:
        if product["name"].lower() in lowered_query:
            return agent.get_product_info(product["id"])
    return "Product not found."


try_agent(
    query="I'm looking for an ergonimic wireless mouse",  # Your question
    tool=agent_tool
)

## Your Turn!

Try creating your own queries below. Here are some ideas:
- Ask about a different product in the catalog (for example the Coffee Maker or the Yoga Mat)
- Check the price or stock level of a product
- Ask about shipping costs and delivery times

Just copy and modify the example below: