In [34]:
import pandas as pd
import torch
import numpy as np
from qdrant_client import QdrantClient
from transformers import pipeline
import huggingface
from typing import Optional, Union, List, Tuple
import torch.nn.functional as F
import logging
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import VectorParams, Distance, PointStruct, Filter, FieldCondition, MatchValue
from huggingface_hub import login
from HF_t import hf_token_read
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Vector Database

In [19]:
# Initilizae the Qdrant
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import VectorParams, Distance, PointStruct, Filter, FieldCondition, MatchValue

#initialize with a local path to persist data on disk without a server:
#client = QdrantClient('./PetHealth_Chatbot/Vector_Database')
# Connect to a local Qdrant instance
client = QdrantClient(url="http://localhost:6333") # Or specify your actual local host/port

collection_name = "vet_notes"

collection_info = client.get_collection(collection_name=collection_name)

INFO:httpx:HTTP Request: GET http://localhost:6333 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/vet_notes "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/vet_notes "HTTP/1.1 200 OK"


# Loading Models

First, we'll load two different models:
1. VetBERT model for generating embeddings (vector representations)
2. Classification model for predicting conditions

In [27]:
# Load VetBERT model for embeddings
print("Loading VetBERT model for embeddings...")
vetbert_model = AutoModel.from_pretrained("havocy28/VetBERT")
vetbert_tokenizer = AutoTokenizer.from_pretrained("havocy28/VetBERT")

# Check if a GPU is available and move the model to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vetbert_model.to(device)
print("VetBERT model loaded successfully")

Loading VetBERT model for embeddings...
VetBERT model loaded successfully


In [35]:
def get_vetbert_embeddings(
    text: str,
    model: AutoModel,
    tokenizer: AutoTokenizer,
    device: torch.device,
    return_numpy: bool = True
) -> Union[torch.Tensor, np.ndarray]:
    """
    Generate embeddings using VetBERT model.
    
    Args:
        text (str): Input text to generate embeddings for
        model: VetBERT model
        tokenizer: VetBERT tokenizer
        device: Computation device (CPU/GPU)
        return_numpy: If True, returns numpy array (for Qdrant), if False returns PyTorch tensor
        
    Returns:
        Union[torch.Tensor, np.ndarray]: Text embeddings from the [CLS] token
    """
    try:
        # Ensure model is in eval mode
        model.eval()
        
        # Tokenize the input
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        )
        
        # Move inputs to device
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        # Generate embeddings
        with torch.no_grad():
            outputs = model(**inputs)
            # Get the [CLS] token embedding (first token)
            embeddings = outputs.last_hidden_state[:, 0, :]
            
        # Convert to numpy if requested
        if return_numpy:
            embeddings = embeddings.cpu().numpy()
            
        return embeddings
        
    except Exception as e:
        logger.error(f"Error generating embeddings: {str(e)}")
        raise

In [21]:
#loading the model from hugging face
#Use the token for reading to load the model in huging face
hf_token_write = "hf_token_read"

# In your notebook
# from huggingface_hub import login
# from HF_t import hf_token_read  # Import your token securely

login(token=hf_token_read)

# Replace "your-username/my-awesome-fine-tuned-model" with your actual repository ID
repo_id = "fdastak/model_calssification" 

# Load the fine-tuned model and its tokenizer
model_Class= AutoModelForSequenceClassification.from_pretrained(repo_id)
tokenizer_class = AutoTokenizer.from_pretrained(repo_id)

In [40]:
# Example usage
input_text = "Something grows on one of my dog ears"  # Your input text

# Create a mapping dictionary for label to condition
label_to_condition = {
    0: "digestive issues",
    1: "ear infections",
    2: "mobility problems",
    3: "parasites",
    4: "skin irritations"
}

def get_condition_name(label: int) -> str:
    """
    Convert numeric label to condition name.
    
    Args:
        label (int): Numeric label from the classifier
        
    Returns:
        str: Corresponding condition name
    """
    return label_to_condition.get(label, "unknown condition")

try:
    # Step 1: Generate embeddings using VetBERT for vector search (in numpy format for Qdrant)
    query_vector = get_vetbert_embeddings(
        text=input_text,
        model=vetbert_model,
        tokenizer=vetbert_tokenizer,
        device=device,
        return_numpy=True  # Get numpy array directly for Qdrant
    )
    print(f"Generated embeddings shape: {query_vector.shape}")
    
    # Step 2: Get condition prediction using classification model
    predicted_label, confidence = predict_condition(
        text=input_text,
        classifier_model=model_Class,  # Use classification model for predictions
        classifier_tokenizer=tokenizer_class,  # Use classification tokenizer
        device=device
    )
    
    # Convert numeric label to condition name
    condition_name = get_condition_name(predicted_label)
    
    print(f"\nClassification results:")
    print(f"Predicted Label: {predicted_label} ({condition_name})")
    print(f"Confidence Score: {confidence:.4f}")
    
    # Step 3: Search similar cases in Qdrant
    filter_by_category = Filter(
        must=[FieldCondition(key="condition", match=MatchValue(value=condition_name))]
    )
    
    # Using search instead of query_points since it's working
    results = client.search(
        collection_name=collection_name,
        query_vector=query_vector[0],
        limit=3,
        query_filter=filter_by_category,
        with_payload=True  # returns metadata like text and category
    )
    
    print("\nSimilar cases from the database:")
    for result in results:
        print(f"Score: {result.score:.4f}, Text: {result.payload['text']}, Category: {result.payload['condition']}")
        
except Exception as e:
    print(f"Error during processing: {str(e)}")

INFO:__main__:Successfully generated prediction with confidence 0.5530
  results = client.search(
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/vet_notes/points/search "HTTP/1.1 200 OK"
  results = client.search(
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/vet_notes/points/search "HTTP/1.1 200 OK"


Generated embeddings shape: (1, 768)

Classification results:
Predicted Label: 1 (ear infections)
Confidence Score: 0.5530

Similar cases from the database:
Score: 0.8421, Text: evaluate conformation of ear canal (stenotic breeds)., Category: ear infections
Score: 0.8395, Text: use tris-edta containing ear flush for pseudomonas infections., Category: ear infections
Score: 0.8367, Text: use ear wick or packing with medication for severe stenosis., Category: ear infections


# Agent System

We'll create two agents that work together:
1. **Classification Agent**: Identifies the pet's condition from user input
2. **Retrieval Agent**: Finds relevant veterinary notes based on the identified condition

In [41]:
class ClassificationAgent:
    def __init__(self, model, tokenizer, device):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device
        self.label_to_condition = {
            0: "digestive issues",
            1: "ear infections",
            2: "mobility problems",
            3: "parasites",
            4: "skin irritations"
        }
    
    def identify_condition(self, user_input: str) -> tuple:
        """
        Identify pet's condition from user input.
        
        Args:
            user_input (str): Description of pet's symptoms
            
        Returns:
            tuple: (condition_name, confidence_score)
        """
        try:
            # Get prediction
            predicted_label, confidence = predict_condition(
                text=user_input,
                classifier_model=self.model,
                classifier_tokenizer=self.tokenizer,
                device=self.device
            )
            
            # Convert to condition name
            condition_name = self.label_to_condition.get(predicted_label, "unknown condition")
            
            logger.info(f"Identified condition: {condition_name} with confidence: {confidence:.4f}")
            return condition_name, confidence
            
        except Exception as e:
            logger.error(f"Error in classification: {str(e)}")
            raise

class RetrievalAgent:
    def __init__(self, model, tokenizer, device, qdrant_client, collection_name):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device
        self.client = qdrant_client
        self.collection_name = collection_name
    
    def find_similar_cases(self, user_input: str, condition: str, limit: int = 3) -> list:
        """
        Find similar veterinary cases based on input and condition.
        
        Args:
            user_input (str): User's description of symptoms
            condition (str): Identified condition to filter by
            limit (int): Number of similar cases to retrieve
            
        Returns:
            list: Similar cases with their scores
        """
        try:
            # Generate embeddings for the query
            query_vector = get_vetbert_embeddings(
                text=user_input,
                model=self.model,
                tokenizer=self.tokenizer,
                device=self.device,
                return_numpy=True
            )
            
            # Set up condition filter
            condition_filter = Filter(
                must=[FieldCondition(key="condition", match=MatchValue(value=condition))]
            )
            
            # Search for similar cases
            results = self.client.search(
                collection_name=self.collection_name,
                query_vector=query_vector[0],
                limit=limit,
                query_filter=condition_filter,
                with_payload=True
            )
            
            logger.info(f"Found {len(results)} similar cases for condition: {condition}")
            return results
            
        except Exception as e:
            logger.error(f"Error in retrieval: {str(e)}")
            raise

# Initialize agents
classification_agent = ClassificationAgent(
    model=model_Class,
    tokenizer=tokenizer_class,
    device=device
)

retrieval_agent = RetrievalAgent(
    model=vetbert_model,
    tokenizer=vetbert_tokenizer,
    device=device,
    qdrant_client=client,
    collection_name=collection_name
)

In [42]:
# Example usage with the agent system
def process_user_query(user_input: str):
    """Process user query through both agents"""
    print(f"Processing query: {user_input}\n")
    
    try:
        # Step 1: Use Classification Agent to identify the condition
        condition, confidence = classification_agent.identify_condition(user_input)
        print(f"Classification Results:")
        print(f"Identified Condition: {condition}")
        print(f"Confidence Score: {confidence:.4f}\n")
        
        # Step 2: Use Retrieval Agent to find similar cases
        similar_cases = retrieval_agent.find_similar_cases(user_input, condition)
        
        print(f"Similar Cases for {condition}:")
        for case in similar_cases:
            print(f"Score: {case.score:.4f}")
            print(f"Case: {case.payload['text']}")
            print(f"Condition: {case.payload['condition']}\n")
            
    except Exception as e:
        print(f"Error processing query: {str(e)}")

# Test the system
user_input = "Something grows on one of my dog ears"
process_user_query(user_input)

# Try another example
user_input2 = "My cat has been scratching a lot and has some red spots"
process_user_query(user_input2)

INFO:__main__:Successfully generated prediction with confidence 0.5530
INFO:__main__:Identified condition: ear infections with confidence: 0.5530
INFO:__main__:Identified condition: ear infections with confidence: 0.5530


Processing query: Something grows on one of my dog ears

Classification Results:
Identified Condition: ear infections
Confidence Score: 0.5530



  results = self.client.search(
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/vet_notes/points/search "HTTP/1.1 200 OK"
INFO:__main__:Found 3 similar cases for condition: ear infections
INFO:__main__:Found 3 similar cases for condition: ear infections
INFO:__main__:Successfully generated prediction with confidence 0.8247
INFO:__main__:Identified condition: parasites with confidence: 0.8247
INFO:__main__:Successfully generated prediction with confidence 0.8247
INFO:__main__:Identified condition: parasites with confidence: 0.8247


Similar Cases for ear infections:
Score: 0.8421
Case: evaluate conformation of ear canal (stenotic breeds).
Condition: ear infections

Score: 0.8395
Case: use tris-edta containing ear flush for pseudomonas infections.
Condition: ear infections

Score: 0.8367
Case: use ear wick or packing with medication for severe stenosis.
Condition: ear infections

Processing query: My cat has been scratching a lot and has some red spots

Classification Results:
Identified Condition: parasites
Confidence Score: 0.8247



INFO:httpx:HTTP Request: POST http://localhost:6333/collections/vet_notes/points/search "HTTP/1.1 200 OK"
INFO:__main__:Found 3 similar cases for condition: parasites
INFO:__main__:Found 3 similar cases for condition: parasites


Similar Cases for parasites:
Score: 0.8267
Case: ear mites in canal; clean and apply selamectin.
Condition: parasites

Score: 0.8093
Case: sarcoptic mange; prescribe oral ivermectin.
Condition: parasites

Score: 0.8061
Case: flea dirt present on coat combing; ctenocephalides felis infestation confirmed.
Condition: parasites



In [39]:
# Query example: find top 3 similar notes to a query vector, filtering by category "dog"
# query_vector = [...]  # embedding vector for owner input
filter_by_category = Filter(
    must=[FieldCondition(key="condition", match=MatchValue(value="ear infections"))]
)

results = client.search(
    collection_name=collection_name,
    query_vector=query_vector[0],
    limit=3,
    query_filter=filter_by_category,
    with_payload=True,  # returns metadata like text and category
)

for result in results:
    print(f"Score: {result.score:.4f}, Text: {result.payload['text']}, Category: {result.payload['condition']}")


  results = client.search(
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/vet_notes/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:6333/collections/vet_notes/points/search "HTTP/1.1 200 OK"


Score: 0.8421, Text: evaluate conformation of ear canal (stenotic breeds)., Category: ear infections
Score: 0.8395, Text: use tris-edta containing ear flush for pseudomonas infections., Category: ear infections
Score: 0.8367, Text: use ear wick or packing with medication for severe stenosis., Category: ear infections
