In [9]:
import pandas as pd
import time
from google.colab import userdata
# Load the ticket dataset into a DataFrame. The path is bound once so that the
# error message below always names the file that was actually requested.
ticket_dataset_path = '/content/ticket_dataset.json'
try:
    df_from_json = pd.read_json(ticket_dataset_path)
except FileNotFoundError:
    # NOTE: df_from_json stays undefined on this path; upload the file and re-run the cell.
    print(f"Error: '{ticket_dataset_path}' not found. Please upload the file or provide the correct path.")

In [4]:
from google import genai
from google.genai import types
from pydantic import BaseModel
from typing import List

# 1. Configure the Gemini client.
# The API key is looked up with `userdata.get` under the name 'GOOGLE_API_KEY'.
# `userdata` is imported from `google.colab` in the cell that also imports pandas,
# so that cell has to be executed before this one.
api_key = userdata.get('GOOGLE_API_KEY')
client = genai.Client(api_key=api_key)

In [6]:
# 2. Define your schema using Pydantic
# This model is passed as `response_schema` to `generate_content`, and the evaluation
# code reads the predicted label from the 'class_' key of the returned JSON.
class ClassificationOutput(BaseModel):
    # Predicted category. The trailing underscore is needed because `class` is a
    # reserved word in Python; the JSON returned by the model uses the key "class_".
    class_: str
    # Short free-text justification for the chosen category.
    reason: str
    # Confidence as an integer; the few-shot examples use values such as 90-98,
    # presumably on a 0-100 scale (no bounds are enforced here) -- TODO confirm.
    confidence: int

In [7]:
#Add Few Shot Prompt as function
# Few-shot Examples Prompting
def classify_text_few_shot(text_to_classify, client, model="gemini-2.5-flash",
                           temperature=0.5, max_output_tokens=5000):
    """
    Classifies text using few-shot prompting with the Gemini model.

    The prompt shows three worked examples and then the text to classify. The
    response is constrained to the ClassificationOutput schema, so the key names
    used in the prompt ("class_", "reason", "confidence") match that schema.

    Args:
        text_to_classify: The text to classify.
        client: The Gemini client object.
        model: Name of the model passed to `generate_content`.
        temperature: Sampling temperature passed in the generation config.
        max_output_tokens: Upper bound on generated tokens passed in the config.

    Returns:
        The response text, expected to be a JSON string with the keys "class_",
        "reason", and "confidence". It is returned unparsed; callers should be
        prepared for a value that is not valid JSON.
        NOTE(review): the SDK may yield no text at all for some responses
        (e.g. blocked or truncated output) -- confirm against the SDK docs.
    """
    # Every example ends with an "Output:" line, so the final query does too; this
    # gives the model an unambiguous cue to answer in the same format.
    prompt_few_shot = f"""
    Classify the following text into one of these categories: Billing, Account Access, Technical Issue, Feature Request, Spam.
    Provide the output as a JSON object with three keys: "class_", "reason", and "confidence".

    Text: "The desktop app crashes whenever I click ‘Sync’."
    Output: {{"class_": "Technical Issue", "reason": "The text describes a software application crashing, indicating a technical problem.", "confidence": 95}}

    Text: "Could you add a dark mode toggle so the interface isn’t so bright at night?"
    Output: {{"class_": "Feature Request", "reason": "The text asks for a new functionality or option to be added to the software.", "confidence": 90}}

    Text: "Limited-time crypto airdrop! Claim your reward now by clicking this link."
    Output: {{"class_": "Spam", "reason": "The text promotes a suspicious offer and urges clicking a link, characteristic of spam.", "confidence": 98}}

    Text: "{text_to_classify}"
    Output:
    """

    # 4. Call Gemini via the new SDK with schema enforcement
    response_few_shot = client.models.generate_content(
        model=model,
        contents=[prompt_few_shot],
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=ClassificationOutput,
            temperature=temperature,
            max_output_tokens=max_output_tokens,
        ),
    )
    return response_few_shot.text

# Smoke test: classify a single billing-style ticket and show the raw response text.
text_to_classify_example = "Payment failed twice today during renewal, but my card is valid. Why isn't it going through?"
classification_output = classify_text_few_shot(text_to_classify_example, client)
# One print call with a newline separator emits the header and the result on separate lines.
print("\nFew-shot Examples Prompting Function Output:", classification_output, sep="\n")


Few-shot Examples Prompting Function Output:
{"class_": "Billing", "reason": "The text describes a problem with a payment failing during a renewal process, which is directly related to billing and financial transactions.", "confidence": 97}


In [8]:
import json
from sklearn.metrics import accuracy_score

def evaluate_classification_accuracy(file_path, client, delay_seconds=30):
    """
    Loads a dataset from a JSON file, classifies each text entry using few-shot prompting,
    and calculates the accuracy compared to the original labels.

    Entries whose model output cannot be parsed, or that carry no 'class_' value, are
    reported and skipped. They are NOT counted as errors, so the returned accuracy is
    computed only over the entries that produced a usable prediction.

    Args:
        file_path: The path to the JSON dataset file. The file is expected to hold a
            list of objects with 'text' and 'class' keys.
        client: The Gemini client object.
        delay_seconds: Pause inserted between consecutive classification calls.
            Defaults to 30; pass 0 to disable the pause.

    Returns:
        The accuracy score (float), or None when the dataset file is missing or no
        valid prediction was produced.
    """
    try:
        # JSON text is read as UTF-8 explicitly instead of relying on the platform default.
        with open(file_path, 'r', encoding='utf-8') as f:
            dataset = json.load(f)
    except FileNotFoundError:
        print(f"Error: Dataset file not found at {file_path}")
        return None

    predictions = []
    true_labels = []
    last_index = len(dataset) - 1

    for i, entry in enumerate(dataset):
        text = entry['text']
        true_label = entry['class']

        # Classify the text using the few-shot function
        classification_output_str = classify_text_few_shot(text, client)

        try:
            # Parse the JSON output from the classification function
            classification_output = json.loads(classification_output_str)
        except (json.JSONDecodeError, TypeError):
            # TypeError covers a missing response (None) in addition to malformed JSON.
            print(f"Error decoding JSON output for text: {text}")
        else:
            # Use 'class_' to match the Pydantic schema; anything that is not a JSON
            # object is treated the same as a missing label.
            if isinstance(classification_output, dict):
                predicted_label = classification_output.get('class_')
            else:
                predicted_label = None
            print(f" Predicted : {predicted_label} - Actual : {true_label}")
            if predicted_label:
                predictions.append(predicted_label)
                true_labels.append(true_label)
            else:
                print(f"Warning: Could not extract predicted class for text: {text}")

        # Pause only between requests; waiting after the final entry serves no purpose.
        if delay_seconds and delay_seconds > 0 and i < last_index:
            time.sleep(delay_seconds)

    if not predictions:
        print("No valid predictions were made.")
        return None

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predictions)
    return accuracy

# Example usage: score the few-shot classifier against the labelled ticket dataset.
# NOTE: the evaluation pauses about 30 seconds per entry (presumably to stay under
# API rate limits), so this cell takes several minutes on a 20-entry dataset.
dataset_file = '/content/ticket_dataset.json'  # Replace with the actual path to your dataset
accuracy = evaluate_classification_accuracy(dataset_file, client)

# None is returned when the dataset file is missing or no prediction could be used.
if accuracy is not None:
    print(f"\nClassification Accuracy: {accuracy:.4f}")

 Predicted : Billing - Actual : Billing
 Predicted : Account Access - Actual : Account Access
 Predicted : Technical Issue - Actual : Technical Issue
 Predicted : Feature Request - Actual : Feature Request
 Predicted : Spam - Actual : Spam
 Predicted : Technical Issue - Actual : Technical Issue
 Predicted : Billing - Actual : Billing
 Predicted : Feature Request - Actual : Feature Request
 Predicted : Account Access - Actual : Account Access
 Predicted : Billing - Actual : Billing
 Predicted : Account Access - Actual : Account Access
 Predicted : Technical Issue - Actual : Technical Issue
 Predicted : Feature Request - Actual : Feature Request
 Predicted : Account Access - Actual : Account Access
 Predicted : Billing - Actual : Billing
 Predicted : Spam - Actual : Spam
 Predicted : Feature Request - Actual : Feature Request
 Predicted : Technical Issue - Actual : Technical Issue
 Predicted : Account Access - Actual : Account Access
 Predicted : Billing - Actual : Billing

Classificatio