In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there
# (the dataset is read from under this mount point) are visible to the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Load the complaints dataset from the mounted Google Drive into a pandas DataFrame.
# Later cells read its 'consumer_complaint_narrative' and 'hierarchical_label' columns.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/PhD_Thesis_Experiments/GitHub_ToChair/sample_complaints_2years_006_balanced.csv')

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Function to split hierarchical labels into product and sub-product
def split_hierarchical_label(label):
    """Break a ``product::sub_product`` label into its components.

    Args:
        label: hierarchical label string, optionally containing ``'::'``.

    Returns:
        The list obtained by splitting ``label`` on ``'::'``. When the
        separator is absent the result is ``[label, 'None']``, so index 1
        always exists and holds the placeholder string ``'None'``.
    """
    # Guard clause: without a separator there is no sub-product part.
    if '::' not in label:
        return [label, 'None']
    return label.split('::')

# Function to calculate hierarchical metrics
def hierarchical_metrics(y_true, y_pred):
    """Score predictions at the product level, the sub-product level, and combined.

    Every label is decomposed with ``split_hierarchical_label``. Product-level
    scores use all rows. Sub-product scores use only rows where both the true
    and the predicted label carry a sub-product (neither is ``'None'``); when
    no such row exists the three sub-product scores are 0. All scores are
    weighted averages computed with ``zero_division=0``.

    Args:
        y_true: ground-truth hierarchical labels.
        y_pred: predicted hierarchical labels, aligned with ``y_true``.

    Returns:
        dict with precision/recall/F1 under the prefixes ``product_``,
        ``sub_product_`` and ``hierarchical_`` (the plain mean of the other two).
    """
    def _level(labels, position):
        # position 0 -> product part, position 1 -> sub-product part
        return [split_hierarchical_label(item)[position] for item in labels]

    def _weighted(metric, truth, guess):
        return metric(truth, guess, average='weighted', zero_division=0)

    true_products = _level(y_true, 0)
    true_subs = _level(y_true, 1)
    pred_products = _level(y_pred, 0)
    pred_subs = _level(y_pred, 1)

    # Product level: every row participates.
    product_precision = _weighted(precision_score, true_products, pred_products)
    product_recall = _weighted(recall_score, true_products, pred_products)
    product_f1 = _weighted(f1_score, true_products, pred_products)

    # Sub-product level: keep only rows where both sides name a sub-product.
    kept_pairs = [
        (actual, guessed)
        for actual, guessed in zip(true_subs, pred_subs)
        if actual != 'None' and guessed != 'None'
    ]
    kept_true = [actual for actual, _ in kept_pairs]
    kept_pred = [guessed for _, guessed in kept_pairs]

    if kept_true:
        sub_product_precision = _weighted(precision_score, kept_true, kept_pred)
        sub_product_recall = _weighted(recall_score, kept_true, kept_pred)
        sub_product_f1 = _weighted(f1_score, kept_true, kept_pred)
    else:
        sub_product_precision = sub_product_recall = sub_product_f1 = 0

    # Combined score: unweighted mean of the two levels.
    return {
        'product_precision': product_precision,
        'product_recall': product_recall,
        'product_f1': product_f1,
        'sub_product_precision': sub_product_precision,
        'sub_product_recall': sub_product_recall,
        'sub_product_f1': sub_product_f1,
        'hierarchical_precision': (product_precision + sub_product_precision) / 2,
        'hierarchical_recall': (product_recall + sub_product_recall) / 2,
        'hierarchical_f1': (product_f1 + sub_product_f1) / 2
    }


In [None]:
import asyncio
import logging
import os
import random

import aiohttp
import numpy as np
import pandas as pd
from openai import AsyncAzureOpenAI
from openai import AzureOpenAI
from tqdm import tqdm

In [None]:
# ==========================================================
# 2️⃣ Dataset Setup
# ==========================================================
# `df` must provide the columns 'consumer_complaint_narrative' and 'hierarchical_label'.
# (When not loading from Drive, read it from a local file instead,
#  e.g. df = pd.read_csv('cfpb_complaints.csv').)
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. Stratifying on the label keeps the class
# proportions the same in both splits; the fixed random_state makes the split repeatable.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['hierarchical_label'])
# Every distinct label in the full dataset; passed to the classifier as the candidate list.
unique_labels = df['hierarchical_label'].unique().tolist()

In [None]:
# -----------------------------
# AZURE OPENAI CONFIGURATION
# -----------------------------
# The key is read from the environment (export AZURE_OPENAI_API_KEY before running
# this cell) so the secret never has to be pasted into -- and saved with -- the notebook.
# An already-exported key is never overwritten by this cell.
AZURE_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "")

# The endpoint may be overridden through AZURE_OPENAI_ENDPOINT. It is normalised to end
# with exactly one "/" because request URLs are built by appending a path to it.
AZURE_ENDPOINT = (
    os.environ.get("AZURE_OPENAI_ENDPOINT")
    or "https://datascienceagenticaiwork.cognitiveservices.azure.com/"
).rstrip("/") + "/"
API_VERSION = "2024-12-01-preview"
MODEL_NAME = "gpt-4.1"
# Requests are addressed by **deployment name** (not the base model name),
# e.g. "gpt4o-mini-prod".
DEPLOYMENT_NAME = "gpt-4.1"

if not AZURE_API_KEY:
    # Without a key every request is rejected and predictions come back empty,
    # so make the misconfiguration visible up front.
    logging.getLogger(__name__).warning(
        "AZURE_OPENAI_API_KEY is not set; Azure OpenAI requests will fail."
    )

# Keep both variables present in the environment for code that reads them from there.
# setdefault() never clobbers a key that was exported securely beforehand.
os.environ.setdefault("AZURE_OPENAI_API_KEY", AZURE_API_KEY)
os.environ["AZURE_OPENAI_ENDPOINT"] = AZURE_ENDPOINT

# -----------------------------
# INITIALIZE CLIENTS
# -----------------------------
# Asynchronous client for async operations.
aclient = AsyncAzureOpenAI(
    api_key=AZURE_API_KEY,
    api_version=API_VERSION,
    azure_endpoint=AZURE_ENDPOINT
)

# Synchronous client (use the API version supported by your resource).
client = AzureOpenAI(
    api_key=AZURE_API_KEY,
    api_version=API_VERSION,
    azure_endpoint=AZURE_ENDPOINT,
)

In [None]:
# ==========================================================
# 4️⃣ Async GPT-4.1 Zero-Shot Classifier
# ==========================================================
import asyncio
import random

async def classify_with_gpt35(session, text, label_list, max_retries=3):
    """Zero-shot classify one complaint through the Azure OpenAI chat-completions REST API.

    The function name is historical; the request goes to whichever deployment
    ``DEPLOYMENT_NAME`` names.

    Args:
        session: open ``aiohttp.ClientSession`` used to send the POST request.
        text: complaint narrative to classify.
        label_list: candidate hierarchical labels offered to the model.
        max_retries: maximum number of request attempts.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""`` when no
        usable reply was obtained (non-retryable HTTP status, empty reply, or all
        attempts failed). Request failures are logged, never raised.
    """
    # One label per line: labels may themselves contain commas, so a
    # comma-separated list would be ambiguous to the model.
    label_block = "\n".join(f"    {label}" for label in label_list)
    prompt = f"""
    You are a financial complaint classifier.
    Given the following consumer complaint, classify it into one of these hierarchical categories (one per line):
{label_block}

    Complaint:
    "{text}"

    Respond with exactly one label from the list above.
    """

    # Tolerate an endpoint configured with or without a trailing slash.
    url = (
        f"{AZURE_ENDPOINT.rstrip('/')}/openai/deployments/{DEPLOYMENT_NAME}"
        f"/chat/completions?api-version={API_VERSION}"
    )
    headers = {"Content-Type": "application/json", "api-key": AZURE_API_KEY}
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.0,
        "max_tokens": 50
    }
    # aiohttp expects a ClientTimeout object; passing a bare number is deprecated.
    request_timeout = aiohttp.ClientTimeout(total=60)
    log = logging.getLogger(__name__)

    for attempt in range(max_retries):
        # Exponential backoff with jitter: about 1s, 2s, 4s, ... capped at 30s.
        delay = min(2 ** attempt, 30) + random.random()
        try:
            async with session.post(url, headers=headers, json=payload, timeout=request_timeout) as resp:
                status = resp.status
                if status == 200:
                    data = await resp.json()
                    # content can be null (e.g. a filtered reply); treat it as "no label".
                    content = data["choices"][0]["message"].get("content")
                    return (content or "").strip()

                if status == 429 or 500 <= status < 600:
                    # Rate limit / transient server error: retry, honouring the
                    # server's Retry-After hint when it is given in seconds.
                    retry_after = resp.headers.get("Retry-After")
                    if retry_after is not None:
                        try:
                            delay = max(delay, min(float(retry_after), 60.0))
                        except ValueError:
                            pass  # HTTP-date form: keep the computed backoff
                    log.debug("Attempt %d/%d got HTTP %s; retrying", attempt + 1, max_retries, status)
                else:
                    # Other statuses (bad key, bad request, ...) will not succeed on retry.
                    log.warning("Classification request rejected with HTTP %s; returning empty label", status)
                    return ""
        except Exception as exc:
            # Best effort: network errors, timeouts and malformed payloads are
            # retried, but recorded instead of being hidden.
            log.debug("Attempt %d/%d failed: %r", attempt + 1, max_retries, exc)

        # Wait outside the `async with` so the connection is released first,
        # and skip the pointless wait after the final attempt.
        if attempt + 1 < max_retries:
            await asyncio.sleep(delay)

    # Every attempt failed: return an empty label.
    log.warning("Classification failed after %d attempts; returning empty label", max_retries)
    return ""


In [None]:
# ==========================================================
# 5️⃣ Parallel Inference
# ==========================================================
async def run_parallel_inference(test_data, label_list, max_concurrent=5):
    """Classify every row of ``test_data`` concurrently and return aligned label lists.

    Args:
        test_data: DataFrame with the columns 'consumer_complaint_narrative'
            and 'hierarchical_label'.
        label_list: candidate labels passed to ``classify_with_gpt35``.
        max_concurrent: maximum number of classification requests in flight.

    Returns:
        ``(y_true, y_pred)``: two lists in the row order of ``test_data``, so
        element ``i`` of each list belongs to the ``i``-th row. Requests finish
        in arbitrary order; results are written back by position so the output
        is stable across runs and can be joined onto ``test_data``.
    """
    semaphore = asyncio.Semaphore(max_concurrent)

    # Pull the two columns once instead of materialising a Series per row.
    narratives = test_data['consumer_complaint_narrative'].tolist()
    y_true = test_data['hierarchical_label'].tolist()
    y_pred = [""] * len(narratives)

    async with aiohttp.ClientSession() as session:

        async def process_row(position, narrative):
            # The semaphore bounds how many requests run at the same time.
            async with semaphore:
                predicted = await classify_with_gpt35(session, narrative, label_list)
                return position, predicted

        tasks = [process_row(position, narrative) for position, narrative in enumerate(narratives)]

        # as_completed drives the progress bar; the carried position restores row order.
        for finished in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Classifying", leave=False):
            position, predicted = await finished
            y_pred[position] = predicted

    return y_true, y_pred


In [None]:
# ==========================================================
# 6️⃣ Main Execution
# ==========================================================
# Where no event loop is already running (e.g. a plain script), the equivalent call is:
# y_true, y_pred = asyncio.run(run_parallel_inference(test_df, unique_labels, max_concurrent=5))
import nest_asyncio
import asyncio

nest_asyncio.apply()  # patches asyncio so the notebook's already-running event loop can be reused

# Run async inference over the held-out split; the top-level `await` relies on the notebook's event loop
y_true, y_pred = await run_parallel_inference(test_df, unique_labels, max_concurrent=5)





In [None]:
# ==========================================================
# 7️⃣ Evaluate Results
# ==========================================================
# Score the predictions at product level, sub-product level, and their average.
metrics = hierarchical_metrics(y_true, y_pred)
# Print each metric as "Title Cased Name: value", rounded to four decimal places.
for key, value in metrics.items():
    print(f"{key.replace('_', ' ').title()}: {value:.4f}")

Product Precision: 0.7751
Product Recall: 0.5369
Product F1: 0.6016
Sub Product Precision: 0.6739
Sub Product Recall: 0.5061
Sub Product F1: 0.5022
Hierarchical Precision: 0.7245
Hierarchical Recall: 0.5215
Hierarchical F1: 0.5519
