In [30]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
import pandas as pd

# Load the dataset from Google Drive into a pandas DataFrame
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/PhD_Thesis_Experiments/GitHub_ToChair/sample_complaints_2years_006_balanced.csv')

In [32]:
import pandas as pd
import numpy as np
import json
import os
import time
from openai import AzureOpenAI
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from datetime import datetime

In [4]:
# ==================== Configuration ====================
AZURE_OPENAI_ENDPOINT = "https://datascienceagenticaiwork.cognitiveservices.azure.com/"
AZURE_OPENAI_KEY = ""
AZURE_OPENAI_API_VERSION = "2024-12-01-preview"  # Use appropriate API version
BASE_MODEL = "gpt-4o-mini"  # Base model for fine-tuning
deployment = "gpt-4o-mini-tobefinetune"


In [33]:
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Function to split hierarchical labels into product and sub-product
def split_hierarchical_label(label):
    if '::' in label:
        return label.split('::')
    else:
        return [label, 'None'] # Handle cases with no sub-product

# Function to calculate hierarchical metrics
def hierarchical_metrics(y_true, y_pred):
    product_true = [split_hierarchical_label(label)[0] for label in y_true]
    sub_product_true = [split_hierarchical_label(label)[1] for label in y_true]
    product_pred = [split_hierarchical_label(label)[0] for label in y_pred]
    sub_product_pred = [split_hierarchical_label(label)[1] for label in y_pred]

    # Calculate metrics at the product level
    product_precision = precision_score(product_true, product_pred, average='weighted', zero_division=0)
    product_recall = recall_score(product_true, product_pred, average='weighted', zero_division=0)
    product_f1 = f1_score(product_true, product_pred, average='weighted', zero_division=0)

    # Calculate metrics at the sub-product level (only for non-None sub-products)
    # We need to filter for cases where both true and predicted sub-products are not 'None'
    valid_sub_product_true = [sub for i, sub in enumerate(sub_product_true) if sub != 'None' and sub_product_pred[i] != 'None']
    valid_sub_product_pred = [sub for i, sub in enumerate(sub_product_pred) if sub != 'None' and sub_product_true[i] != 'None']


    sub_product_precision = precision_score(valid_sub_product_true, valid_sub_product_pred, average='weighted', zero_division=0) if valid_sub_product_true else 0
    sub_product_recall = recall_score(valid_sub_product_true, valid_sub_product_pred, average='weighted', zero_division=0) if valid_sub_product_true else 0
    sub_product_f1 = f1_score(valid_sub_product_true, valid_sub_product_pred, average='weighted', zero_division=0) if valid_sub_product_true else 0


    # A simple way to combine scores (can be weighted based on importance)
    # Here, we'll just average them
    hierarchical_precision = (product_precision + sub_product_precision) / 2
    hierarchical_recall = (product_recall + sub_product_recall) / 2
    hierarchical_f1 = (product_f1 + sub_product_f1) / 2

    return {
        'product_precision': product_precision,
        'product_recall': product_recall,
        'product_f1': product_f1,
        'sub_product_precision': sub_product_precision,
        'sub_product_recall': sub_product_recall,
        'sub_product_f1': sub_product_f1,
        'hierarchical_precision': hierarchical_precision,
        'hierarchical_recall': hierarchical_recall,
        'hierarchical_f1': hierarchical_f1
    }


In [8]:
import pandas as pd
import json
from sklearn.model_selection import train_test_split
import os

# ------------------------------
# 1Ô∏è‚É£ Load or use your dataframe
# ------------------------------
# If already in memory: df = your DataFrame
# Example if loading from CSV:
# df = pd.read_csv("complaints.csv")

# Ensure required columns exist
assert {"consumer_complaint_narrative", "hierarchical_label"}.issubset(df.columns), \
    "DataFrame must contain 'consumer_complaint_narrative' and 'hierarchical_label' columns."

# Drop rows with missing values
df = df.dropna(subset=["consumer_complaint_narrative", "hierarchical_label"])

# ------------------------------
# 2Ô∏è‚É£ Split into train & validation
# ------------------------------
train_df, val_df = train_test_split(df, test_size=0.1, random_state=42, stratify=df["hierarchical_label"])

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")

# ------------------------------
# 3Ô∏è‚É£ Helper function to save JSONL
# ------------------------------
def save_jsonl(dataframe, output_path):
    """Save dataframe to Azure OpenAI fine-tuning JSONL format."""
    with open(output_path, "w", encoding="utf-8") as f:
        for _, row in dataframe.iterrows():
            complaint_text = str(row["consumer_complaint_narrative"]).replace("\n", " ").replace("\r", " ").strip()
            label = str(row["hierarchical_label"]).strip()

            record = {
                "messages": [
                    {
                        "role": "system",
                        "content": "You are a helpful assistant that classifies consumer financial complaints into categories."
                    },
                    {
                        "role": "user",
                        "content": complaint_text
                    },
                    {
                        "role": "assistant",
                        "content": label
                    }
                ]
            }

            f.write(json.dumps(record, ensure_ascii=False) + "\n")

    print(f"‚úÖ File saved: {output_path} ({len(dataframe)} records)")

# ------------------------------
# 4Ô∏è‚É£ Create output directory & save
# ------------------------------
os.makedirs("fine_tune_data", exist_ok=True)

save_jsonl(train_df, "fine_tune_data/train_data.jsonl")
save_jsonl(val_df, "fine_tune_data/val_data.jsonl")

print("üéØ Training and validation JSONL files are ready for Azure OpenAI fine-tuning.")


Training samples: 14400
Validation samples: 1600
‚úÖ File saved: fine_tune_data/train_data.jsonl (14400 records)
‚úÖ File saved: fine_tune_data/val_data.jsonl (1600 records)
üéØ Training and validation JSONL files are ready for Azure OpenAI fine-tuning.


In [7]:
from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_KEY,
    api_version=AZURE_OPENAI_API_VERSION
)

In [9]:
# Upload files directly from Python
with open("fine_tune_data/train_data.jsonl", "rb") as f:
    train_file = client.files.create(file=f, purpose="fine-tune")

with open("fine_tune_data/val_data.jsonl", "rb") as f:
    val_file = client.files.create(file=f, purpose="fine-tune")

print("Train file ID:", train_file.id)
print("Validation file ID:", val_file.id)

Train file ID: file-ac2162910b0442979cf67e1323c8cd0a
Validation file ID: file-77ec932e57444752af3b0eae4524f0e2


### Full Supervised Fine‚ÄëTuning (SFT) on Azure OpenAI GPT Models

In [18]:
# ‚úÖ Continue using the same client and configuration from before
fine_tune = client.fine_tuning.jobs.create(
    model = "gpt-4o-2024-08-06",   # BASE_MODEL,  # "gpt-4.1" or "gpt-4.1-mini"
    training_file="file-ac2162910b0442979cf67e1323c8cd0a",
    validation_file="file-77ec932e57444752af3b0eae4524f0e2",
    hyperparameters={
        "n_epochs": 3  # you can tune this; 2‚Äì4 is typical
    },
    suffix="cfpb-complaint-classifier"
)

print("‚úÖ Fine-tuning job started!")
print("Job ID:", fine_tune.id)


‚úÖ Fine-tuning job started!
Job ID: ftjob-36293937da9a4d58bc21d14dcf13f813


In [46]:
status = client.fine_tuning.jobs.retrieve("ftjob-36293937da9a4d58bc21d14dcf13f813")
print("Status:", status.status)

Status: succeeded


### Get the fine‚Äëtuned model ID

In [9]:
from openai import AzureOpenAI

job_id = "ftjob-36293937da9a4d58bc21d14dcf13f813"
job = client.fine_tuning.jobs.retrieve(job_id)
print("Status:", job.status)
print("Fine-tuned model ID:", job.fine_tuned_model)

FT_MODEL_ID = job.fine_tuned_model  # e.g., 'ft:gpt-4o-2024-08-06:complaints-v1-2025-11-05'

Status: succeeded
Fine-tuned model ID: gpt-4o-2024-08-06.ft-36293937da9a4d58bc21d14dcf13f813-cfpb-complaint-classifier


### Using Azure Portal deployed the Fine-tuned model ID: gpt-4o-2024-08-06.ft-36293937da9a4d58bc21d14dcf13f813-cfpb-complaint-classifier with name:  gpt-4o-2024-08-06-custom-multiclass-classifier

In [34]:
import pandas as pd
import numpy as np
import os
from openai import AzureOpenAI
import random
import asyncio
import aiohttp
from tqdm import tqdm
from openai import AsyncAzureOpenAI

In [24]:
#DEPLOYMENT_NAME = "gpt-4o-2024-08-06-custom-multiclass-classifier"

In [38]:
# ==========================================================
# 2Ô∏è‚É£ Dataset Setup
# ==========================================================
# Example: df has ['consumer_complaint_narrative', 'hierarchical_label']
# df = pd.read_csv('cfpb_complaints.csv')
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['hierarchical_label'])
unique_labels = df['hierarchical_label'].unique().tolist()

In [39]:
AZURE_API_KEY = ""
AZURE_ENDPOINT = "https://datascienceagenticaiwork.cognitiveservices.azure.com/"
API_VERSION = "2024-12-01-preview"
MODEL_NAME = "gpt-4o"
DEPLOYMENT_NAME = "gpt-4o-2024-08-06-custom-multiclass-classifier"

# -----------------------------
# INITIALIZE CLIENT
# -----------------------------
# Use AsyncAzureOpenAI for asynchronous operations
aclient = AsyncAzureOpenAI(
    api_key=AZURE_API_KEY,
    api_version=API_VERSION,
    azure_endpoint=AZURE_ENDPOINT
)

# --- Set your Azure OpenAI credentials and endpoint ---
os.environ["AZURE_OPENAI_API_KEY"] = AZURE_API_KEY  # <-- Put your key (or store securely)
os.environ["AZURE_OPENAI_ENDPOINT"] = AZURE_ENDPOINT # <-- Your endpoint

# Create the Azure OpenAI client (use the API version supported by your resource)
client = AzureOpenAI(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version = API_VERSION,
    azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"],
)

# Use your **deployment name** (not the base model name)
# deployment_name = MODEL_NAME  # e.g., "gpt4o-mini-prod"

In [40]:
# ==========================================================
# 4Ô∏è‚É£ Async GPT-4.1 Zero-Shot Classifier
# ==========================================================
import asyncio
import random

async def classify_with_gpt40(session, text, label_list, max_retries=3):
    prompt = f"""
    You are a financial complaint classifier.
    Given the following consumer complaint, classify it into one of these hierarchical categories:
    {', '.join(label_list)}

    Complaint:
    "{text}"

    Respond with exactly one label from the list above.
    """

    url = f"{AZURE_ENDPOINT}openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version={API_VERSION}"
    headers = {"Content-Type": "application/json", "api-key": AZURE_API_KEY}
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.0,
        "max_tokens": 50
    }

    for attempt in range(max_retries):
        try:
            async with session.post(url, headers=headers, json=payload, timeout=60) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    return data["choices"][0]["message"]["content"].strip()

                # Handle 429 (Rate Limit)
                elif resp.status == 429:
                    await asyncio.sleep(1 + random.random())  # small random delay
                    continue  # retry silently

                # Handle transient server errors quietly
                elif 500 <= resp.status < 600:
                    await asyncio.sleep(1 + random.random())
                    continue  # retry silently

                # For other non-critical errors: skip quietly
                else:
                    return ""

        except Exception:
            # Suppress exceptions silently, retry
            await asyncio.sleep(0.5)
            continue

    # Return empty label if all retries fail
    return ""


In [41]:
# ==========================================================
# 5Ô∏è‚É£ Parallel Inference
# ==========================================================
async def run_parallel_inference(test_data, label_list, max_concurrent=5):
    y_true, y_pred = [], []
    semaphore = asyncio.Semaphore(max_concurrent)

    async with aiohttp.ClientSession() as session:
        tasks = []

        async def process_row(row):
            async with semaphore:
                predicted = await classify_with_gpt40(session, row['consumer_complaint_narrative'], label_list)
                return row['hierarchical_label'], predicted

        for _, row in test_data.iterrows():
            tasks.append(process_row(row))

        for f in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Classifying", leave=False):
            true_label, pred_label = await f
            y_true.append(true_label)
            y_pred.append(pred_label)

    return y_true, y_pred


In [42]:
# ==========================================================
# 6Ô∏è‚É£ Main Execution
# ==========================================================
# y_true, y_pred = asyncio.run(run_parallel_inference(test_df, unique_labels, max_concurrent=5))
import nest_asyncio
import asyncio

nest_asyncio.apply()  # allows reusing event loop inside notebook

# Run async inference
y_true, y_pred = await run_parallel_inference(test_df, unique_labels, max_concurrent=5)





In [43]:
# ==========================================================
# 7Ô∏è‚É£ Evaluate Results
# ==========================================================
metrics = hierarchical_metrics(y_true, y_pred)
for key, value in metrics.items():
    print(f"{key.replace('_', ' ').title()}: {value:.4f}")

Product Precision: 0.7702
Product Recall: 0.6328
Product F1: 0.6798
Sub Product Precision: 0.6596
Sub Product Recall: 0.5737
Sub Product F1: 0.5621
Hierarchical Precision: 0.7149
Hierarchical Recall: 0.6033
Hierarchical F1: 0.6210
