In [11]:
import os
import time

import pandas as pd
from openai import AzureOpenAI

In [2]:
# ===============================
# 1. Feature Library
# ===============================

feature_library_data = {
    "feature_name": [
        "wirein_ct",
        "perc_hrg_wire_amt",
        "degree_centrality"
    ],
    "feature_meaning": [
        "Number of wire inbound transactions",
        "Percentage of wire amount associated with high-risk geographic country",
        "Number of connections an entity has in a network (degree centrality)"
    ]
}

feature_library_df = pd.DataFrame(feature_library_data)
feature_library_df.to_csv("feature_library.csv", index=False)
print("feature_library.csv saved.")

# ===============================
# 2. Feature Score
# ===============================

feature_score_data = {
    "feature_name": [
        "wirein_ct",
        "perc_hrg_wire_amt",
        "degree_centrality"
    ],
    "score": [
        0.52,
        0.36,
        0.12
    ]
}

feature_score_df = pd.DataFrame(feature_score_data)
feature_score_df.to_csv("feature_score.csv", index=False)
print("feature_score.csv saved.")

# ===============================
# 3. Risk Score
# ===============================

risk_score_data = {
    "risk_score": [
        0.8
    ]
}

risk_score_df = pd.DataFrame(risk_score_data)
risk_score_df.to_csv("risk_score.csv", index=False)
print("risk_score.csv saved.")

feature_library.csv saved.
feature_score.csv saved.
risk_score.csv saved.


In [3]:
# ===============================
# 1. Load the data
# ===============================

feature_library = pd.read_csv("feature_library.csv")
feature_score = pd.read_csv("feature_score.csv")
risk_score_df = pd.read_csv("risk_score.csv")

In [6]:
# Sanity check: show the three frames read back from the CSV files as one tuple.
feature_library, feature_score, risk_score_df

(        feature_name                                    feature_meaning
 0          wirein_ct                Number of wire inbound transactions
 1  perc_hrg_wire_amt  Percentage of wire amount associated with high...
 2  degree_centrality  Number of connections an entity has in a netwo...,
         feature_name  score
 0          wirein_ct   0.52
 1  perc_hrg_wire_amt   0.36
 2  degree_centrality   0.12,
    risk_score
 0         0.8)

In [9]:
# ========== 2. Prepare the Prompt ==========
# Sort feature scores by descending contribution
feature_score_df = feature_score_df.sort_values(by="score", ascending=False)

# Merge the scores with the feature meanings
merged_df = pd.merge(
    feature_score_df,
    feature_library_df,
    on="feature_name",
    how="left"
)

# Extract the risk score
risk_score = risk_score_df.iloc[0]["risk_score"]

# Format features for the prompt
features_text = ""
for _, row in merged_df.iterrows():
    feature_name = row["feature_name"]
    feature_meaning = row["feature_meaning"]
    score = row["score"]
    features_text += (
        f"- {feature_name} ({feature_meaning}): {score:.0%} contribution\n"
    )

# Build the prompt
prompt_text = f"""
You are a risk model explanation assistant. Given a risk score and a list of features with their descriptions and contributions, generate a clear, concise narrative explaining the risk score.

Risk Score: {risk_score:.0%}
Top Features and Contributions:
{features_text}

Please produce a narrative that:
- Starts with the risk score
- Explains each feature’s contribution in plain language
- Highlights why each feature might indicate a higher risk
"""

print("=== Prompt Text ===")
print(prompt_text)

=== Prompt Text ===

You are a risk model explanation assistant. Given a risk score and a list of features with their descriptions and contributions, generate a clear, concise narrative explaining the risk score.

Risk Score: 80%
Top Features and Contributions:
- wirein_ct (Number of wire inbound transactions): 52% contribution
- perc_hrg_wire_amt (Percentage of wire amount associated with high-risk geographic country): 36% contribution
- degree_centrality (Number of connections an entity has in a network (degree centrality)): 12% contribution


Please produce a narrative that:
- Starts with the risk score
- Explains each feature’s contribution in plain language
- Highlights why each feature might indicate a higher risk



In [None]:
# ========== 3. Call Azure OpenAI API ==========

# Fill in your Azure details here:
api_key = "YOUR_AZURE_OPENAI_API_KEY"
api_base = "YOUR_AZURE_OPENAI_ENDPOINT"       # e.g. "https://your-resource-name.openai.azure.com/"
api_version = "2024-02-15-preview"            # Adjust if needed
deployment_name = "YOUR_DEPLOYMENT_NAME"      # e.g. "gpt-4"

client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=api_base
)


# Measure generation time
start_time = time.time()

response = client.chat.completions.create(
    model=deployment_name,
    messages=[
        {"role": "system", "content": "You are a helpful assistant that explains model risk scores."},
        {"role": "user", "content": prompt_text}
    ],
    temperature=0.3,
    max_tokens=500
)

end_time = time.time()
generation_time = end_time - start_time

generated_text = response.choices[0].message.content.strip()

# ========== 4. Print the Response ==========
generated_text = response.choices[0].message.content.strip()
print("\n=== Generated Narrative ===")
print(generated_text)
print(f"\nGeneration Time: {generation_time:.2f} seconds")

# ========== 4. Evaluate the Explanation ==========
evaluation_prompt = f"""
Please evaluate the following risk explanation on the following criteria (scale 1-5):
1. Clarity
2. Conciseness
3. Completeness

Provide a short justification for each score.

Generated Explanation:
\"\"\"
{generated_text}
\"\"\"
"""

evaluation_response = client.chat.completions.create(
    model=deployment_name,
    messages=[
        {"role": "system", "content": "You are an evaluation assistant that rates the quality of risk explanations."},
        {"role": "user", "content": evaluation_prompt}
    ],
    temperature=0.0,
    max_tokens=300
)

evaluation_text = evaluation_response.choices[0].message.content.strip()

print("\n=== Evaluation ===")
print(evaluation_text)

In [None]:
# Define your deployed Azure OpenAI judge models
judge_models = {
    "GPT-4": "gpt-4-deployment",          # Replace with your Azure deployment name
    "GPT-4o": "gpt-4o-deployment",        # Replace with your Azure deployment name
    "GPT-35-Turbo": "gpt-35-turbo-deployment"  # Replace with your Azure deployment name
}

# Function to evaluate with each judge model
def evaluate_with_judge(deployment_name, prompt, output):
    eval_prompt = f"""
You are an evaluation assistant. Given a prompt and a model-generated answer, please assess the quality of the answer based on:
- Clarity (1-5)
- Conciseness (1-5)
- Completeness (1-5)
Provide a score for each, then write a short summary comment.

Prompt:
{prompt}

Model-Generated Answer:
{output}
"""
    response = openai.ChatCompletion.create(
        engine=deployment_name,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that evaluates AI-generated text."},
            {"role": "user", "content": eval_prompt}
        ],
        temperature=0
    )
    eval_text = response['choices'][0]['message']['content']
    return eval_text

# Evaluate using each judge model
results = []
start_time = datetime.now()

for judge_name, deployment_name in judge_models.items():
    eval_start = datetime.now()
    eval_text = evaluate_with_judge(deployment_name, prompt_text, generated_narrative)
    eval_end = datetime.now()
    time_taken = (eval_end - eval_start).total_seconds()
    results.append({
        "Judge Model": judge_name,
        "Evaluation Text": eval_text.strip(),
        "Time Taken (s)": time_taken
    })

end_time = datetime.now()
total_time = (end_time - start_time).total_seconds()

# Convert results to DataFrame and save
df_results = pd.DataFrame(results)

print("\n=== Evaluation Results ===")
print(df_results.to_markdown(index=False))

df_results.to_csv("evaluation_results_azure_openai.csv", index=False)
print(f"\nTotal evaluation time: {total_time:.2f} seconds")