# Upload and run in your Fabric Workspace

## Step 1: Install SDK

In [None]:
%pip install -U fabric-data-agent-sdk

## Step 2: Configuration

Update these values to match your environment:

In [None]:
# ------------------------------------------------------------
# NOTEBOOK SETTINGS - edit these to match your environment
# ------------------------------------------------------------

# Name of the Data Agent (it is created in your workspace by a later step).
DATA_AGENT_NAME = "InsuranceClaimsDataAgent"

# Name of the Lakehouse; it must already exist and hold the claims data tables.
LAKEHOUSE_NAME = "InsuranceClaimsLakehouse"

# Schema that contains the tables ("dbo" is the Lakehouse SQL endpoint default).
SCHEMA_NAME = "dbo"

# Tables from SCHEMA_NAME that the agent is allowed to query.
TABLES = [
    "claims_history",
    "claimant_profiles",
    "fraud_indicators",
    "regional_statistics",
    "policy_claims_summary",
]

## Step 3: Define Agent Instructions

In [None]:
# Agent-level instructions, kept as one multi-line string. The text covers the
# agent's role, which table to use for which kind of question, definitions of
# key columns, and response rules. A later cell passes this string to
# data_agent.update_configuration(instructions=...).
AGENT_INSTRUCTIONS = """
You are an Insurance Claims Data Analyst agent that helps users analyze historical claims data, identify patterns, assess risk, and support claim processing decisions.

## Your Role
You assist insurance professionals with:
- Analyzing historical claims data to support current claim assessments
- Identifying fraud patterns and risk indicators
- Providing benchmarking data for claim amounts by type and region
- Analyzing claimant history and risk profiles
- Generating insights about claims trends and patterns

## Query Routing Guidelines

### Use claims_history for:
- Looking up specific claims by claim_id or policy_number
- Analyzing claims by type, status, date range, or location
- Finding claims with specific characteristics (fraud_flag, police_report, photos_provided)
- Vehicle-related queries (by make, model, year, VIN)
- Calculating average claim amounts for specific claim types

### Use claimant_profiles for:
- Looking up claimant information by claimant_id
- Analyzing customer risk profiles (risk_score, credit_score, driving_record)
- Finding customers with specific claim frequencies
- Customer contact information queries

### Use fraud_indicators for:
- Finding claims with specific fraud patterns
- Analyzing fraud investigation status
- Identifying high-severity fraud indicators

### Use regional_statistics for:
- Comparing claim amounts across regions, states, or cities
- Analyzing fraud rates by geography
- Identifying seasonal claim patterns

### Use policy_claims_summary for:
- Analyzing policy-level claim history
- Identifying policies with increasing claim trends
- Finding policies with multiple fraud-flagged claims

## Important Definitions
- estimated_damage: Initial damage estimate in USD
- amount_paid: Actual amount paid for settled/approved claims
- risk_score: 0-100 scale (higher = more risky)
- fraud_flag: Boolean indicating potential fraud
- claim_frequency: very_low, low, medium, high, very_high
- credit_score: excellent, good, fair, poor
- driving_record: clean, minor_violations, major_violations
- claims_trend: INCREASING, STABLE, DECREASING, INSUFFICIENT_DATA

## Response Guidelines
1. Always provide specific numbers and statistics when available
2. Include relevant context about what the data represents
3. Flag any potential fraud indicators or risk factors
4. For amount queries, specify currency as USD
"""

## Step 4: Define Datasource Instructions

In [None]:
# Datasource-level instructions: one routing hint per table plus two general
# rules (how to rank "top/most/highest" questions, and which identifiers to
# include). The ranking rule previously said "Best selling", wording left over
# from a sales template that has no meaning for insurance claims data.
DATASOURCE_INSTRUCTIONS = """
When answering about claims, use the claims_history table for individual claim records.
When asked about customer risk or profiles, use the claimant_profiles table.
When asked about fraud patterns, check the fraud_indicators table.
When comparing to regional averages or benchmarks, use regional_statistics.
When analyzing policy-level trends, use policy_claims_summary.
When asked for the "top", "most", or "highest" of something, rank by claim count unless an amount is specified.
Always include relevant identifiers (claim_id, policy_number, claimant_id) in responses.
"""

## Step 5: Define Example Queries (Few-Shot Examples)

In [None]:
# Few-shot examples: natural-language question -> SQL answer.
#
# The queries target the Lakehouse SQL endpoint (schema "dbo"), which speaks
# T-SQL. Row limits are therefore written as `SELECT TOP n ...` rather than a
# trailing `LIMIT n`, and the boolean fraud_flag column is compared with 1
# rather than the non-T-SQL literal `true`.
# NOTE(review): `fraud_flag = 1` assumes the column surfaces as a bit on the
# SQL endpoint - confirm against the actual table schema.
EXAMPLE_QUERIES = {
    "What is the average claim amount for auto collision claims?":
        "SELECT AVG(estimated_damage) as avg_claim_amount, COUNT(*) as total_claims FROM dbo.claims_history WHERE claim_type = 'Auto Collision'",

    "Show me all claims for policy POL-AUTO-001":
        "SELECT claim_id, claim_type, estimated_damage, amount_paid, claim_date, status, fraud_flag FROM dbo.claims_history WHERE policy_number = 'POL-AUTO-001' ORDER BY claim_date DESC",

    "What is the fraud rate in California?":
        "SELECT state, AVG(fraud_rate) as avg_fraud_rate FROM dbo.regional_statistics WHERE state = 'CA' GROUP BY state",

    "Find all high-risk claimants with risk score above 70":
        "SELECT claimant_id, name, risk_score, claim_frequency, credit_score, driving_record, total_claims_count FROM dbo.claimant_profiles WHERE risk_score > 70 ORDER BY risk_score DESC",

    "Show me all fraud indicators for claim CLM-00001":
        "SELECT indicator_id, indicator_type, severity, detected_date, pattern_description, investigation_status FROM dbo.fraud_indicators WHERE claim_id = 'CLM-00001' ORDER BY severity DESC",

    "Which policies have the most claims?":
        "SELECT TOP 10 policy_number, policy_type, total_claims, total_amount_paid, claims_trend, fraud_claims_count FROM dbo.policy_claims_summary ORDER BY total_claims DESC",

    "What are the top fraud patterns detected?":
        "SELECT indicator_type, COUNT(*) as occurrence_count, COUNT(CASE WHEN severity = 'CRITICAL' THEN 1 END) as critical_count FROM dbo.fraud_indicators GROUP BY indicator_type ORDER BY occurrence_count DESC",

    "Show the risk profile for claimant CLM-001":
        "SELECT claimant_id, name, age, state, city, customer_since, total_claims_count, total_claims_amount, risk_score, claim_frequency, credit_score, driving_record, account_status FROM dbo.claimant_profiles WHERE claimant_id = 'CLM-001'",

    "Which cities have the highest fraud rates?":
        "SELECT TOP 10 city, state, fraud_rate, total_claims, avg_claim_amount FROM dbo.regional_statistics WHERE total_claims > 50 ORDER BY fraud_rate DESC",

    "Show policies with increasing claim trends":
        "SELECT policy_number, policy_type, total_claims, total_amount_paid, avg_claim_amount, first_claim_date, last_claim_date FROM dbo.policy_claims_summary WHERE claims_trend = 'INCREASING' ORDER BY total_amount_paid DESC",

    "What is the average claim amount by vehicle make?":
        "SELECT TOP 15 vehicle_make, COUNT(*) as claim_count, AVG(estimated_damage) as avg_damage, SUM(CASE WHEN fraud_flag = 1 THEN 1 ELSE 0 END) as fraud_count FROM dbo.claims_history WHERE vehicle_make IS NOT NULL GROUP BY vehicle_make ORDER BY claim_count DESC",

    "Find open fraud investigations with high severity":
        "SELECT fi.claim_id, fi.indicator_type, fi.severity, fi.detected_date, ch.claimant_name, ch.estimated_damage, ch.claim_type FROM dbo.fraud_indicators fi JOIN dbo.claims_history ch ON fi.claim_id = ch.claim_id WHERE fi.investigation_status = 'OPEN' AND fi.severity IN ('HIGH', 'CRITICAL') ORDER BY fi.detected_date DESC",
}

## Step 6: Import SDK and Create Data Agent

In [None]:
from fabric.dataagent.client import (
    FabricDataAgentManagement,
    create_data_agent,
    delete_data_agent,
)

# Create the Data Agent, or connect to it when one with this name already
# exists, so the cell can be re-run.
try:
    print(f"Creating new Data Agent: {DATA_AGENT_NAME}")
    data_agent = create_data_agent(DATA_AGENT_NAME)
except Exception as e:
    # A name clash is recognised by its message text; anything else is a real
    # failure and is re-raised unchanged (bare `raise` keeps the traceback).
    error_text = str(e).lower()
    if "conflict" not in error_text and "already exists" not in error_text:
        raise
    print(f"Data Agent '{DATA_AGENT_NAME}' already exists, connecting...")
    data_agent = FabricDataAgentManagement(DATA_AGENT_NAME)
    print(f"✅ Connected to existing Data Agent: {DATA_AGENT_NAME}")
else:
    print(f"✅ Created new Data Agent: {DATA_AGENT_NAME}")

# Check current configuration (last expression, so the notebook displays it)
data_agent.get_configuration()

## Step 7: Set Agent Instructions

In [None]:
# Push the agent-level instructions to the Data Agent.
data_agent.update_configuration(instructions=AGENT_INSTRUCTIONS)

# Read the configuration back to verify. `or ''` guards against instructions
# being None, matching how the summary cell measures the same field.
config = data_agent.get_configuration()
print(f"✅ Agent instructions set ({len(config.instructions or '')} characters)")
config

## Step 8: Add Lakehouse Datasource

In [None]:
# Attach the Lakehouse to the agent as a datasource.
# Supported types: "lakehouse", "kqldatabase", "warehouse", "semanticmodel"
datasource = data_agent.add_datasource(LAKEHOUSE_NAME, type="lakehouse")
print(f"✅ Added Lakehouse datasource: {LAKEHOUSE_NAME}")

# List datasources (last expression, so the notebook displays it)
data_agent.get_datasources()

## Step 9: Select Tables

In [None]:
# Get the datasource (if you need to reconnect)
# datasource = data_agent.get_datasources()[0]

# Select each configured table from the schema, reporting progress per table.
print(f"Selecting tables from schema '{SCHEMA_NAME}':")
for table in TABLES:
    datasource.select(SCHEMA_NAME, table)
    print(f"   ✅ Selected: {table}")

# Pretty print selected tables
print("\nSelected tables:")
datasource.pretty_print()

## Step 10: Set Datasource Instructions

In [None]:
# Add datasource-specific instructions
datasource.update_configuration(instructions=DATASOURCE_INSTRUCTIONS)

# Read the datasource configuration back and show the stored instructions;
# "Not set" is printed when the 'additional_instructions' key is missing.
ds_config = datasource.get_configuration()
print("✅ Datasource instructions set")
print(f"Instructions: {ds_config.get('additional_instructions', 'Not set')}")

## Step 11: Add Example Queries (Few-Shot Examples)

In [None]:
# Register the question -> SQL pairs as few-shot examples on the datasource.
print(f"Adding {len(EXAMPLE_QUERIES)} example queries...")
datasource.add_fewshots(EXAMPLE_QUERIES)
print("✅ Added example queries")

# Read the few-shots back to verify (last expression, so the notebook displays it)
fewshots = datasource.get_fewshots()
print(f"\nTotal few-shots in datasource: {len(fewshots)}")
fewshots

## Step 12: Publish Data Agent

In [None]:
# Publish the Data Agent to make it available
print("Publishing Data Agent...")
data_agent.publish()
print("✅ Data Agent published successfully!")

## Step 13: Summary

In [None]:
# Print a recap of what was configured. Relies on `data_agent` and
# `datasource` created by the earlier cells.
print("=" * 60)
print("📊 DATA AGENT CONFIGURATION SUMMARY")
print("=" * 60)

# Agent name and size of the stored instructions (None is treated as empty)
config = data_agent.get_configuration()
print(f"\nAgent Name: {DATA_AGENT_NAME}")
print(f"Instructions Length: {len(config.instructions or '')} characters")

# Number of datasources attached to the agent
datasources = data_agent.get_datasources()
print(f"\nData Sources: {len(datasources)}")

print("\nSelected Tables:")
datasource.pretty_print()

# Number of few-shot examples registered on the datasource
fewshots = datasource.get_fewshots()
print(f"\nExample Queries: {len(fewshots)}")

print("\n" + "=" * 60)
print("\n✅ Data Agent setup complete!")
print("\nNext steps:")
print("1. Test the Data Agent in Fabric UI")
print("2. Create a connection in Azure AI Foundry")
print("3. Set USE_FABRIC_DATA_AGENT=true in your .env file")

## Optional: Test the Data Agent

In [None]:
# Optional: smoke-test the agent through the OpenAI-compatible client.
# Uncomment the lines below to run.

# from fabric.dataagent.client import FabricOpenAI
#
# fabric_client = FabricOpenAI(artifact_name=DATA_AGENT_NAME)
# assistant = fabric_client.beta.assistants.create(model="gpt-4o")
# thread = fabric_client.beta.threads.create()
#
# # Ask a test question
# fabric_client.beta.threads.messages.create(
#     thread_id=thread.id,
#     role="user",
#     content="What is the average claim amount for auto collision claims?",
# )
#
# run = fabric_client.beta.threads.runs.create(
#     thread_id=thread.id,
#     assistant_id=assistant.id,
# )
#
# # Poll while the run is still queued or in progress. Waiting for
# # status == "completed" only would loop forever if the run ends as
# # failed, cancelled or expired.
# import time
# while run.status in ("queued", "in_progress"):
#     time.sleep(1)
#     run = fabric_client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
# print(f"Run finished with status: {run.status}")
#
# messages = fabric_client.beta.threads.messages.list(thread_id=thread.id)
# for msg in messages.data:
#     print(f"{msg.role}: {msg.content[0].text.value}")

## Optional: Delete Data Agent

⚠️ Only run this if you want to remove the Data Agent.

In [None]:
# ⚠️ DANGER: This will delete the Data Agent!
# Uncomment only if you want to delete

# delete_data_agent(DATA_AGENT_NAME)
# print(f"Deleted Data Agent: {DATA_AGENT_NAME}")