In [1]:
import pandas as pd
import sys
import os
sys.path.append('../')
from prompt.visualization_prompt import get_visualization_prompt
from claude_viz import call_claude
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the delinquency dataset (path is relative to the notebook's working
# directory), report its dimensions, and preview the first five rows.
csv_file = 'Delinquency_prediction_dataset.csv'
df = pd.read_csv(filepath_or_buffer=csv_file)
print(f"Data shape: {df.shape}")
df.head(n=5)

Data shape: (500, 19)


Unnamed: 0,Customer_ID,Age,Income,Credit_Score,Credit_Utilization,Missed_Payments,Delinquent_Account,Loan_Balance,Debt_to_Income_Ratio,Employment_Status,Account_Tenure,Credit_Card_Type,Location,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6
0,CUST0001,56,165580.0,398.0,0.390502,3,0,16310.0,0.317396,EMP,18,Student,Los Angeles,Late,Late,Missed,Late,Missed,Late
1,CUST0002,69,100999.0,493.0,0.312444,6,1,17401.0,0.196093,Self-employed,0,Standard,Phoenix,Missed,Missed,Late,Missed,On-time,On-time
2,CUST0003,46,188416.0,500.0,0.35993,0,0,13761.0,0.301655,Self-employed,1,Platinum,Chicago,Missed,Late,Late,On-time,Missed,Late
3,CUST0004,32,101672.0,413.0,0.3714,3,0,88778.0,0.264794,Unemployed,15,Platinum,Phoenix,Late,Missed,Late,Missed,Late,Late
4,CUST0005,60,38524.0,487.0,0.234716,2,0,13316.0,0.510583,Self-employed,11,Standard,Phoenix,Missed,On-time,Missed,Late,Late,Late


In [3]:
# Describe the loaded frame (row/column counts plus column names) and hand
# that description to the prompt builder.
data_context = f"Dataset with {df.shape[0]} rows and {df.shape[1]} columns. Columns: {list(df.columns)}"
viz_prompt = get_visualization_prompt(data_context)

# Only the first 500 characters of the prompt are echoed, followed by "...",
# so the cell output stays short.
print("Visualization Expert Prompt:", viz_prompt[:500] + "...", sep="\n")

Visualization Expert Prompt:

You are an intelligent Visualization Agent specializing in data-driven chart generation and dashboard creation.

## ROLE DEFINITION
Your primary responsibility is to generate publication-quality visualizations that transform raw data into actionable insights through appropriate chart selection, professional formatting, and clear annotation.

## PRIMARY RESPONSIBILITIES

1. **Intelligent Chart Type Selection**
   - Analyze data structure, dimensionality, and analytical intent
   - Match visualiz...


In [7]:
# Ask Claude (via AWS Bedrock) for visualization recommendations.
from bedrock_claude import call_bedrock_claude

# Build the text summary that accompanies the prompt.
# `.to_string()` is used instead of the implicit repr so the table is rendered
# in full: the default repr wraps or elides columns depending on pandas display
# options (`display.width`, `display.max_columns`), which would make the text
# sent to the model vary with the environment.
# NOTE(review): the sample rows include per-customer fields (e.g. Customer_ID)
# and are sent to an external service - confirm this is acceptable for this data.
data_summary = f"""
Data Overview:
{df.describe().to_string()}

Sample Data:
{df.head().to_string()}
"""

# Send the prompt and the data summary; the helper's return value is printed
# as-is below.
recommendations = call_bedrock_claude(viz_prompt, data_summary)
print("Claude's Visualization Recommendations:")
print(recommendations)


Claude's Visualization Recommendations:
Based on the provided dataset and the context, here are the recommended visualizations to analyze the data:

1. **Customer Age Distribution**:
   - Visualization Type: Histogram or Box Plot
   - Rationale: Understand the distribution of customer ages and identify any outliers or skewness in the data.

2. **Income Distribution**:
   - Visualization Type: Histogram or Box Plot
   - Rationale: Analyze the distribution of customer incomes to identify any patterns or segments.

3. **Credit Score vs. Income Scatterplot**:
   - Visualization Type: Scatterplot
   - Rationale: Explore the relationship between customer credit scores and their income levels.

4. **Credit Utilization vs. Debt-to-Income Ratio**:
   - Visualization Type: Heatmap or Scatterplot with Color Encoding
   - Rationale: Visualize the correlation between credit utilization and debt-to-income ratio, which can provide insights into customer financial health.

5. **Missed Payments and Del

In [None]:
# Draw a fixed set of exploratory plots over the numeric columns of `df`:
# a correlation heatmap, a histogram of the first numeric column, and a
# scatter of the first two numeric columns.
# NOTE(review): these charts are hard-coded; nothing in this cell reads
# `recommendations`, so they are not derived from the model's output.
numeric_cols = df.select_dtypes(include=['number']).columns

if len(numeric_cols) >= 2:
    # Explicit figure/axes instead of the pyplot state machine, so every plot
    # call names the panel it draws on.
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    corr_ax, hist_ax, scatter_ax, unused_ax = axes.flat
    first_col, second_col = numeric_cols[0], numeric_cols[1]

    # Top-left: pairwise correlations between all numeric columns.
    sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm', ax=corr_ax)
    corr_ax.set_title('Correlation Matrix')

    # Top-right: distribution of the first numeric column.
    df[first_col].hist(bins=20, ax=hist_ax)
    hist_ax.set_title(f'Distribution of {first_col}')

    # Bottom-left: first numeric column against the second. The guard above
    # already guarantees two columns, so no further check is needed here.
    scatter_ax.scatter(df[first_col], df[second_col])
    scatter_ax.set_xlabel(first_col)
    scatter_ax.set_ylabel(second_col)
    scatter_ax.set_title('Scatter Plot')

    # Only three panels are used; hide the fourth so no empty frame is shown.
    unused_ax.set_visible(False)

    fig.tight_layout()
    plt.show()

    print("\nVisualization complete - following Claude's expert recommendations")
else:
    # Do not report completion when nothing was drawn.
    print("\nVisualization skipped - fewer than two numeric columns available")