# 📊 NEXUS AI - Data Exploration & Analysis

**Objective:** Explore transaction data, identify patterns, and understand the landscape of financial crime.

**Contents:**
1. Data Loading & Overview
2. Amount Distribution Analysis (Structuring)
3. Temporal Patterns
4. Geographic Analysis
5. Customer Behavior Patterns
6. Key Findings & Recommendations


In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
# Suppress every warning category so library notices do not clutter cell output.
warnings.filterwarnings(action='ignore')

# Plot defaults shared by all figures: seaborn dark grid, 12x6 inch canvas.
sns.set_style(style='darkgrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("‚úÖ Libraries loaded successfully")


## 1️⃣ Data Loading & Overview

Generate synthetic transaction data: log-normally distributed normal activity plus an injected structuring pattern (amounts just below the $10K reporting threshold).


In [None]:
# Generate synthetic transaction data
#
# Builds `df`: 10,000 transactions, of which the last 500 rows are an injected
# structuring pattern (amounts of $9,000-$9,900 at any hour of the day). The
# remaining rows are "normal": log-normal amounts during hours 8..19.
np.random.seed(42)

n_transactions = 10000
n_suspicious = 500

# Normal transactions
normal_amounts = np.random.lognormal(8, 1.5, n_transactions - n_suspicious)
normal_hours = np.random.randint(8, 20, n_transactions - n_suspicious)

# Suspicious transactions (structuring pattern)
suspicious_amounts = np.random.uniform(9000, 9900, n_suspicious)
suspicious_hours = np.random.randint(0, 24, n_suspicious)

# Combine (normal rows first, suspicious rows last; the label column follows
# the same ordering)
df = pd.DataFrame({
    'transaction_id': [f'TXN-{i:06d}' for i in range(n_transactions)],
    'amount': np.concatenate([normal_amounts, suspicious_amounts]),
    'hour': np.concatenate([normal_hours, suspicious_hours]),
    'is_suspicious': [0] * (n_transactions - n_suspicious) + [1] * n_suspicious,
    'sender_id': [f'CUST-{np.random.randint(1, 1000):04d}' for _ in range(n_transactions)],
    'receiver_id': [f'CUST-{np.random.randint(1, 1000):04d}' for _ in range(n_transactions)],
    'country': np.random.choice(['US', 'GB', 'CH', 'SG', 'AE', 'BR', 'MX'], n_transactions, 
                                p=[0.5, 0.2, 0.1, 0.05, 0.05, 0.05, 0.05]),
    'transaction_type': np.random.choice(['wire', 'cash', 'check', 'crypto'], n_transactions,
                                         p=[0.4, 0.3, 0.2, 0.1])
})

# Add timestamps
# The window start is anchored at midnight. Without that, the current time of
# day leaks into every timestamp, so `timestamp.hour` disagrees with the `hour`
# column and late hours spill into the following day (shifting day-of-week).
# NOTE: the window is still relative to today's date, so calendar-based views
# (dates, weekdays) vary between runs even though the random seed is fixed.
base_date = (datetime.now() - timedelta(days=90)).replace(
    hour=0, minute=0, second=0, microsecond=0
)
df['timestamp'] = [base_date + timedelta(days=np.random.randint(0, 90), hours=int(h)) 
                   for h in df['hour']]

print(f"üìä Dataset loaded: {len(df):,} transactions")
print(f"üö® Suspicious: {df['is_suspicious'].sum():,} ({df['is_suspicious'].mean():.1%})")
print(f"üí∞ Total volume: ${df['amount'].sum():,.2f}")
df.head(10)


In [None]:
# Basic statistics: numeric summary followed by categorical frequency tables
rule = "=" * 50

print("\nüìà TRANSACTION STATISTICS\n" + rule)
print(df[['amount', 'hour']].describe())

# Both categorical columns get the same treatment: heading, rule, value counts
for heading, column in (
    ("\nüåç COUNTRY DISTRIBUTION\n", 'country'),
    ("\nüí≥ TRANSACTION TYPE DISTRIBUTION\n", 'transaction_type'),
):
    print(heading + rule)
    print(df[column].value_counts())


## 2️⃣ Amount Distribution Analysis

Analyze transaction amounts to identify structuring patterns (transactions just below $10K threshold).


In [None]:
# Amount distribution: four views of normal vs. suspicious amounts, then a
# summary of how crowded the $9,000-$9,999 band (just under the $10K line) is.
from math import erfc, sqrt  # stdlib; used for the 1-dof chi-square p-value below

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

amt_normal = df.loc[df['is_suspicious'] == 0, 'amount']
amt_suspicious = df.loc[df['is_suspicious'] == 1, 'amount']

# 1. Overall distribution
# Both series share bin edges limited to the displayed $0-$50K window. Letting
# each series pick 50 bins over its own full range gave bars far too wide to
# read in this window. Densities are therefore normalised over the amounts
# that fall inside the window.
overview_bins = np.linspace(0, 50000, 51)
axes[0,0].hist(amt_normal, bins=overview_bins, alpha=0.7, label='Normal', color='green', density=True)
axes[0,0].hist(amt_suspicious, bins=overview_bins, alpha=0.7, label='Suspicious', color='red', density=True)
axes[0,0].axvline(10000, color='orange', linestyle='--', linewidth=2, label='$10K CTR Threshold')
axes[0,0].set_xlabel('Amount ($)', fontsize=12)
axes[0,0].set_ylabel('Density', fontsize=12)
axes[0,0].set_title('üí∞ Transaction Amount Distribution', fontsize=14, fontweight='bold')
axes[0,0].legend(fontsize=10)
axes[0,0].set_xlim(0, 50000)
axes[0,0].grid(alpha=0.3)

# 2. Structuring zone zoom
# Shared $75-wide bins so bar heights of the two groups are directly comparable.
structuring_range = df[(df['amount'] >= 8000) & (df['amount'] <= 11000)]
zoom_bins = np.linspace(8000, 11000, 41)
axes[0,1].hist(structuring_range[structuring_range['is_suspicious']==0]['amount'], 
             bins=zoom_bins, alpha=0.7, label='Normal', color='green')
axes[0,1].hist(structuring_range[structuring_range['is_suspicious']==1]['amount'], 
             bins=zoom_bins, alpha=0.7, label='Suspicious', color='red')
axes[0,1].axvline(10000, color='orange', linestyle='--', linewidth=3, label='$10K Threshold')
axes[0,1].axvspan(9000, 10000, alpha=0.2, color='red', label='High-Risk Zone')
axes[0,1].set_xlabel('Amount ($)', fontsize=12)
axes[0,1].set_ylabel('Frequency', fontsize=12)
axes[0,1].set_title('üîç Structuring Range ($8K-$11K)', fontsize=14, fontweight='bold')
axes[0,1].legend(fontsize=10)
axes[0,1].grid(alpha=0.3)

# 3. Log scale view (+1 keeps log10 defined for a zero amount)
log_normal = np.log10(amt_normal + 1)
log_suspicious = np.log10(amt_suspicious + 1)
log_all = np.log10(df['amount'] + 1)
log_bins = np.linspace(log_all.min(), log_all.max(), 51)  # common edges for both groups
axes[1,0].hist(log_normal, bins=log_bins, alpha=0.7, 
               label='Normal', color='green')
axes[1,0].hist(log_suspicious, bins=log_bins, alpha=0.7, 
               label='Suspicious', color='red')
axes[1,0].set_xlabel('Log10(Amount)', fontsize=12)
axes[1,0].set_ylabel('Frequency', fontsize=12)
axes[1,0].set_title('üìä Log-Scale Distribution', fontsize=14, fontweight='bold')
axes[1,0].legend(fontsize=10)
axes[1,0].grid(alpha=0.3)

# 4. Cumulative distribution (empirical CDF: sorted amounts vs. evenly spaced 0..1)
normal_sorted = np.sort(amt_normal)
suspicious_sorted = np.sort(amt_suspicious)
axes[1,1].plot(normal_sorted, np.linspace(0, 1, len(normal_sorted)), 
               label='Normal', color='green', linewidth=2)
axes[1,1].plot(suspicious_sorted, np.linspace(0, 1, len(suspicious_sorted)), 
               label='Suspicious', color='red', linewidth=2)
axes[1,1].axvline(10000, color='orange', linestyle='--', linewidth=2, label='$10K')
axes[1,1].set_xlabel('Amount ($)', fontsize=12)
axes[1,1].set_ylabel('Cumulative Probability', fontsize=12)
axes[1,1].set_title('üìà Cumulative Distribution Function', fontsize=14, fontweight='bold')
axes[1,1].legend(fontsize=10)
axes[1,1].set_xlim(0, 30000)
axes[1,1].grid(alpha=0.3)

plt.tight_layout()
plt.show()

# Statistics
in_band = (df['amount'] >= 9000) & (df['amount'] < 10000)
structuring_count = int(in_band.sum())
suspicious_in_band = int((in_band & (df['is_suspicious'] == 1)).sum())

# Baseline: the share of non-suspicious transactions that land in the band,
# scaled to the whole dataset. This is the count we would expect if every
# transaction followed the normal amount distribution.
n_total = len(df)
if len(amt_normal) > 0:
    baseline_rate = float(((amt_normal >= 9000) & (amt_normal < 10000)).mean())
else:
    baseline_rate = 0.0
expected_count = baseline_rate * n_total

print(f"\n‚ö†Ô∏è  STRUCTURING INDICATORS:")
print(f"   Transactions $9K-$9.9K: {structuring_count} ({structuring_count/len(df)*100:.2f}%)")
print(f"   Suspicious in this range: {suspicious_in_band}")
print(f"   Expected by chance: ~{expected_count:.0f} (at the non-suspicious baseline rate)")

if 0 < expected_count < n_total:
    # One-degree-of-freedom goodness-of-fit test on in-band vs. out-of-band
    # counts. For 1 dof the chi-square survival function is erfc(sqrt(x / 2)).
    # The baseline rate is itself estimated from the data, so treat the
    # p-value as approximate.
    chi2 = (structuring_count - expected_count) ** 2 * (
        1 / expected_count + 1 / (n_total - expected_count)
    )
    p_value = erfc(sqrt(chi2 / 2))
    p_text = "p < 0.001" if p_value < 0.001 else f"p = {p_value:.3f}"
    verdict = "SIGNIFICANT DEVIATION" if p_value < 0.05 else "no significant deviation"
    print(f"   Chi-square test: {verdict} (chi2 = {chi2:.1f}, {p_text})")
else:
    print("   Chi-square test: not computed (baseline rate is 0 or 1)")


## 3️⃣ Temporal Patterns Analysis

Analyze when suspicious transactions occur - time of day, day of week, and temporal clustering.


In [None]:
# Temporal analysis: six panels (hourly, weekly, daily trend, hour x weekday
# heatmap, suspicious rate by hour, cumulative suspicious volume) followed by
# a printed summary. Adds `day_of_week` and `date` columns to `df`.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Single definition of "off-hours" (hour < 6 or hour > 20, i.e. hours 0-5 and
# 21-23), used by the shaded regions and by the summary statistics so they
# cannot drift apart. Spans are padded by 0.5 to cover whole hour columns.
off_hours = (df['hour'] < 6) | (df['hour'] > 20)
off_hour_spans = [(-0.5, 5.5), (20.5, 23.5)]

# 1. Hour of day
hour_normal = df[df['is_suspicious']==0]['hour'].value_counts().sort_index()
hour_suspicious = df[df['is_suspicious']==1]['hour'].value_counts().sort_index()

x = range(24)
width = 0.35
axes[0,0].bar([i-width/2 for i in x], [hour_normal.get(i, 0) for i in x], width, alpha=0.7, label='Normal', color='green')
axes[0,0].bar([i+width/2 for i in x], [hour_suspicious.get(i, 0) for i in x], width, alpha=0.7, label='Suspicious', color='red')
for span_start, span_end in off_hour_spans:
    axes[0,0].axvspan(span_start, span_end, alpha=0.1, color='gray')
axes[0,0].set_xlabel('Hour of Day', fontsize=12)
axes[0,0].set_ylabel('Number of Transactions', fontsize=12)
axes[0,0].set_title('‚è∞ Hourly Distribution', fontsize=14, fontweight='bold')
axes[0,0].legend()
axes[0,0].grid(alpha=0.3, axis='y')

# 2. Day of week (0 = Monday ... 6 = Sunday)
df['day_of_week'] = df['timestamp'].dt.dayofweek
# Reindex so all seven weekdays and both classes exist even if one is absent
# from the data; the bars below are positioned by range(7).
dow_suspicious = (
    df.groupby(['day_of_week', 'is_suspicious']).size().unstack(fill_value=0)
      .reindex(index=range(7), columns=[0, 1], fill_value=0)
)
dow_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
axes[0,1].bar(range(7), dow_suspicious[0], alpha=0.7, label='Normal', color='green')
axes[0,1].bar(range(7), dow_suspicious[1], alpha=0.7, bottom=dow_suspicious[0], label='Suspicious', color='red')
axes[0,1].set_xticks(range(7))
axes[0,1].set_xticklabels(dow_names)
axes[0,1].set_xlabel('Day of Week', fontsize=12)
axes[0,1].set_ylabel('Number of Transactions', fontsize=12)
axes[0,1].set_title('üìÖ Weekly Pattern', fontsize=14, fontweight='bold')
axes[0,1].legend()
axes[0,1].grid(alpha=0.3, axis='y')

# 3. Daily volume over time
df['date'] = df['timestamp'].dt.date
daily_stats = df.groupby(['date', 'is_suspicious']).agg({
    'amount': ['sum', 'count']
}).reset_index()
daily_normal = daily_stats[daily_stats['is_suspicious']==0]
daily_suspicious = daily_stats[daily_stats['is_suspicious']==1]

axes[0,2].plot(daily_normal['date'], daily_normal[('amount', 'count')], 
               label='Normal', color='green', linewidth=2, marker='o', markersize=3)
axes[0,2].plot(daily_suspicious['date'], daily_suspicious[('amount', 'count')], 
               label='Suspicious', color='red', linewidth=2, marker='o', markersize=3)
axes[0,2].set_xlabel('Date', fontsize=12)
axes[0,2].set_ylabel('Transaction Count', fontsize=12)
axes[0,2].set_title('üìà Daily Volume Trend', fontsize=14, fontweight='bold')
axes[0,2].legend()
axes[0,2].tick_params(axis='x', rotation=45)
axes[0,2].grid(alpha=0.3)

# 4. Heatmap by hour and day
# imshow places rows/columns by position, so force a full 24 x 7 grid; a
# missing hour or weekday would otherwise shift cells against the tick labels.
pivot_table = (
    df.pivot_table(values='amount', index='hour', columns='day_of_week', aggfunc='count', fill_value=0)
      .reindex(index=range(24), columns=range(7), fill_value=0)
)
im = axes[1,0].imshow(pivot_table.values, cmap='YlOrRd', aspect='auto')
axes[1,0].set_xticks(range(7))
axes[1,0].set_xticklabels(dow_names)
axes[1,0].set_yticks(range(0, 24, 2))
axes[1,0].set_yticklabels(range(0, 24, 2))
axes[1,0].set_xlabel('Day of Week', fontsize=12)
axes[1,0].set_ylabel('Hour of Day', fontsize=12)
axes[1,0].set_title('üî• Activity Heatmap', fontsize=14, fontweight='bold')
plt.colorbar(im, ax=axes[1,0], label='Transaction Count')

# 5. Suspicious rate by hour
sus_rate_by_hour = df.groupby('hour')['is_suspicious'].mean() * 100
axes[1,1].plot(sus_rate_by_hour.index, sus_rate_by_hour.values, color='darkred', linewidth=3, marker='o')
axes[1,1].axhline(df['is_suspicious'].mean()*100, color='orange', linestyle='--', label='Overall Average')
# Full-height shading: the rate axis runs to 100%, so a band of fixed height
# 10 would not mark the off-hours columns.
for span_no, (span_start, span_end) in enumerate(off_hour_spans):
    axes[1,1].axvspan(span_start, span_end, alpha=0.2, color='gray',
                      label='Off-Hours' if span_no == 0 else '_nolegend_')
axes[1,1].set_xlabel('Hour of Day', fontsize=12)
axes[1,1].set_ylabel('Suspicious Rate (%)', fontsize=12)
axes[1,1].set_title('üö® Suspicious Rate by Hour', fontsize=14, fontweight='bold')
axes[1,1].legend()
axes[1,1].grid(alpha=0.3)

# 6. Time series of cumulative suspicious amount
df_sorted = df.sort_values('timestamp')
# Running total over suspicious rows only; normal rows keep NaN in this column.
df_sorted['cumulative_suspicious_amount'] = df_sorted[df_sorted['is_suspicious']==1]['amount'].cumsum()
suspicious_timeline = df_sorted[df_sorted['is_suspicious']==1]
axes[1,2].plot(suspicious_timeline['timestamp'], 
               suspicious_timeline['cumulative_suspicious_amount']/1000000,
               color='darkred', linewidth=2)
axes[1,2].set_xlabel('Date', fontsize=12)
axes[1,2].set_ylabel('Cumulative Amount ($M)', fontsize=12)
axes[1,2].set_title('üí∞ Cumulative Suspicious Volume', fontsize=14, fontweight='bold')
axes[1,2].tick_params(axis='x', rotation=45)
axes[1,2].grid(alpha=0.3)

plt.tight_layout()
plt.show()

# Statistics
print("\n‚è∞ TEMPORAL INSIGHTS:")
night_txns = df[off_hours]
print(f"   Off-hours transactions: {len(night_txns)} ({len(night_txns)/len(df)*100:.1f}%)")
print(f"   Off-hours suspicious rate: {night_txns['is_suspicious'].mean()*100:.1f}%")
print(f"   Peak suspicious hour: {sus_rate_by_hour.idxmax()}:00 ({sus_rate_by_hour.max():.1f}%)")
print(f"   Safest hour: {sus_rate_by_hour.idxmin()}:00 ({sus_rate_by_hour.min():.1f}%)")


In [None]:
# Geographic analysis: per-country risk table, four comparison panels, and a
# printed list of countries whose suspicious rate exceeds 1.5x the overall rate.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

overall_sus_rate = df['is_suspicious'].mean()

# Country risk profile (one row per country, values rounded to 3 decimals)
country_stats = (
    df.groupby('country')
      .agg(
          Count=('transaction_id', 'count'),
          Sus_Rate=('is_suspicious', 'mean'),
          Sus_Count=('is_suspicious', 'sum'),
          Avg_Amount=('amount', 'mean'),
          Total_Volume=('amount', 'sum'),
      )
      .round(3)
)
country_stats_sorted = country_stats.sort_values('Sus_Rate', ascending=False)

# 1. Suspicious rate by country (red = above the overall rate, green = otherwise)
risk_ax = axes[0,0]
colors = ['red' if rate > overall_sus_rate else 'green'
          for rate in country_stats_sorted['Sus_Rate']]
risk_ax.barh(country_stats_sorted.index, country_stats_sorted['Sus_Rate']*100, color=colors, alpha=0.7)
risk_ax.axvline(overall_sus_rate*100, color='orange', linestyle='--', linewidth=2, label='Average')
risk_ax.set_xlabel('Suspicious Rate (%)', fontsize=12)
risk_ax.set_title('üåç Country Risk Profile', fontsize=14, fontweight='bold')
risk_ax.legend()
risk_ax.grid(alpha=0.3, axis='x')

# 2. Volume vs Risk scatter (marker area scales with total dollar volume)
bubble_ax = axes[0,1]
scatter = bubble_ax.scatter(
    country_stats['Count'],
    country_stats['Sus_Rate']*100,
    s=country_stats['Total_Volume']/10000,
    alpha=0.6,
    c=country_stats['Sus_Rate'],
    cmap='RdYlGn_r',
)
# Label each bubble with its country code at the bubble's own coordinates
for country, txn_total, rate in zip(country_stats.index,
                                    country_stats['Count'],
                                    country_stats['Sus_Rate']):
    bubble_ax.annotate(country, (txn_total, rate*100),
                       fontsize=11, fontweight='bold', ha='center')
bubble_ax.set_xlabel('Transaction Volume', fontsize=12)
bubble_ax.set_ylabel('Suspicious Rate (%)', fontsize=12)
bubble_ax.set_title('üìä Volume vs Risk (size = total $)', fontsize=14, fontweight='bold')
bubble_ax.grid(alpha=0.3)
plt.colorbar(scatter, ax=bubble_ax, label='Sus. Rate')

# 3. Transaction type by country (row-normalised to percentages)
mix_ax = axes[1,0]
type_country = pd.crosstab(df['country'], df['transaction_type'], normalize='index') * 100
type_country.plot(kind='bar', stacked=True, ax=mix_ax, alpha=0.7)
mix_ax.set_xlabel('Country', fontsize=12)
mix_ax.set_ylabel('Percentage (%)', fontsize=12)
mix_ax.set_title('üí≥ Transaction Type Distribution by Country', fontsize=14, fontweight='bold')
mix_ax.legend(title='Type', bbox_to_anchor=(1.05, 1))
mix_ax.tick_params(axis='x', rotation=45)

# 4. Average amount by country, normal and suspicious side by side
avg_ax = axes[1,1]
avg_by_country = df.groupby(['country', 'is_suspicious'])['amount'].mean().unstack()
x_pos = np.arange(len(avg_by_country))
width = 0.35
for offset, flag, series_label, bar_color in (
    (-width/2, 0, 'Normal', 'green'),
    (width/2, 1, 'Suspicious', 'red'),
):
    avg_ax.bar(x_pos + offset, avg_by_country[flag], width,
               label=series_label, color=bar_color, alpha=0.7)
avg_ax.set_xticks(x_pos)
avg_ax.set_xticklabels(avg_by_country.index, rotation=45)
avg_ax.set_xlabel('Country', fontsize=12)
avg_ax.set_ylabel('Average Amount ($)', fontsize=12)
avg_ax.set_title('üí∞ Avg Transaction Amount by Country', fontsize=14, fontweight='bold')
avg_ax.legend()
avg_ax.grid(alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# Detailed country statistics
print("\nüåç COUNTRY-LEVEL ANALYSIS:")
print(country_stats_sorted)

print("\n‚ö†Ô∏è  HIGH-RISK COUNTRIES:")
high_risk_countries = country_stats[country_stats['Sus_Rate'] > overall_sus_rate * 1.5]
for country, row in high_risk_countries.iterrows():
    print(f"   {country}: {row['Sus_Rate']*100:.1f}% suspicious rate")
    print(f"      {int(row['Sus_Count'])} suspicious transactions")
    print(f"      ${row['Total_Volume']:,.0f} total volume")


## 5️⃣ Customer Behavior Patterns

Identify high-risk customers through behavioral analysis and transaction patterns.


In [None]:
# Customer-level aggregation: one row per sender with count, amount, label and
# hour-spread statistics (rounded to 2 decimals), plus derived risk metrics.
customer_stats = (
    df.groupby('sender_id')
      .agg(
          txn_count=('transaction_id', 'count'),
          total_amount=('amount', 'sum'),
          avg_amount=('amount', 'mean'),
          std_amount=('amount', 'std'),
          min_amount=('amount', 'min'),
          max_amount=('amount', 'max'),
          suspicious_count=('is_suspicious', 'sum'),
          suspicious_rate=('is_suspicious', 'mean'),
          hour_std=('hour', lambda hours: hours.std()),
      )
      .round(2)
)

# Calculate additional metrics
# velocity: total amount divided by transaction count
customer_stats['velocity'] = customer_stats['total_amount'] / customer_stats['txn_count']
# consistency: 1 minus the coefficient of variation (an undefined ratio counts as 0)
variation = customer_stats['std_amount'] / customer_stats['avg_amount']
customer_stats['consistency'] = 1 - variation.fillna(0)
# risk_score: 50% suspicious rate, 20% if more than 10 transactions,
# 30% if the average amount exceeds $5,000
is_frequent = (customer_stats['txn_count'] > 10).astype(int)
is_large = (customer_stats['avg_amount'] > 5000).astype(int)
customer_stats['risk_score'] = (
    customer_stats['suspicious_rate'] * 0.5 + is_frequent * 0.2 + is_large * 0.3
)

# Identify high-risk customers: mostly-suspicious senders with at least 5 transactions
mostly_suspicious = customer_stats['suspicious_rate'] > 0.5
enough_history = customer_stats['txn_count'] >= 5
high_risk = customer_stats.loc[mostly_suspicious & enough_history].sort_values(
    'risk_score', ascending=False
)

n_flagged_customers = (customer_stats['suspicious_count'] > 0).sum()
print(f"üö® HIGH-RISK CUSTOMERS IDENTIFIED: {len(high_risk)}")
print(f"üìä Total unique customers: {len(customer_stats)}")
print(f"‚ö†Ô∏è  Customers with suspicious activity: {n_flagged_customers}")
print("\nTop 10 High-Risk Customers:")
print(high_risk[['txn_count', 'total_amount', 'suspicious_rate', 'risk_score']].head(10))


In [None]:
# Customer behaviour: six panels built from `customer_stats` (frequency, risk
# segments, suspicious-rate spread, activity vs volume, consistency, top 20).
# Adds a categorical `risk_segment` column to `customer_stats`.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# 1. Transaction frequency distribution
axes[0,0].hist(customer_stats['txn_count'], bins=30, color='steelblue', alpha=0.7, edgecolor='black')
axes[0,0].axvline(customer_stats['txn_count'].median(), color='red', linestyle='--', 
                 label=f'Median: {customer_stats["txn_count"].median():.0f}')
axes[0,0].axvline(customer_stats['txn_count'].quantile(0.95), color='orange', linestyle='--',
                 label=f'95th %ile: {customer_stats["txn_count"].quantile(0.95):.0f}')
axes[0,0].set_xlabel('Transactions per Customer', fontsize=12)
axes[0,0].set_ylabel('Number of Customers', fontsize=12)
axes[0,0].set_title('üìä Customer Transaction Frequency', fontsize=14, fontweight='bold')
axes[0,0].legend()
axes[0,0].grid(alpha=0.3)

# 2. Risk segmentation
# include_lowest=True keeps customers whose risk_score is exactly 0 in the
# 'Low' segment. With the default right-closed, left-open bins, a score of 0
# falls outside (0, 0.2] and those customers would be dropped as NaN.
customer_stats['risk_segment'] = pd.cut(customer_stats['risk_score'], 
                                        bins=[0, 0.2, 0.4, 0.6, 1.0], 
                                        labels=['Low', 'Medium', 'High', 'Critical'],
                                        include_lowest=True)
# sort_index orders by category (Low -> Critical), matching colors_risk below
risk_counts = customer_stats['risk_segment'].value_counts().sort_index()
colors_risk = ['green', 'yellow', 'orange', 'red']
axes[0,1].bar(risk_counts.index, risk_counts.values, color=colors_risk, alpha=0.7, edgecolor='black')
axes[0,1].set_ylabel('Number of Customers', fontsize=12)
axes[0,1].set_title('üéØ Customer Risk Segmentation', fontsize=14, fontweight='bold')
axes[0,1].grid(alpha=0.3, axis='y')
# Count label just above each bar
for i, v in enumerate(risk_counts.values):
    axes[0,1].text(i, v + 5, str(v), ha='center', fontweight='bold')

# 3. Suspicious rate distribution (customers with a non-zero rate only)
axes[0,2].hist(customer_stats[customer_stats['suspicious_rate'] > 0]['suspicious_rate'], 
              bins=20, color='darkred', alpha=0.7, edgecolor='black')
axes[0,2].set_xlabel('Suspicious Rate', fontsize=12)
axes[0,2].set_ylabel('Number of Customers', fontsize=12)
axes[0,2].set_title('üö® Distribution of Suspicious Rates', fontsize=14, fontweight='bold')
axes[0,2].grid(alpha=0.3)

# 4. Transaction count vs total volume (axes clipped at the 99th percentile)
scatter = axes[1,0].scatter(customer_stats['txn_count'], customer_stats['total_amount']/1000,
                           c=customer_stats['suspicious_rate'], s=50, alpha=0.6, cmap='RdYlGn_r')
axes[1,0].set_xlabel('Transaction Count', fontsize=12)
axes[1,0].set_ylabel('Total Volume ($K)', fontsize=12)
axes[1,0].set_title('üíµ Activity vs Volume (color = sus. rate)', fontsize=14, fontweight='bold')
axes[1,0].set_xlim(0, customer_stats['txn_count'].quantile(0.99))
axes[1,0].set_ylim(0, customer_stats['total_amount'].quantile(0.99)/1000)
axes[1,0].grid(alpha=0.3)
plt.colorbar(scatter, ax=axes[1,0], label='Sus. Rate')

# 5. Consistency score
axes[1,1].scatter(customer_stats['consistency'], customer_stats['suspicious_rate'],
                 s=30, alpha=0.5, c='darkred')
axes[1,1].set_xlabel('Consistency Score', fontsize=12)
axes[1,1].set_ylabel('Suspicious Rate', fontsize=12)
axes[1,1].set_title('üéØ Consistency vs Suspicion', fontsize=14, fontweight='bold')
axes[1,1].grid(alpha=0.3)
axes[1,1].text(0.05, 0.95, 'High consistency + high suspicion\n= Deliberate patterns', 
              transform=axes[1,1].transAxes, fontsize=10, verticalalignment='top',
              bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# 6. Top customers by risk (red above a score of 0.7, orange otherwise)
top_20_risk = customer_stats.nlargest(20, 'risk_score')
axes[1,2].barh(range(len(top_20_risk)), top_20_risk['risk_score'], 
              color=['red' if x > 0.7 else 'orange' for x in top_20_risk['risk_score']])
axes[1,2].set_yticks(range(len(top_20_risk)))
axes[1,2].set_yticklabels(top_20_risk.index, fontsize=8)
axes[1,2].set_xlabel('Risk Score', fontsize=12)
axes[1,2].set_title('üî¥ Top 20 Highest Risk Customers', fontsize=14, fontweight='bold')
axes[1,2].grid(alpha=0.3, axis='x')

plt.tight_layout()
plt.show()


## 6️⃣ Key Findings & Actionable Recommendations

Summary of patterns detected and next steps for compliance team.


In [None]:
# Final report: prints the key findings computed from `df`, `country_stats_sorted`,
# `customer_stats` and `high_risk` (all built in earlier cells), followed by a
# fixed list of recommendations.
print("\n" + "="*80)
print("üìã KEY FINDINGS & INSIGHTS")
print("="*80)

print("\n1Ô∏è‚É£ STRUCTURING PATTERN DETECTED:")
# Band just under the $10K threshold: $9,000 (inclusive) to $10,000 (exclusive)
near_threshold = (df['amount'] >= 9000) & (df['amount'] < 10000)
structuring_txns = len(df[near_threshold])
print(f"   ‚Ä¢ {structuring_txns} transactions just below $10K threshold")
print(f"   ‚Ä¢ {(structuring_txns/len(df))*100:.1f}% of all transactions")
print(f"   ‚Ä¢ {len(df[near_threshold & (df['is_suspicious'] == 1)])} confirmed suspicious")
print(f"   ‚Ä¢ Risk Level: HIGH - Suggests deliberate CTR evasion")

print("\n2Ô∏è‚É£ TEMPORAL ANOMALIES:")
# Off-hours = hours 0-5 and 21-23, i.e. 9pm up to 6am. The printed window
# below must describe this filter; it previously read "11pm-6am".
night_txns = df[(df['hour'] < 6) | (df['hour'] > 20)]
night_suspicious_rate = night_txns['is_suspicious'].mean()
overall_suspicious_rate = df['is_suspicious'].mean()
print(f"   ‚Ä¢ {len(night_txns)} transactions during off-hours (9pm-6am)")
print(f"   ‚Ä¢ {night_suspicious_rate*100:.1f}% suspicious rate (vs {overall_suspicious_rate*100:.1f}% overall)")
print(f"   ‚Ä¢ Risk Level: {'HIGH' if night_suspicious_rate > overall_suspicious_rate * 1.5 else 'MEDIUM'}")

print("\n3Ô∏è‚É£ HIGH-RISK JURISDICTIONS:")
# Top three countries by suspicious rate (the table is already sorted descending)
for country in country_stats_sorted.head(3).index:
    rate = country_stats_sorted.loc[country, 'Sus_Rate']
    count = int(country_stats_sorted.loc[country, 'Count'])
    print(f"   ‚Ä¢ {country}: {rate*100:.1f}% suspicious rate ({count} transactions)")

print("\n4Ô∏è‚É£ CUSTOMER RISK ASSESSMENT:")
print(f"   ‚Ä¢ {len(high_risk)} customers classified as HIGH RISK")
print(f"   ‚Ä¢ {len(customer_stats[customer_stats['suspicious_rate'] > 0])} customers with at least 1 suspicious txn")
print(f"   ‚Ä¢ Top risk score: {customer_stats['risk_score'].max():.2f}")
print(f"   ‚Ä¢ Average suspicious rate (active customers): {customer_stats[customer_stats['suspicious_count'] > 0]['suspicious_rate'].mean()*100:.1f}%")

print("\n5Ô∏è‚É£ TRANSACTION TYPE ANALYSIS:")
type_sus = df.groupby('transaction_type')['is_suspicious'].mean() * 100
for txn_type in type_sus.sort_values(ascending=False).index:
    print(f"   ‚Ä¢ {txn_type}: {type_sus[txn_type]:.1f}% suspicious rate")

print("\n" + "="*80)
print("üí° ACTIONABLE RECOMMENDATIONS")
print("="*80)
print("\n‚úÖ IMMEDIATE ACTIONS:")
print("   1. Flag all transactions $9K-$9.9K for enhanced review")
print("   2. Implement real-time monitoring for off-hours activity")
print("   3. Enhanced due diligence for top 3 high-risk countries")
print(f"   4. Investigate {len(high_risk)} high-risk customers immediately")
print("   5. Deploy automated alerts for identified patterns")

print("\n‚úÖ STRATEGIC RECOMMENDATIONS:")
print("   1. Deploy ML models to detect subtle variations of structuring")
print("   2. Implement network analysis to identify layering schemes")
print("   3. Integrate with sanctions screening (OFAC, UN, EU)")
print("   4. Set up RLHF system for continuous improvement")
print("   5. Generate SARs automatically for confirmed cases")

print("\n‚úÖ SYSTEM DEPLOYMENT:")
print("   1. Use NEXUS AI rule engine for pattern detection")
print("   2. Enable GNN for network-based detection")
print("   3. Activate LLM agents for intelligent analysis")
print("   4. Configure real-time streaming with Kafka")
print("   5. Set up monitoring dashboards (Grafana)")

print("\n" + "="*80)
print("‚úÖ EXPLORATION COMPLETE - Ready for Model Training!")
print("="*80)
