# VAE Synthetic Financial Data Generator - Complete Multi-Dimensional Analysis

**Enterprise-grade validation with comprehensive multi-dimensional comparisons**

- **Payer-centric analysis**: Transaction patterns by paying entities
- **Payee-centric analysis**: Receiving patterns by payee entities  
- **GICS-centric analysis**: Sector-level transaction flows
- **Industry-centric analysis**: Industry-specific patterns
- **Temporal analysis**: Time-of-day, business hours, peak periods
- **Cross-dimensional flows**: Multi-dimensional interaction patterns

**All analyses are displayed directly in the notebook with detailed tables and charts**

In [None]:
# Placeholder cell: copy Cells 1-8 from Azure_Databricks_VAE_Ready.ipynb (the working notebook) here.
# They must run first, because the cells below use names they do not define themselves
# (`pd`, `original_data`, `synthetic_data`).

In [None]:
# CELL 9: Complete Multi-Dimensional Validation Dashboard

from matplotlib.gridspec import GridSpec
from IPython.display import HTML, display
import seaborn as sns

# Column names read by every comparison below.
_AMOUNT_COL = 'ed_amount'
_TIME_COL = 'fh_file_creation_time'
# Hours (24h clock) that count as "peak".
_PEAK_HOURS = (9, 10, 11, 14, 15, 16)


def _show(table):
    """Render a table with IPython's rich display, or print it when IPython is unavailable."""
    try:
        from IPython.display import display
    except ImportError:
        print(table)
    else:
        display(table)


def _pct_change(baseline, value):
    """Return the percent change from baseline to value, or 0 when baseline is not positive."""
    return (value - baseline) / baseline * 100 if baseline > 0 else 0


def _share(part, whole):
    """Return part as a percentage of whole, or 0 when whole is empty (avoids dividing by zero)."""
    return part / whole * 100 if whole > 0 else 0


def _signed_pct(value):
    """Format a number as a signed percentage with one decimal, e.g. '+12.5%'."""
    return f"{value:+.1f}%"


def _dollars(value):
    """Format a number as whole dollars with thousands separators, e.g. '$1,234'."""
    return f"${value:,.0f}"


def _clip(label, width, suffix=''):
    """Return label as text cut to width characters; suffix is appended only when it was cut."""
    text = str(label)  # labels are not guaranteed to be strings
    return text[:width] + suffix if len(text) > width else text


def _group_stats(frame, by):
    """Return count/mean/sum of the amount column per group, indexed by the group key."""
    return frame.groupby(by)[_AMOUNT_COL].agg(['count', 'mean', 'sum'])


def _lookup(stats):
    """Turn a _group_stats frame into {group_key: (count, mean, total)}."""
    return {
        key: (count, mean, total)
        for key, count, mean, total in stats.itertuples(name=None)
    }


def _compare(orig_lookup, synth_lookup, key):
    """Return raw counts plus formatted amounts and percent diffs for one group.

    A group missing from either side is treated as count 0, mean 0, total 0.
    """
    orig_count, orig_avg, orig_total = orig_lookup.get(key, (0, 0, 0))
    synth_count, synth_avg, synth_total = synth_lookup.get(key, (0, 0, 0))
    return {
        'orig_count': orig_count,
        'synth_count': synth_count,
        'count_diff': _signed_pct(_pct_change(orig_count, synth_count)),
        'orig_avg': _dollars(orig_avg),
        'synth_avg': _dollars(synth_avg),
        'avg_diff': _signed_pct(_pct_change(orig_avg, synth_avg)),
        'orig_total': _dollars(orig_total),
        'synth_total': _dollars(synth_total),
        'total_diff': _signed_pct(_pct_change(orig_total, synth_total)),
    }


def _compare_groups(original, synthetic, by, top=None):
    """Return [(group_key, comparison)] for the groups of column(s) `by`.

    With `top` set, only the `top` groups with the largest original total are
    returned, largest first. Otherwise every non-null value of the single
    column `by` seen in either frame is returned in sorted order.
    """
    orig_stats = _group_stats(original, by)
    orig_lookup = _lookup(orig_stats)
    synth_lookup = _lookup(_group_stats(synthetic, by))
    if top is not None:
        keys = orig_stats.nlargest(top, 'sum').index.tolist()
    else:
        # groupby drops null keys, so they have no stats to compare; dropping
        # them here also keeps sorted() from comparing NaN with strings.
        keys = sorted(
            set(original[by].dropna().unique())
            | set(synthetic[by].dropna().unique())
        )
    return [(key, _compare(orig_lookup, synth_lookup, key)) for key in keys]


def _entity_table(original, synthetic, column, label_key, count_noun):
    """Build the top-10 comparison table for a payer or payee name column."""
    return pd.DataFrame([
        {
            label_key: _clip(entity, 25, '...'),
            f'Orig_{count_noun}': stats['orig_count'],
            f'Synth_{count_noun}': stats['synth_count'],
            'Count_Diff_%': stats['count_diff'],
            'Orig_Avg_Amount': stats['orig_avg'],
            'Synth_Avg_Amount': stats['synth_avg'],
            'Amount_Diff_%': stats['avg_diff'],
            'Total_Volume_Diff_%': stats['total_diff'],
        }
        for entity, stats in _compare_groups(original, synthetic, column, top=10)
    ])


def _time_category(hour):
    """Map an hour of the day to Morning (6-11), Afternoon (12-17), Evening (18-21) or Night."""
    if 6 <= hour < 12:
        return 'Morning'
    if 12 <= hour < 18:
        return 'Afternoon'
    if 18 <= hour < 22:
        return 'Evening'
    return 'Night'


def _with_time_features(frame):
    """Return a copy of frame with time_category, business_hours and peak_hours columns.

    The hour is taken as ``int(value) // 100`` of the creation-time column.
    """
    enriched = frame.copy()
    hours = enriched[_TIME_COL].apply(lambda value: int(value) // 100)
    enriched['time_category'] = hours.apply(_time_category)
    enriched['business_hours'] = (hours >= 8) & (hours < 18)
    enriched['peak_hours'] = hours.isin(list(_PEAK_HOURS))
    return enriched


def _period_row(label_key, label, orig_rows, synth_rows, orig_size, synth_size,
                with_volume=True):
    """Build one table row comparing a time slice of both datasets.

    orig_size / synth_size are the row counts of the full datasets and serve
    as the denominators for the "% of total" columns.
    """
    orig_count, synth_count = len(orig_rows), len(synth_rows)
    orig_pct = _share(orig_count, orig_size)
    synth_pct = _share(synth_count, synth_size)
    orig_avg = orig_rows[_AMOUNT_COL].mean() if orig_count > 0 else 0
    synth_avg = synth_rows[_AMOUNT_COL].mean() if synth_count > 0 else 0

    row = {
        label_key: label,
        'Orig_%_of_Total': f"{orig_pct:.1f}%",
        'Synth_%_of_Total': f"{synth_pct:.1f}%",
        'Percentage_Diff': f"{synth_pct - orig_pct:+.1f}pp",
        'Orig_Avg_Amount': _dollars(orig_avg),
        'Synth_Avg_Amount': _dollars(synth_avg),
        'Amount_Diff_%': _signed_pct(_pct_change(orig_avg, synth_avg)),
    }
    if with_volume:
        orig_total = orig_rows[_AMOUNT_COL].sum() if orig_count > 0 else 0
        synth_total = synth_rows[_AMOUNT_COL].sum() if synth_count > 0 else 0
        row['Volume_Diff_%'] = _signed_pct(_pct_change(orig_total, synth_total))
    return row


def _interaction_rows(orig, synth, column, label_key, share_noun, periods):
    """Compare how the 3 most frequent values of `column` spread over time periods.

    `periods` is a list of (display_name, feature_column, feature_value).
    Combinations with 2 or fewer original rows are skipped.
    """
    rows = []
    for label in orig[column].value_counts().head(3).index.tolist():
        orig_group = orig[orig[column] == label]
        synth_group = synth[synth[column] == label]
        for period_name, feature, wanted in periods:
            orig_rows = orig_group[orig_group[feature] == wanted]
            synth_rows = synth_group[synth_group[feature] == wanted]
            orig_count, synth_count = len(orig_rows), len(synth_rows)
            if orig_count <= 2:
                continue

            # Share of this group's own transactions that fall in the period.
            orig_pct = _share(orig_count, len(orig_group))
            synth_pct = _share(synth_count, len(synth_group))
            orig_avg = orig_rows[_AMOUNT_COL].mean()
            synth_avg = synth_rows[_AMOUNT_COL].mean() if synth_count > 0 else 0

            rows.append({
                label_key: _clip(label, 20),
                'Time_Period': period_name,
                'Orig_Count': orig_count,
                'Synth_Count': synth_count,
                f'Orig_%_of_{share_noun}': f"{orig_pct:.1f}%",
                f'Synth_%_of_{share_noun}': f"{synth_pct:.1f}%",
                'Time_Pattern_Diff': f"{synth_pct - orig_pct:+.1f}pp",
                'Amount_Diff_%': _signed_pct(_pct_change(orig_avg, synth_avg)),
            })
    return rows


def complete_multidimensional_validation(original: pd.DataFrame, synthetic: pd.DataFrame):
    """Compare original and synthetic transactions across business dimensions.

    Prints a report and shows one table per analysis: top payers, top payees,
    payer/payee GICS sectors, payer industries, time-of-day patterns, GICS
    sector flows, and time x industry / time x sector interactions.

    Both frames must contain the columns ``payer_Company_Name``,
    ``payee_Company_Name``, ``payer_GICS``, ``payee_GICS``, ``payer_industry``,
    ``ed_amount`` and ``fh_file_creation_time``. The hour of day is derived as
    ``int(fh_file_creation_time) // 100``, so a missing time value raises.

    Percent differences are relative to the original data and are reported as
    0 when the original value is not positive. Rows with a null GICS sector or
    industry are left out of the per-category tables.

    Args:
        original: The real transaction data.
        synthetic: The generated transaction data to validate.

    Returns:
        A dict of the displayed tables with keys ``payer_analysis``,
        ``payee_analysis``, ``gics_analysis`` (``payer``/``payee``),
        ``industry_analysis``, ``temporal_analysis`` (``time_of_day``/
        ``business_hours``/``peak_hours``), ``flow_analysis`` and
        ``interaction_analysis`` (``temporal_industry``/``temporal_gics``,
        each ``None`` when no combination had more than 2 original rows).
    """
    print("🎯 COMPLETE MULTI-DIMENSIONAL VALIDATION DASHBOARD")
    print("=" * 80)

    # SECTION 1: PAYER-CENTRIC ANALYSIS
    print("\n💰 SECTION 1: PAYER-CENTRIC ANALYSIS")
    print("=" * 50)
    payer_df = _entity_table(
        original, synthetic, 'payer_Company_Name', 'Payer', 'Transactions')
    print("\n💸 TOP PAYER COMPARISON:")
    _show(payer_df)

    # SECTION 2: PAYEE-CENTRIC ANALYSIS
    print("\n💳 SECTION 2: PAYEE-CENTRIC ANALYSIS")
    print("=" * 50)
    payee_df = _entity_table(
        original, synthetic, 'payee_Company_Name', 'Payee', 'Received')
    print("\n💰 TOP PAYEE COMPARISON:")
    _show(payee_df)

    # SECTION 3: GICS-CENTRIC ANALYSIS
    print("\n🏦 SECTION 3: GICS SECTOR ANALYSIS")
    print("=" * 50)

    print("\n📊 PAYER GICS SECTOR ANALYSIS:")
    payer_gics_df = pd.DataFrame([
        {
            'Payer_GICS_Sector': sector,
            'Orig_Transactions': stats['orig_count'],
            'Synth_Transactions': stats['synth_count'],
            'Count_Diff_%': stats['count_diff'],
            'Orig_Total_Volume': stats['orig_total'],
            'Synth_Total_Volume': stats['synth_total'],
            'Volume_Diff_%': stats['total_diff'],
            'Avg_Amount_Diff_%': stats['avg_diff'],
        }
        for sector, stats in _compare_groups(original, synthetic, 'payer_GICS')
    ])
    _show(payer_gics_df)

    print("\n📈 PAYEE GICS SECTOR ANALYSIS:")
    payee_gics_df = pd.DataFrame([
        {
            'Payee_GICS_Sector': sector,
            'Orig_Received': stats['orig_count'],
            'Synth_Received': stats['synth_count'],
            'Count_Diff_%': stats['count_diff'],
            'Orig_Total_Received': stats['orig_total'],
            'Synth_Total_Received': stats['synth_total'],
            'Volume_Diff_%': stats['total_diff'],
        }
        for sector, stats in _compare_groups(original, synthetic, 'payee_GICS')
    ])
    _show(payee_gics_df)

    # SECTION 4: INDUSTRY-CENTRIC ANALYSIS
    print("\n🏭 SECTION 4: INDUSTRY-CENTRIC ANALYSIS")
    print("=" * 50)
    industry_df = pd.DataFrame([
        {
            'Industry': industry,
            'Orig_Transactions': stats['orig_count'],
            'Synth_Transactions': stats['synth_count'],
            'Count_Diff_%': stats['count_diff'],
            'Orig_Avg_Amount': stats['orig_avg'],
            'Synth_Avg_Amount': stats['synth_avg'],
            'Amount_Diff_%': stats['avg_diff'],
            'Volume_Diff_%': stats['total_diff'],
        }
        for industry, stats in _compare_groups(original, synthetic, 'payer_industry')
    ])
    print("\n🏗️ INDUSTRY COMPARISON:")
    _show(industry_df)

    # SECTION 5: COMPREHENSIVE TEMPORAL ANALYSIS
    print("\n🕐 SECTION 5: COMPREHENSIVE TEMPORAL ANALYSIS")
    print("=" * 50)

    orig_temp = _with_time_features(original)
    synth_temp = _with_time_features(synthetic)
    orig_size, synth_size = len(orig_temp), len(synth_temp)

    # 5A: time of day
    print("\n⏰ TIME OF DAY PATTERN ANALYSIS:")
    time_df = pd.DataFrame([
        _period_row(
            'Time_Period', period,
            orig_temp[orig_temp['time_category'] == period],
            synth_temp[synth_temp['time_category'] == period],
            orig_size, synth_size,
        )
        for period in ('Morning', 'Afternoon', 'Evening', 'Night')
    ])
    _show(time_df)

    # 5B: business hours vs after hours
    print("\n🏢 BUSINESS HOURS VS AFTER HOURS ANALYSIS:")
    bh_df = pd.DataFrame([
        _period_row(
            'Period', period_name,
            orig_temp[orig_temp['business_hours'] == in_hours],
            synth_temp[synth_temp['business_hours'] == in_hours],
            orig_size, synth_size,
        )
        for in_hours, period_name in (
            (True, 'Business Hours (8AM-6PM)'),
            (False, 'After Hours'),
        )
    ])
    _show(bh_df)

    # 5C: peak hours vs the remaining (off-peak) business hours
    print("\n📈 PEAK VS OFF-PEAK BUSINESS HOURS:")
    peak_df = pd.DataFrame([
        _period_row(
            'Hour_Group', 'Peak Hours (9-11AM, 2-4PM)',
            orig_temp[orig_temp['peak_hours']],
            synth_temp[synth_temp['peak_hours']],
            orig_size, synth_size, with_volume=False,
        ),
        _period_row(
            'Hour_Group', 'Off-Peak Business Hours',
            orig_temp[orig_temp['business_hours'] & ~orig_temp['peak_hours']],
            synth_temp[synth_temp['business_hours'] & ~synth_temp['peak_hours']],
            orig_size, synth_size, with_volume=False,
        ),
    ])
    _show(peak_df)

    # SECTION 6: CROSS-DIMENSIONAL FLOW ANALYSIS
    print("\n🔄 SECTION 6: CROSS-DIMENSIONAL FLOW ANALYSIS")
    print("=" * 50)

    # 6A: top 10 payer-sector -> payee-sector flows by original volume
    print("\n🏦→🏦 GICS SECTOR FLOW ANALYSIS:")
    flow_df = pd.DataFrame([
        {
            'Flow_Pattern': f"{_clip(payer_gics, 12)}→{_clip(payee_gics, 12)}",
            'Orig_Count': stats['orig_count'],
            'Synth_Count': stats['synth_count'],
            'Count_Diff_%': stats['count_diff'],
            'Orig_Volume': stats['orig_total'],
            'Synth_Volume': stats['synth_total'],
            'Volume_Diff_%': stats['total_diff'],
        }
        for (payer_gics, payee_gics), stats in _compare_groups(
            original, synthetic, ['payer_GICS', 'payee_GICS'], top=10)
    ])
    _show(flow_df)

    # 6B: top 3 industries x time of day (Night is intentionally not reported)
    print("\n🏭⏰ TEMPORAL-INDUSTRY INTERACTION ANALYSIS:")
    temp_industry_rows = _interaction_rows(
        orig_temp, synth_temp, 'payer_industry', 'Industry', 'Industry',
        [(name, 'time_category', name) for name in ('Morning', 'Afternoon', 'Evening')],
    )
    temp_industry_df = pd.DataFrame(temp_industry_rows) if temp_industry_rows else None
    if temp_industry_df is not None:
        _show(temp_industry_df)

    # 6C: top 3 GICS sectors x business hours / after hours
    print("\n🏦⏰ TEMPORAL-GICS SECTOR INTERACTION:")
    temp_gics_rows = _interaction_rows(
        orig_temp, synth_temp, 'payer_GICS', 'GICS_Sector', 'Sector',
        [('Business Hours', 'business_hours', True),
         ('After Hours', 'business_hours', False)],
    )
    temp_gics_df = pd.DataFrame(temp_gics_rows) if temp_gics_rows else None
    if temp_gics_df is not None:
        _show(temp_gics_df)

    return {
        'payer_analysis': payer_df,
        'payee_analysis': payee_df,
        'gics_analysis': {'payer': payer_gics_df, 'payee': payee_gics_df},
        'industry_analysis': industry_df,
        'temporal_analysis': {'time_of_day': time_df, 'business_hours': bh_df, 'peak_hours': peak_df},
        'flow_analysis': flow_df,
        'interaction_analysis': {
            'temporal_industry': temp_industry_df,
            'temporal_gics': temp_gics_df,
        },
    }

# Kick off the full validation; the returned tables stay available in `complete_results`.
print("\n🚀 Running complete multi-dimensional validation analysis...")
complete_results = complete_multidimensional_validation(original_data, synthetic_data)

# Closing banner followed by a checklist of the dimensions covered by the report.
print("\n" + "=" * 80)
print(
    "✅ COMPLETE MULTI-DIMENSIONAL VALIDATION FINISHED",
    "📊 All business dimensions analyzed:",
    "   ✓ Payer-centric patterns",
    "   ✓ Payee-centric patterns",
    "   ✓ GICS sector analysis",
    "   ✓ Industry analysis",
    "   ✓ Temporal patterns (time blocks)",
    "   ✓ Cross-dimensional interactions",
    "   ✓ Flow analysis",
    sep="\n",
)