# 2.4 Boiler Fouling Dataset EDA - Validated Simulation

**Exploratory Data Analysis of Massachusetts Boiler Annual Validated Simulation Dataset**

This notebook performs comprehensive exploratory data analysis on the validated boiler simulation dataset.

**Dataset Characteristics:**
- **Records**: 8,784 hourly observations  
- **Features**: 219 columns covering operational, fouling, soot blowing, and performance metrics
- **Date Range**: 2024-01-01 00:00:00 to 2024-12-31 23:00:00

---

In [None]:
%load_ext autoreload
%autoreload 2

# Add source folders to path
import os
import sys

SRC = '../src'

# Every immediate sub-directory of SRC is treated as an importable source
# folder. A missing SRC directory yields an empty list instead of raising, so
# the ImportError fallback around the custom-library imports can still apply.
src_folders = (
    [name for name in os.listdir(SRC) if os.path.isdir(os.path.join(SRC, name))]
    if os.path.isdir(SRC)
    else []
)

for folder in src_folders:
    folder_path = os.path.join(SRC, folder)
    # Skip entries already present so re-running the cell does not keep
    # growing sys.path.
    if folder_path not in sys.path:
        sys.path.append(folder_path)

# Import custom libraries
try:
    from preamble import *
    from helpers import save_joblib
except ImportError as e:
    print(f"Custom libraries not available: {e}")
    print("Proceeding with standard libraries only")

# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime
from pathlib import Path

# Machine Learning
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from scipy import stats
from scipy.stats import pearsonr

# Configuration
# Suppress every warning category so library notices do not clutter cell output.
warnings.filterwarnings('ignore')

# Plot appearance: style sheet first, then palette, then the rcParams overrides
# (applied last so they take precedence over the style sheet's values).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 10,
})

# Set random seeds for reproducibility
np.random.seed(42)

# Dataset file paths
DATA_FILE = '../data/generated/annual_datasets/massachusetts_boiler_annual_20250827_184816.csv'
METADATA_FILE = '../outputs/metadata/massachusetts_boiler_annual_metadata_20250827_184816.txt'

In [None]:
# Configuration parameters
class Config:
    """Constants shared by the loading, analysis and dashboard cells.

    Efficiency and load-factor limits are stored as fractions (0.75 == 75%);
    the analysis and plotting code multiplies them by 100 when it works on a
    percentage scale.
    """

    # Data parameters
    CSV_FILE = DATA_FILE
    # The right-hand side resolves to the module-level METADATA_FILE constant.
    METADATA_FILE = METADATA_FILE
    
    # Analysis parameters
    SEQUENCE_LENGTH = 24  # Hours of history for time series analysis
    PREDICTION_HORIZON = 1  # Hours ahead for predictions
    
    # Statistical parameters
    CORRELATION_THRESHOLD = 0.7  # High correlation threshold
    OUTLIER_THRESHOLD = 3  # Z-score threshold for outliers
    
    # Performance thresholds (fractions, not percentages)
    EFFICIENCY_TARGET_MIN = 0.75  # 75% minimum efficiency
    EFFICIENCY_TARGET_MAX = 0.88  # 88% maximum efficiency
    LOAD_FACTOR_MIN = 0.60  # 60% minimum load
    # Above 1.0: fractional load-factor data may therefore exceed 1.0.
    LOAD_FACTOR_MAX = 1.05  # 105% maximum load
    
    # Fouling analysis thresholds; the dashboard draws them as horizontal
    # lines next to a reference line at 1.0 labelled 'Clean'.
    FOULING_WARNING_THRESHOLD = 1.10  # 10% fouling increase
    FOULING_MAINTENANCE_THRESHOLD = 1.15  # 15% fouling increase
    
    # Boiler sections for analysis; the soot-blowing analysis matches these
    # names as substrings of column names.
    BOILER_SECTIONS = [
        'furnace', 'generating_bank', 'superheater_1', 'superheater_2',
        'economizer_1', 'economizer_2', 'air_heater'
    ]

# Single shared settings object used by the analysis and plotting cells.
config = Config()
# Plain string literal: the message has no placeholders, so no f-prefix is needed.
print("Configuration loaded for validated simulation analysis")

In [None]:
def load_validated_boiler_data(csv_file, metadata_file=None):
    """Load the boiler simulation CSV and print a short data-quality report.

    The report covers the metadata record count (if available), the time
    range and gaps in the hourly timestamps, missing values, constant numeric
    columns and the solver convergence rate.

    Args:
        csv_file: Path of the CSV file to read.
        metadata_file: Optional path of a text metadata file. When it exists,
            its first ``Records generated:`` line is echoed. Problems reading
            it are reported but never abort the load.

    Returns:
        The loaded ``DataFrame`` (sorted by ``timestamp`` with a fresh index
        when that column is present), or ``None`` when the CSV file does not
        exist or cannot be read.
    """
    # Check if file exists
    if not os.path.exists(csv_file):
        print(f"Data file not found: {csv_file}")
        return None

    # Load metadata if available (informational only)
    if metadata_file and os.path.exists(metadata_file):
        try:
            # errors='replace' keeps an oddly encoded metadata file from
            # aborting an otherwise valid load.
            with open(metadata_file, 'r', encoding='utf-8', errors='replace') as f:
                metadata = f.read()
        except OSError as e:
            print(f"Could not read metadata file: {e}")
        else:
            records_line = next(
                (line for line in metadata.split('\n') if 'Records generated:' in line),
                None,
            )
            if records_line is not None:
                print(f"Metadata - {records_line.strip()}")

    # Load data
    try:
        data = pd.read_csv(csv_file)
    except (OSError, ValueError) as e:
        # pandas parser, empty-file and decoding errors all derive from ValueError.
        print(f"Failed to read {csv_file}: {e}")
        return None
    print(f"Loaded {len(data):,} records with {len(data.columns)} columns")

    # Parse timestamp and sort
    if 'timestamp' in data.columns:
        data['timestamp'] = pd.to_datetime(data['timestamp'])
        data = data.sort_values('timestamp').reset_index(drop=True)
        print(f"Date range: {data['timestamp'].min()} to {data['timestamp'].max()}")

        # Check for time gaps
        time_diffs = data['timestamp'].diff().dt.total_seconds() / 3600
        expected_interval = 1.0  # 1 hour
        # 10% tolerance so small timestamp jitter is not reported as a gap.
        gaps = time_diffs[time_diffs > expected_interval * 1.1]
        if gaps.empty:
            print("No time gaps detected")
        else:
            print(f"Found {len(gaps)} time gap(s); largest is {gaps.max():.1f} hours")

    # Check data quality
    # Missing values
    missing_summary = data.isnull().sum()
    missing_cols = missing_summary[missing_summary > 0]
    if missing_cols.empty:
        print("No missing values found")
    else:
        print(f"Columns with missing values: {len(missing_cols)} (showing up to 10)")
        for col, count in missing_cols.head(10).items():
            print(f"  {col}: {int(count):,} missing ({count / len(data):.1%})")

    # Check for constant columns
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    constant_cols = [col for col in numeric_cols if data[col].nunique() <= 1]
    if constant_cols:
        print(f"Constant numeric columns: {len(constant_cols)}")
    else:
        print("No constant numeric columns")

    # Convergence check
    if 'solution_converged' in data.columns:
        convergence_rate = data['solution_converged'].mean()
        if convergence_rate < 0.95:
            print(f"Low solution convergence rate: {convergence_rate:.1%}")
        else:
            print(f"Solution convergence rate: {convergence_rate:.1%}")

    return data


# Load the validated simulation data
data = load_validated_boiler_data(config.CSV_FILE, config.METADATA_FILE)

if data is not None:
    # Display first few rows of key columns
    key_cols = ['timestamp', 'load_factor', 'system_efficiency', 'coal_quality']
    display_cols = [col for col in key_cols if col in data.columns]
    sample_data = data[display_cols].head()
    print(sample_data)
else:
    print("Data could not be loaded")

In [None]:
# Series whose maximum is at or below this limit are read as fractions (0.85)
# rather than percentages (85). A cut-off of exactly 1.0 would misclassify
# fractional load factors, which the configuration allows to reach 1.05.
_FRACTION_SCALE_LIMIT = 2.0


def _is_fraction(values):
    """Return True when *values* appear to be on a 0-1 scale rather than 0-100."""
    return values.max() <= _FRACTION_SCALE_LIMIT


def analyze_operational_performance(data, config):
    """Summarise the key operating metrics and compare them with their targets.

    Args:
        data: DataFrame of simulation records.
        config: Object providing ``EFFICIENCY_TARGET_MIN/MAX`` and
            ``LOAD_FACTOR_MIN/MAX`` as fractions.

    Returns:
        Dict keyed by metric column name (only columns present in ``data``).
        Each value holds ``label``, ``unit``, ``count``, ``mean``, ``std``,
        ``min`` and ``max``; efficiency and factor metrics stored as fractions
        are reported in percent (``unit == '%'``). ``system_efficiency`` and
        ``load_factor`` additionally carry ``in_target_range``, the share of
        records (0-1) inside the configured limits.
    """
    # Key performance metrics analysis
    performance_metrics = {
        'system_efficiency': 'System Efficiency (%)',
        'load_factor': 'Load Factor (%)',
        'final_steam_temp_F': 'Steam Temperature (°F)',
        'stack_temp_F': 'Stack Temperature (°F)',
        'fuel_input_btu_hr': 'Fuel Input (BTU/hr)',
        'combustion_efficiency': 'Combustion Efficiency (%)',
    }
    # Target limits as fractions; scaled below when the data is in percent.
    targets = {
        'system_efficiency': (config.EFFICIENCY_TARGET_MIN, config.EFFICIENCY_TARGET_MAX),
        'load_factor': (config.LOAD_FACTOR_MIN, config.LOAD_FACTOR_MAX),
    }

    results = {}
    for metric, label in performance_metrics.items():
        if metric not in data.columns:
            continue

        values = data[metric]
        summary = values.describe()

        # Convert to percentage if applicable
        is_ratio = 'efficiency' in metric or 'factor' in metric
        fractional = is_ratio and _is_fraction(values)
        scale = 100 if fractional else 1
        unit = '%' if fractional else ''

        # 'count' is deliberately left unscaled.
        entry = {
            'label': label,
            'unit': unit,
            'count': int(summary['count']),
            'mean': summary['mean'] * scale,
            'std': summary['std'] * scale,
            'min': summary['min'] * scale,
            'max': summary['max'] * scale,
        }

        # Check against targets if applicable
        if metric in targets:
            low, high = targets[metric]
            if not fractional:  # data already on a 0-100 scale
                low, high = low * 100, high * 100
            # between() is inclusive on both ends; NaN counts as out of range.
            entry['in_target_range'] = float(values.between(low, high).mean())

        results[metric] = entry

        print(f"{label}: mean {entry['mean']:.2f}{unit}, std {entry['std']:.2f}{unit}, "
              f"range {entry['min']:.2f}{unit} to {entry['max']:.2f}{unit}")
        if 'in_target_range' in entry:
            print(f"  Records within target range: {entry['in_target_range']:.1%}")

    return results


def analyze_coal_quality_impact(data):
    """Report the coal-quality mix and per-quality performance statistics.

    Args:
        data: DataFrame of simulation records.

    Returns:
        ``None`` when ``data`` has no ``coal_quality`` column; otherwise a dict
        mapping each available metric (``system_efficiency``,
        ``combustion_efficiency``, ``total_nox_lb_hr``) to a DataFrame indexed
        by coal quality with ``mean``, ``std`` and ``count`` columns.
    """
    if 'coal_quality' not in data.columns:
        print("No coal quality column found")
        return None

    # Coal quality distribution
    coal_dist = data['coal_quality'].value_counts(normalize=True).sort_values(ascending=False)
    print("Coal quality distribution:")
    for quality, pct in coal_dist.items():
        print(f"  {quality}: {pct:.1%}")

    # Performance by coal quality
    performance_metrics = ['system_efficiency', 'combustion_efficiency', 'total_nox_lb_hr']

    coal_performance = {}
    for metric in performance_metrics:
        if metric not in data.columns:
            continue

        by_quality = data.groupby('coal_quality')[metric].agg(['mean', 'std', 'count'])
        coal_performance[metric] = by_quality

        print(f"{metric} by coal quality:")
        for quality, row in by_quality.iterrows():
            mean_val = row['mean']
            std_val = row['std']

            # Format based on metric type
            if 'efficiency' in metric and mean_val <= 1.0:
                print(f"  {quality}: {mean_val * 100:.2f}% (std {std_val * 100:.2f}%)")
            else:
                print(f"  {quality}: {mean_val:.2f} (std {std_val:.2f})")

    return coal_performance


def analyze_soot_blowing_patterns(data):
    """Summarise soot-blowing activity, effectiveness and efficiency impact.

    Reads the section names from the module-level ``config.BOILER_SECTIONS``.

    Args:
        data: DataFrame of simulation records.

    Returns:
        ``None`` when ``data`` is empty or has no column whose name contains
        both "soot" and "blowing"; otherwise a dict with the keys that could
        be computed: ``soot_events``, ``soot_frequency``,
        ``section_frequency``, ``mean_active_effectiveness``,
        ``efficiency_by_status``, ``t_stat`` and ``p_value``.
    """
    soot_cols = [col for col in data.columns if 'soot' in col.lower() and 'blowing' in col.lower()]
    if not soot_cols:
        print("No soot blowing columns found")
        return None

    n_rows = len(data)
    if n_rows == 0:
        print("No records available for soot blowing analysis")
        return None

    summary = {}
    has_activity_flag = 'soot_blowing_active' in data.columns

    # Overall soot blowing activity
    if has_activity_flag:
        soot_events = data['soot_blowing_active'].sum()
        soot_frequency = soot_events / n_rows
        summary['soot_events'] = soot_events
        summary['soot_frequency'] = soot_frequency
        print(f"Soot blowing active in {soot_events} of {n_rows:,} records ({soot_frequency:.1%})")

    # Section-specific soot blowing
    section_soot_cols = [col for col in soot_cols
                         if any(section in col for section in config.BOILER_SECTIONS)]
    section_frequency = {}
    for col in section_soot_cols[:7]:  # Show first 7 sections
        # Only flag-like columns (boolean or at most two distinct values).
        if data[col].dtype == 'bool' or data[col].nunique() <= 2:
            events = data[col].sum()
            frequency = events / n_rows
            section_name = col.replace('_soot_blowing_active', '').replace('_', ' ').title()
            section_frequency[section_name] = frequency
            print(f"  {section_name}: {events} events ({frequency:.1%})")
    if section_frequency:
        summary['section_frequency'] = section_frequency

    # Effectiveness analysis
    if 'avg_cleaning_effectiveness' in data.columns:
        effectiveness = data['avg_cleaning_effectiveness']
        active_effectiveness = effectiveness[effectiveness > 0]
        if not active_effectiveness.empty:
            summary['mean_active_effectiveness'] = active_effectiveness.mean()
            print(f"Average cleaning effectiveness (when > 0): {active_effectiveness.mean():.3f}")

    # Performance impact
    if has_activity_flag and 'system_efficiency' in data.columns:
        soot_impact = data.groupby('soot_blowing_active')['system_efficiency'].agg(
            ['mean', 'std', 'count'])
        summary['efficiency_by_status'] = soot_impact
        for active, row in soot_impact.iterrows():
            status = "During Soot Blowing" if active else "Normal Operation"
            eff_mean = row['mean']
            if eff_mean <= 1.0:
                print(f"  {status}: {eff_mean * 100:.2f}% (n={int(row['count'])})")
            else:
                print(f"  {status}: {eff_mean:.2f}% (n={int(row['count'])})")

        # Statistical significance test
        # Cast to bool first: `~` on a 0/1 integer column is a bitwise NOT
        # (giving -1/-2), not a logical negation.
        active_mask = data['soot_blowing_active'].fillna(False).astype(bool)
        normal_eff = data.loc[~active_mask, 'system_efficiency'].dropna()
        soot_eff = data.loc[active_mask, 'system_efficiency'].dropna()
        if len(soot_eff) > 30 and len(normal_eff) > 30:  # Sufficient sample size
            from scipy.stats import ttest_ind
            t_stat, p_value = ttest_ind(normal_eff, soot_eff)
            summary['t_stat'] = t_stat
            summary['p_value'] = p_value
            print(f"  t-test (normal vs. soot blowing): t={t_stat:.3f}, p={p_value:.4f}")

    return summary


def analyze_seasonal_patterns(data):
    """Report monthly efficiency and hour-of-day load patterns.

    Args:
        data: DataFrame of simulation records.

    Returns:
        ``None`` when ``data`` has no ``timestamp`` column; otherwise a dict
        with ``monthly_efficiency`` (mean/std per month), ``best_month``,
        ``worst_month``, ``hourly_load`` (mean per hour), ``peak_hour`` and
        ``low_hour``. Entries whose source column is missing or entirely NaN
        are ``None``.
    """
    if 'timestamp' not in data.columns:
        print("No timestamp column found")
        return None

    # Temporal keys are derived directly from the timestamps, so the full
    # frame does not have to be copied just to add helper columns.
    timestamps = pd.to_datetime(data['timestamp'])
    months = timestamps.dt.month.rename('month')
    hours = timestamps.dt.hour.rename('hour')

    patterns = {
        'monthly_efficiency': None, 'best_month': None, 'worst_month': None,
        'hourly_load': None, 'peak_hour': None, 'low_hour': None,
    }

    # Monthly patterns
    if 'system_efficiency' in data.columns:
        monthly_efficiency = data.groupby(months)['system_efficiency'].agg(['mean', 'std'])
        patterns['monthly_efficiency'] = monthly_efficiency
        month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

        eff_range = monthly_efficiency['mean']
        if eff_range.notna().any():  # idxmax/idxmin need at least one value
            best_month = eff_range.idxmax()
            worst_month = eff_range.idxmin()
            patterns['best_month'] = best_month
            patterns['worst_month'] = worst_month

            scale = 100 if _is_fraction(eff_range) else 1
            print(f"Best month: {month_names[best_month - 1]} "
                  f"({eff_range[best_month] * scale:.2f}%)")
            print(f"Worst month: {month_names[worst_month - 1]} "
                  f"({eff_range[worst_month] * scale:.2f}%)")

    # Hourly patterns
    if 'load_factor' in data.columns:
        hourly_load = data.groupby(hours)['load_factor'].mean()
        patterns['hourly_load'] = hourly_load

        if hourly_load.notna().any():
            peak_hour = hourly_load.idxmax()
            low_hour = hourly_load.idxmin()
            patterns['peak_hour'] = peak_hour
            patterns['low_hour'] = low_hour

            scale = 100 if _is_fraction(hourly_load) else 1
            print(f"Peak load hour: {peak_hour:02d}:00 ({hourly_load[peak_hour] * scale:.1f}%)")
            print(f"Lowest load hour: {low_hour:02d}:00 ({hourly_load[low_hour] * scale:.1f}%)")

    return patterns


# Run comprehensive analysis if data loaded successfully
if data is not None:
    # Operational performance analysis
    operational_results = analyze_operational_performance(data, config)

    # Coal quality impact analysis
    coal_results = analyze_coal_quality_impact(data)

    # Soot blowing analysis
    soot_results = analyze_soot_blowing_patterns(data)

    # Seasonal patterns
    seasonal_results = analyze_seasonal_patterns(data)
else:
    print("Cannot run analysis - data not loaded")

In [None]:
# Series whose maximum is at or below this limit are read as fractions (0.85)
# rather than percentages (85). A cut-off of exactly 1.0 would misclassify
# fractional load factors, which the configuration allows to reach 1.05.
_FRACTION_SCALE_LIMIT = 2.0


def _as_percent(values):
    """Return *values* on a 0-100 scale, multiplying fractional data by 100."""
    if values.max() <= _FRACTION_SCALE_LIMIT:
        return values * 100
    return values


def _plot_efficiency_timeline(ax, data, config):
    """Panel 1: system efficiency over time with the configured target limits."""
    if 'system_efficiency' not in data.columns or 'timestamp' not in data.columns:
        return
    # Every 24th record (one point per day for hourly data) keeps the line
    # readable; this is a subsample, not a daily average.
    sample_data = data.iloc[::24]
    efficiency_vals = _as_percent(sample_data['system_efficiency'])

    ax.plot(sample_data['timestamp'], efficiency_vals, alpha=0.7, linewidth=1)
    ax.axhline(y=config.EFFICIENCY_TARGET_MIN * 100, color='red', linestyle='--',
               alpha=0.5, label='Min Target')
    ax.axhline(y=config.EFFICIENCY_TARGET_MAX * 100, color='red', linestyle='--',
               alpha=0.5, label='Max Target')
    ax.set_title('System Efficiency Over Time')
    ax.set_ylabel('Efficiency (%)')
    ax.grid(True, alpha=0.3)
    ax.legend(fontsize=8)
    ax.tick_params(axis='x', rotation=45, labelsize=8)


def _plot_load_distribution(ax, data, config):
    """Panel 2: histogram of load factor with the configured limits."""
    if 'load_factor' not in data.columns:
        return
    load_vals = _as_percent(data['load_factor'].dropna())

    ax.hist(load_vals, bins=50, alpha=0.7, edgecolor='black')
    ax.axvline(x=config.LOAD_FACTOR_MIN * 100, color='red', linestyle='--', alpha=0.5)
    ax.axvline(x=config.LOAD_FACTOR_MAX * 100, color='red', linestyle='--', alpha=0.5)
    ax.set_title('Load Factor Distribution')
    ax.set_xlabel('Load Factor (%)')
    ax.set_ylabel('Frequency')
    ax.grid(True, alpha=0.3)


def _plot_coal_quality_efficiency(ax, data, config):
    """Panel 3: mean system efficiency per coal quality, best first."""
    if 'coal_quality' not in data.columns or 'system_efficiency' not in data.columns:
        return
    coal_eff = _as_percent(
        data.groupby('coal_quality')['system_efficiency'].mean().sort_values(ascending=False))

    bars = ax.bar(range(len(coal_eff)), coal_eff.values, alpha=0.7)
    ax.set_title('Efficiency by Coal Quality')
    ax.set_ylabel('Average Efficiency (%)')
    ax.set_xticks(range(len(coal_eff)))
    ax.set_xticklabels(coal_eff.index, rotation=45, ha='right', fontsize=8)
    ax.grid(True, alpha=0.3)

    # Color bars by performance
    colors = plt.cm.RdYlGn(np.linspace(0.3, 1, len(coal_eff)))
    for bar, color in zip(bars, colors):
        bar.set_color(color)


def _plot_temperature_correlation(ax, data, config):
    """Panel 4: annotated correlation heatmap of the available temperature columns."""
    temp_cols = ['ambient_temp_F', 'flame_temp_F', 'final_steam_temp_F', 'stack_temp_F']
    available_temp_cols = [col for col in temp_cols if col in data.columns]
    if len(available_temp_cols) < 2:
        return

    temp_corr = data[available_temp_cols].corr().to_numpy()
    im = ax.imshow(temp_corr, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)
    ax.set_title('Temperature Correlations')

    tick_labels = [col.replace('_', '\n').replace('temp', 'T').replace('F', '')
                   for col in available_temp_cols]
    ax.set_xticks(range(len(available_temp_cols)))
    ax.set_yticks(range(len(available_temp_cols)))
    ax.set_xticklabels(tick_labels, fontsize=8, rotation=45)
    ax.set_yticklabels(tick_labels, fontsize=8)

    # Add correlation values (imshow puts row i on the y axis, column j on x)
    for (i, j), value in np.ndenumerate(temp_corr):
        ax.text(j, i, f'{value:.2f}', ha='center', va='center', fontsize=8)

    ax.figure.colorbar(im, ax=ax, shrink=0.6)


def _plot_soot_blowing_efficiency(ax, data, config):
    """Panel 5: mean efficiency during soot blowing versus normal operation."""
    if 'soot_blowing_active' not in data.columns or 'system_efficiency' not in data.columns:
        return
    soot_eff = _as_percent(data.groupby('soot_blowing_active')['system_efficiency'].mean())

    # Labels follow the groups actually present, so a dataset with only one
    # state still plots instead of failing on a length mismatch.
    labels = ['During Soot Blowing' if active else 'Normal Operation'
              for active in soot_eff.index]
    colors = ['orange' if active else 'lightblue' for active in soot_eff.index]
    bars = ax.bar(labels, soot_eff.values, alpha=0.7, color=colors)
    ax.set_title('Efficiency During Soot Blowing')
    ax.set_ylabel('Average Efficiency (%)')
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis='x', rotation=45, labelsize=8)

    # Add value labels on bars
    for bar, val in zip(bars, soot_eff.values):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.1,
                f'{val:.1f}%', ha='center', va='bottom', fontsize=8)


def _plot_monthly_efficiency(ax, data, config):
    """Panel 6: mean system efficiency per calendar month."""
    if 'timestamp' not in data.columns or 'system_efficiency' not in data.columns:
        return
    monthly_eff = _as_percent(
        data.groupby(data['timestamp'].dt.month)['system_efficiency'].mean())

    month_names = ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D']
    ax.plot(monthly_eff.index, monthly_eff.values, 'o-', linewidth=2, markersize=6)
    ax.set_title('Monthly Efficiency Pattern')
    ax.set_ylabel('Efficiency (%)')
    ax.set_xlabel('Month')
    ax.set_xticks(range(1, 13))
    ax.set_xticklabels(month_names)
    ax.grid(True, alpha=0.3)


def _plot_emissions_share(ax, data, config):
    """Panel 7: pie chart of the mean of each available emissions column."""
    emissions_cols = ['total_nox_lb_hr', 'so2_lb_hr', 'co2_lb_hr', 'particulates_lb_hr']
    available_emissions = [col for col in emissions_cols if col in data.columns]

    emissions_data = []
    emissions_labels = []
    for col in available_emissions[:4]:  # Limit to 4 for readability
        if data[col].max() > 0:  # Only include if there's actual data
            emissions_data.append(data[col].mean())
            emissions_labels.append(col.replace('_lb_hr', '').replace('_', ' ').title())

    if not emissions_data:
        return
    _, texts, autotexts = ax.pie(emissions_data, labels=emissions_labels,
                                 autopct='%1.1f%%', startangle=90)
    ax.set_title('Average Emissions Distribution')
    # Make text smaller
    for text in texts + autotexts:
        text.set_fontsize(8)


def _plot_load_vs_efficiency(ax, data, config):
    """Panel 8: load factor against efficiency with a linear trend line."""
    if 'load_factor' not in data.columns or 'system_efficiency' not in data.columns:
        return
    # NaNs are dropped because np.polyfit cannot handle them; the fixed seed
    # makes the plotted subsample repeatable between runs.
    pairs = data[['load_factor', 'system_efficiency']].dropna()
    if pairs.empty:
        return
    sample = pairs.sample(min(2000, len(pairs)), random_state=42)

    load_vals = _as_percent(sample['load_factor'])
    eff_vals = _as_percent(sample['system_efficiency'])

    ax.scatter(load_vals, eff_vals, alpha=0.5, s=10)
    ax.set_title('Load Factor vs Efficiency')
    ax.set_xlabel('Load Factor (%)')
    ax.set_ylabel('System Efficiency (%)')
    ax.grid(True, alpha=0.3)

    # Add trend line (a linear fit needs at least two distinct x values)
    if load_vals.nunique() >= 2:
        slope, intercept = np.polyfit(load_vals, eff_vals, 1)
        x_line = np.array([load_vals.min(), load_vals.max()])
        ax.plot(x_line, slope * x_line + intercept, "r--", alpha=0.8, linewidth=1)


def _plot_fouling_factors(ax, data, config):
    """Panel 9: mean fouling factor per section, colored by threshold."""
    fouling_cols = [col for col in data.columns if 'fouling_factor' in col]
    if not fouling_cols:
        return

    shown_cols = fouling_cols[:6]  # Limit to 6 sections
    fouling_data = [data[col].mean() for col in shown_cols]
    fouling_labels = [col.replace('_fouling_factor', '').replace('_', ' ').title()
                      for col in shown_cols]

    bars = ax.bar(range(len(fouling_data)), fouling_data, alpha=0.7)
    ax.set_title('Average Fouling Factors by Section')
    ax.set_ylabel('Fouling Factor')
    ax.set_xticks(range(len(fouling_labels)))
    ax.set_xticklabels(fouling_labels, rotation=45, ha='right', fontsize=8)
    ax.grid(True, alpha=0.3)
    ax.axhline(y=1.0, color='green', linestyle='-', alpha=0.5, label='Clean')
    ax.axhline(y=config.FOULING_WARNING_THRESHOLD, color='orange', linestyle='--',
               alpha=0.5, label='Warning')
    ax.axhline(y=config.FOULING_MAINTENANCE_THRESHOLD, color='red', linestyle='--',
               alpha=0.5, label='Maintenance')
    ax.legend(fontsize=8)

    # Color bars based on fouling level
    for bar, value in zip(bars, fouling_data):
        if value >= config.FOULING_MAINTENANCE_THRESHOLD:
            bar.set_color('red')
        elif value >= config.FOULING_WARNING_THRESHOLD:
            bar.set_color('orange')
        else:
            bar.set_color('green')


def _plot_daily_load(ax, data, config):
    """Panel 10: mean load factor per hour of day."""
    if 'timestamp' not in data.columns or 'load_factor' not in data.columns:
        return
    hourly_load = _as_percent(
        data.groupby(data['timestamp'].dt.hour)['load_factor'].mean())

    ax.plot(hourly_load.index, hourly_load.values, 'o-', linewidth=2, markersize=4)
    ax.set_title('Daily Load Pattern')
    ax.set_ylabel('Load Factor (%)')
    ax.set_xlabel('Hour of Day')
    ax.set_xticks(range(0, 24, 4))
    ax.grid(True, alpha=0.3)
    ax.fill_between(hourly_load.index, hourly_load.values, alpha=0.3)


def _plot_convergence(ax, data, config):
    """Panel 11: pie chart of converged versus non-converged records."""
    if 'solution_converged' not in data.columns:
        return
    convergence_rate = data['solution_converged'].mean()
    if pd.isna(convergence_rate):  # empty or all-NaN column: nothing to draw
        return

    labels = ['Converged', 'Not Converged']
    sizes = [convergence_rate, 1 - convergence_rate]
    colors = ['green' if convergence_rate >= 0.95 else 'orange', 'red']

    _, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%',
                                 colors=colors, startangle=90)
    ax.set_title(f'Solution Convergence Rate\n({convergence_rate:.1%})')

    # Make text smaller
    for text in texts + autotexts:
        text.set_fontsize(8)


def _plot_data_quality(ax, data, config):
    """Panel 12: completeness, convergence and in-target-efficiency scores."""
    quality_metrics = []
    quality_labels = []

    # Missing data percentage (an empty frame counts as having nothing missing)
    total_cells = data.size
    missing_pct = (data.isnull().sum().sum() / total_cells) * 100 if total_cells else 0.0
    quality_metrics.append(max(0, 100 - missing_pct))
    quality_labels.append(f'Completeness\n({100 - missing_pct:.1f}%)')

    # Convergence rate
    if 'solution_converged' in data.columns:
        conv_score = data['solution_converged'].mean() * 100
        quality_metrics.append(conv_score)
        quality_labels.append(f'Convergence\n({conv_score:.1f}%)')

    # Efficiency within target range
    if 'system_efficiency' in data.columns:
        eff_vals = data['system_efficiency']
        # Targets are fractions; scale them when the data is in percent.
        scale = 1 if eff_vals.max() <= _FRACTION_SCALE_LIMIT else 100
        in_range = eff_vals.between(config.EFFICIENCY_TARGET_MIN * scale,
                                    config.EFFICIENCY_TARGET_MAX * scale).mean() * 100
        quality_metrics.append(in_range)
        quality_labels.append(f'Eff. in Range\n({in_range:.1f}%)')

    bars = ax.bar(range(len(quality_metrics)), quality_metrics, alpha=0.7)
    ax.set_title('Data Quality Metrics')
    ax.set_ylabel('Score (%)')
    ax.set_xticks(range(len(quality_labels)))
    ax.set_xticklabels(quality_labels, fontsize=8)
    ax.set_ylim(0, 100)
    ax.grid(True, alpha=0.3)

    for bar, value in zip(bars, quality_metrics):
        # Color bars based on quality
        if value >= 95:
            bar.set_color('green')
        elif value >= 85:
            bar.set_color('orange')
        else:
            bar.set_color('red')
        # Add value labels
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 1,
                f'{value:.1f}%', ha='center', va='bottom', fontsize=8)


def create_performance_dashboard(data, config):
    """Draw and show a 3x4 dashboard summarising the simulation dataset.

    Each panel is drawn only when the columns it needs are present; otherwise
    its axes are left empty.

    Args:
        data: DataFrame of simulation records, or ``None`` (nothing is drawn).
        config: Object providing the efficiency, load-factor and fouling
            thresholds used for reference lines and coloring.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    if data is None:
        print("No data available for dashboard creation")
        return

    # Create main dashboard figure
    fig, axes = plt.subplots(3, 4, figsize=(20, 16))
    fig.suptitle('Massachusetts Boiler Performance Dashboard - Validated Simulation',
                 fontsize=16, fontweight='bold', y=0.98)

    # Panels in row-major order (top-left to bottom-right).
    panels = [
        _plot_efficiency_timeline,
        _plot_load_distribution,
        _plot_coal_quality_efficiency,
        _plot_temperature_correlation,
        _plot_soot_blowing_efficiency,
        _plot_monthly_efficiency,
        _plot_emissions_share,
        _plot_load_vs_efficiency,
        _plot_fouling_factors,
        _plot_daily_load,
        _plot_convergence,
        _plot_data_quality,
    ]
    for ax, draw_panel in zip(axes.flat, panels):
        draw_panel(ax, data, config)

    plt.tight_layout()
    plt.subplots_adjust(top=0.95)  # Make room for main title
    plt.show()


# Create the comprehensive dashboard
if data is not None:
    create_performance_dashboard(data, config)
else:
    print("Cannot create dashboard - data not loaded")

In [None]:
# Load and analyze the data
print("Loading validated Massachusetts boiler simulation dataset...")
data = load_validated_boiler_data(config.CSV_FILE, config.METADATA_FILE)

if data is None:
    # The loader returns None when the file is missing or unreadable.
    print("Failed to load data. Please check file path.")
else:
    print(f"Dataset loaded successfully: {len(data):,} records")

    # Run each analysis in turn and keep its result for later cells
    perf_results = analyze_operational_performance(data, config)
    coal_results = analyze_coal_quality_impact(data)
    soot_results = analyze_soot_blowing_patterns(data)

    # Render the summary figure
    create_performance_dashboard(data, config)

    print("\nAnalysis complete!")