In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Configuration ---
# Paths are relative to the working directory and are read with pd.read_csv
# inside generate_fl_plots().
# Global metrics CSV: the code below reads its 'round' and 'accuracy' columns.
GLOBAL_METRICS_FILE = "./global_metrics.csv"
# Client metrics CSV: the code below reads 'phase', 'round', 'accuracy', 'cid'.
CLIENT_METRICS_FILE = "./client_metrics.csv"
# NOTE(review): not referenced by any code in the visible cells; the final-round
# bar chart draws one bar per row found in the data instead.
FINAL_ROUND_CLIENT_COUNT = 12 # Adjust if your client count changes

def _plot_global_convergence(df_global, df_avg_client_acc, max_round,
                             out_path='global_convergence_plot.png'):
    """Save a line chart of global accuracy vs. mean client accuracy per round.

    Args:
        df_global: Frame with 'round' and 'accuracy' columns.
        df_avg_client_acc: Frame with 'round' and 'avg_client_accuracy' columns.
        max_round: Largest round number; upper bound of the x-axis ticks.
        out_path: Image file to write.
    """
    # Inner join: only rounds present in both tables are plotted.
    df_combined = pd.merge(df_global, df_avg_client_acc, on='round', how='inner')

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(df_combined['round'], df_combined['accuracy'],
                label='Global Model Accuracy (FedAvg)',
                marker='o', linestyle='-', color='blue')
        ax.plot(df_combined['round'], df_combined['avg_client_accuracy'],
                label='Average Local Client Accuracy',
                marker='s', linestyle='--', color='red', alpha=0.7)

        ax.set_title('FL Convergence: Global Model vs. Average Client Accuracy Over Rounds', fontsize=14)
        ax.set_xlabel('Server Round', fontsize=12)
        ax.set_ylabel('Accuracy', fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend(fontsize=10)
        ax.set_xticks(np.arange(0, max_round + 1, 2))  # Show every 2nd round
        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)


def _plot_client_heterogeneity(df_global, df_client_eval, max_round,
                               out_path='client_heterogeneity_plot.png'):
    """Save a bar chart of each client's accuracy in round ``max_round``.

    Args:
        df_global: Frame with 'round' and 'accuracy' columns; must contain a
            row for ``max_round`` (its accuracy is drawn as a reference line).
        df_client_eval: Evaluation rows with 'round', 'cid' and 'accuracy'.
        max_round: Round whose per-client accuracy is plotted.
        out_path: Image file to write.
    """
    # Cast cid to int before sorting so the bar order is numeric regardless of
    # how the column was parsed.
    df_final_round = (
        df_client_eval[df_client_eval['round'] == max_round]
        .assign(cid=lambda frame: frame['cid'].astype(int))
        .sort_values(by='cid')
    )
    if df_final_round.empty:
        # Still render (reference line only), but make the gap visible.
        print(f"Warning: no client evaluation rows found for round {max_round}.")

    global_acc_final = df_global.loc[df_global['round'] == max_round, 'accuracy'].iloc[0]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.bar(df_final_round['cid'].map('Client {}'.format),
                      df_final_round['accuracy'],
                      color='teal')

        # Global model accuracy in the same round, for comparison.
        ax.axhline(global_acc_final, color='red', linestyle='--',
                   label=f'Global Accuracy ({global_acc_final:.4f})', linewidth=2)

        # Annotate every bar with its value, slightly above the bar top.
        for bar in bars:
            yval = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2, yval + 0.01,
                    f'{yval:.4f}', ha='center', va='bottom', fontsize=9)

        ax.set_title(f'Client Performance Heterogeneity (Round {max_round})', fontsize=14)
        ax.set_xlabel('Client ID', fontsize=12)
        ax.set_ylabel('Local Validation Accuracy', fontsize=12)
        ax.set_ylim(0, 1)  # Set Y-axis from 0 to 1 for accuracy visualization
        ax.legend()
        ax.grid(axis='y', linestyle='--', alpha=0.6)
        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)


def generate_fl_plots(global_metrics_file=None, client_metrics_file=None):
    """Load the metric CSVs and write the two summary plots as PNG files.

    Writes 'global_convergence_plot.png' (global vs. mean client accuracy per
    round) and 'client_heterogeneity_plot.png' (per-client accuracy in the
    last round found in the global metrics).

    Args:
        global_metrics_file: CSV with 'round' and 'accuracy' columns. Defaults
            to GLOBAL_METRICS_FILE, looked up at call time.
        client_metrics_file: CSV with 'phase', 'round', 'cid' and 'accuracy'
            columns. Defaults to CLIENT_METRICS_FILE, looked up at call time.

    Returns:
        None. A missing file or an empty global table is reported with a
        printed message and no plots are written.
    """
    global_path = GLOBAL_METRICS_FILE if global_metrics_file is None else global_metrics_file
    client_path = CLIENT_METRICS_FILE if client_metrics_file is None else client_metrics_file

    # 1. Load Data
    try:
        df_global = pd.read_csv(global_path)
        df_client = pd.read_csv(client_path)
    except FileNotFoundError as e:
        print(f"Error: {e}. Ensure both CSV files are in the current directory.")
        return

    # With no rows, max() is NaN and the tick computation below would fail
    # with an unrelated-looking error.
    if df_global.empty:
        print(f"Error: {global_path} contains no rows; nothing to plot.")
        return

    # --- Data Cleaning and Aggregation ---

    # Only rows logged in the 'evaluate' phase are used.
    df_client_eval = df_client[df_client['phase'] == 'evaluate'].copy()

    # Mean evaluation accuracy across clients, one row per round.
    df_avg_client_acc = (
        df_client_eval.groupby('round')['accuracy']
        .mean()
        .reset_index()
        .rename(columns={'accuracy': 'avg_client_accuracy'})
    )

    # The final round is defined by the global metrics table.
    max_round = df_global['round'].max()
    print(f"Data loaded successfully. Max round found: {max_round}")

    # PLOT 1: Global Convergence vs. Average Client Performance
    _plot_global_convergence(df_global, df_avg_client_acc, max_round)
    print("Generated plot: global_convergence_plot.png")

    # PLOT 2: Client Heterogeneity (Final Round Performance)
    _plot_client_heterogeneity(df_global, df_client_eval, max_round)
    print("Generated plot: client_heterogeneity_plot.png")

# Entry point: run the whole plotting pipeline when this code is executed
# directly (not when it is imported as a module).
if __name__ == '__main__':
    generate_fl_plots()

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# --- Configuration ---
# Paths are relative to the working directory and are read with pd.read_csv
# further down in this cell.
# Global metrics CSV: the code below reads its 'round' and 'accuracy' columns.
GLOBAL_METRICS_FILE = "./global_metrics.csv"
# Client metrics CSV: the code below reads 'phase', 'round', 'accuracy', 'cid'.
CLIENT_METRICS_FILE = "./client_metrics.csv"
# NOTE(review): not referenced by any code in the visible cells.
FINAL_ROUND_CLIENT_COUNT = 12 # Adjust if your client count changes


# Load both metric tables. This is top-level cell code, so there is no
# enclosing function to `return` from (a bare `return` here is a SyntaxError
# that prevents the whole cell from compiling). Print the hint, then re-raise
# so execution stops here instead of failing later on undefined frames.
try:
    df_global = pd.read_csv(GLOBAL_METRICS_FILE)
    df_client = pd.read_csv(CLIENT_METRICS_FILE)
except FileNotFoundError as e:
    print(f"Error: {e}. Ensure both CSV files are in the current directory.")
    raise

# --- Data Cleaning and Aggregation ---

# Keep only rows logged in the 'evaluate' phase; .copy() detaches the result
# from df_client.
df_client_eval = df_client.loc[df_client['phase'].eq('evaluate')].copy()

# Mean evaluation accuracy across clients, one row per round.
df_avg_client_acc = (
    df_client_eval
    .groupby('round')['accuracy']
    .mean()
    .reset_index()
    .rename(columns={'accuracy': 'avg_client_accuracy'})
)

# The final round is taken from the global metrics table.
max_round = df_global['round'].max()
print(f"Data loaded successfully. Max round found: {max_round}")


# ====================================================================
# PLOT 2: Client Heterogeneity (Final Round Performance)
# ====================================================================

# Evaluation rows of the final round. cid is cast to int *before* sorting so
# the bar order is numeric regardless of how the column was parsed.
df_final_round = (
    df_client_eval[df_client_eval['round'] == max_round]
    .assign(cid=lambda frame: frame['cid'].astype(int))
    .sort_values(by='cid')
)

# Keep explicit figure/axes handles instead of relying on pyplot's implicit
# "current figure", so the figure can still be addressed after this cell.
fig, ax = plt.subplots(figsize=(10, 6))

# One bar per client, labelled 'Client <cid>'.
bars = ax.bar(df_final_round['cid'].map('Client {}'.format),
              df_final_round['accuracy'],
              color='teal')

# Global model accuracy in the same round, drawn as a reference line.
global_acc_final = df_global.loc[df_global['round'] == max_round, 'accuracy'].iloc[0]
ax.axhline(global_acc_final, color='red', linestyle='--',
           label=f'Global Accuracy ({global_acc_final:.4f})', linewidth=2)

# Annotate every bar with its value, slightly above the bar top.
for bar in bars:
    yval = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, yval + 0.01,
            f'{yval:.4f}', ha='center', va='bottom', fontsize=9)

ax.set_title(f'Client Performance Heterogeneity (Round {max_round})', fontsize=14)
ax.set_xlabel('Client ID', fontsize=12)
ax.set_ylabel('Local Validation Accuracy', fontsize=12)
ax.set_ylim(0, 1)  # Set Y-axis from 0 to 1 for accuracy visualization
ax.legend()
ax.grid(axis='y', linestyle='--', alpha=0.6)
fig.tight_layout()




ModuleNotFoundError: No module named 'pandas'

In [None]:
# Save through the Figure that owns the bars drawn in the previous cell.
# plt.savefig() targets pyplot's *current* figure, and Jupyter's default inline
# backend closes figures when a cell finishes, so calling it from a later cell
# writes a new, blank figure. Fall back to the current figure only when no
# bars were drawn.
heterogeneity_fig = bars[0].figure if len(bars) else plt.gcf()
heterogeneity_fig.savefig('client_heterogeneity_plotv2.png')
plt.close(heterogeneity_fig)

# Report the file name that was actually written.
print("Generated plot: client_heterogeneity_plotv2.png")