In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

csv_folder = 'csv'

# Map each CSV's base name (file name without its extension) to the DataFrame
# loaded from it.
dfs = {}

# os.listdir() returns every directory entry (including sub-directories and
# non-CSV files) in arbitrary order. Sort the names so the resulting dict --
# and therefore anything that iterates it -- has a stable order across runs,
# and skip anything that is not a regular ``.csv`` file so pd.read_csv is
# never handed a directory or an unrelated file.
for filename in sorted(os.listdir(csv_folder)):
    file_path = os.path.join(csv_folder, filename)
    key, extension = os.path.splitext(filename)
    if extension.lower() != '.csv' or not os.path.isfile(file_path):
        continue
    dfs[key] = pd.read_csv(file_path)

# For every metric, draw one histogram per loaded DataFrame, side by side in a
# single row of subplots, and annotate each panel with summary statistics.
metrics = ['WMC', 'DIT', 'NOC', 'CBO', 'RFC', 'LCOM']

# plt.subplots() rejects ncols=0 with an obscure error; fail early with a
# message that points at the real cause instead.
if not dfs:
    raise ValueError('No DataFrames were loaded; there is nothing to plot.')

for metric in metrics:
    # squeeze=False makes `axes` a 2-D array of shape (1, len(dfs)) even when
    # there is only one DataFrame; with the default squeeze=True a single
    # subplot comes back as a bare Axes object that cannot be indexed.
    fig, axes = plt.subplots(nrows=1, ncols=len(dfs), figsize=(16, 5),
                             sharey=False, sharex=False, squeeze=False)

    # axes[0] is the single row of panels; pair each panel with one DataFrame.
    for ax, (repo_name, df) in zip(axes[0], dfs.items()):
        values = df[metric]  # raises KeyError if the CSV lacks this column

        ax.hist(values, bins=50, edgecolor='black')
        ax.set_title(f'{metric} - {repo_name}', fontsize=14, fontweight='bold')
        ax.set_xlabel(metric, fontsize=12)
        ax.set_ylabel('Frequency', fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.7)

        # Summary box anchored to the top-right corner in axes-relative
        # coordinates, so its position does not depend on the data range.
        summary = (
            f'Classes: {len(df)}\n\n'
            f'Mean: {values.mean():.2f}\n'
            f'Median: {values.median():.2f}\n'
            f'Max: {values.max():.2f}'
        )
        ax.text(0.95, 0.95, summary, transform=ax.transAxes,
                verticalalignment='top', horizontalalignment='right')

    plt.tight_layout()
    plt.show()