In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

csv_folder = 'csv'

# Map each CSV's base name (file name without its extension) to the DataFrame
# loaded from it.
dfs = {}

# Walk the folder in sorted order so that the resulting dict (and therefore
# anything that iterates over it later) has a reproducible order; os.listdir
# itself makes no ordering guarantee.
for filename in sorted(os.listdir(csv_folder)):
    file_path = os.path.join(csv_folder, filename)
    key, extension = os.path.splitext(filename)
    # Skip anything that is not a regular .csv file (sub-directories, editor
    # or OS side-files, ...) instead of handing it to pd.read_csv.
    if extension.lower() != '.csv' or not os.path.isfile(file_path):
        continue
    dfs[key] = pd.read_csv(file_path)

# Draw one histogram per (DataFrame, metric) pair in a grid of subplots:
# one row per entry in `dfs`, one column per metric.
metrics = ['WMC', 'DIT', 'NOC', 'CBO', 'RFC', 'LCOM']

num_repos = len(dfs)
num_metrics = len(metrics)

# plt.subplots rejects a zero-row grid with a message that does not point at
# the real cause, so fail early with an explicit explanation.
if num_repos == 0:
    raise ValueError('No DataFrames were loaded into `dfs`; there is nothing to plot.')

# squeeze=False keeps `axes` two-dimensional even when there is only a single
# row, so the axes[row, col] indexing below works for any number of DataFrames.
fig, axes = plt.subplots(
    nrows=num_repos,
    ncols=num_metrics,
    figsize=(5 * num_metrics, 5 * num_repos),
    sharey=False,
    sharex=False,
    squeeze=False,
)

for row, (repo_name, df) in enumerate(dfs.items()):
    for col, metric in enumerate(metrics):
        ax = axes[row, col]
        values = df[metric]
        ax.hist(values, bins=50, edgecolor='black')
        ax.set_title(f'{metric} - {repo_name}', fontsize=14, fontweight='bold')
        ax.set_xlabel(metric, fontsize=12)
        ax.set_ylabel('Frequency', fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.7)
        # Summary box in the top-right corner of the axes. The row count is an
        # integer, so it is printed without decimals; median and max keep two.
        summary = (
            f'Classes: {len(values)}\n'
            f'Median: {values.median():.2f}\n'
            f'Max: {values.max():.2f}'
        )
        ax.text(0.95, 0.95, summary,
                transform=ax.transAxes, verticalalignment='top', horizontalalignment='right')

plt.tight_layout()
plt.show()