In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Folder containing the metric CSV files (one file per repository)
csv_folder = '../output/master'

# Load one DataFrame per CSV file, keyed by the file name without extension.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the dictionary (and therefore the plot rows) in a stable order.
dfs = {}
for filename in sorted(os.listdir(csv_folder)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(csv_folder, filename)
    df = pd.read_csv(file_path)
    # Strip surrounding whitespace from the header names
    df.columns = df.columns.str.strip()
    # Remove any rows where the type is enum
    df = df[df['type'] != 'enum']
    # Remove any rows where the class name contains ".dto.".
    # regex=False matches the dots literally; as a regular expression each
    # "." would match any character and drop unrelated classes.
    # na=False keeps rows with a missing class name instead of producing a
    # NaN mask entry that cannot be negated with "~".
    df = df[~df['class'].str.contains('.dto.', regex=False, na=False)]
    # Optional: also exclude domain classes
    #df = df[~df['class'].str.contains('.domain.', regex=False, na=False)]
    # Keep only the class name and the metric columns, with display names
    df = df[['class', 'wmc', 'dit', 'noc', 'cbo', 'rfc', 'lcom']]
    df.columns = ['ClassName', 'WMC', 'DIT', 'NOC', 'CBO', 'RFC', 'LCOM']
    key = os.path.splitext(filename)[0]
    dfs[key] = df

# Metrics to analyse. The loaded frames also carry 'DIT' and 'NOC';
# they are not plotted.
metrics = ['WMC', 'CBO', 'RFC', 'LCOM']
num_metrics = len(metrics)
num_repos = len(dfs)

# Per-metric fraction of a repository's class count that is used as the
# upper y-axis limit of its histogram.
metrics_scale = dict(WMC=0.25, CBO=0.25, RFC=0.25, LCOM=0.25)

# Shared x-axis limits per metric (one plot column each) across all
# repositories: the lowest 5th percentile and the highest 95th percentile
# found in any frame. The seed values cap the lower limit at 1000 and keep
# the upper limit at 1 or above.
x_min_by_metric = {}
x_max_by_metric = {}
for metric in metrics:
    low_candidates = [float(1000)]
    high_candidates = [float(1)]
    for frame in dfs.values():
        low_candidates.append(frame[metric].quantile(0.05))
        high_candidates.append(frame[metric].quantile(0.95))
    x_min_by_metric[metric] = min(low_candidates)
    x_max_by_metric[metric] = max(high_candidates)

# Create the subplot grid: one row per repository, one column per metric.
# squeeze=False keeps `axes` two-dimensional even when only one CSV file was
# loaded; without it matplotlib returns a 1-D array and axes[row, col] fails.
fig, axes = plt.subplots(
    nrows=num_repos, ncols=num_metrics,
    figsize=(5 * num_metrics, 5 * num_repos),
    sharey=False, sharex=False, squeeze=False
)

for row, (repo_name, df) in enumerate(dfs.items()):
    for col, metric in enumerate(metrics):
        ax = axes[row, col]
        values = df[metric]
        x_min = x_min_by_metric[metric]
        x_max = x_max_by_metric[metric]
        # Split the shared x-range into 50 bins so every repository uses the
        # same bin width for a given metric. If the range is empty, a zero
        # bin width would make the histogram fail, so let seaborn choose.
        span = x_max - x_min
        binwidth = span / 50 if span > 0 else None
        sns.histplot(values, binwidth=binwidth, kde=True, ax=ax, edgecolor='black')
        ax.set_title(f'{metric} - {repo_name}', fontsize=14, fontweight='bold')
        ax.set_xlabel(metric, fontsize=12)
        ax.set_ylabel('Frequency', fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.7)
        # Summary statistics in the top-right corner of the subplot
        ax.text(
            0.95, 0.95,
            f'Classes: {len(values)}\nMean: {values.mean():.2f}\nMedian: {values.median():.2f}\nMax: {values.max():.2f}',
            transform=ax.transAxes, verticalalignment='top', horizontalalignment='right'
        )
        # Apply the shared x-limits for this metric; cap the y-axis at a
        # fixed fraction of the repository's class count.
        ax.set_xlim(x_min, x_max)
        ax.set_ylim(0, len(values) * metrics_scale[metric])
plt.tight_layout()
plt.show()

