Input-output semantic metrics

GRAMMAR

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-model result tables for this cell (file prefix ``gr_``); the plotting
# code below reads the WER, CER and semantic-metric columns from them.
df_gpt = pd.read_csv(filepath_or_buffer="gr_gpt_ALL.csv")
df_gemini = pd.read_csv(filepath_or_buffer="gr_gemini_ALL.csv")

def plot_binned_median(ax, x, y, color, label, linestyle='-', n_bins=20, min_samples=40):
    """Plot the per-bin median of ``y`` over equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The observed range of ``x``
    is split into ``n_bins`` equal-width bins, and a bin contributes one
    point (bin centre, median of ``y``) only when it holds at least
    ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        min_samples: Minimum number of samples a bin needs to be plotted.

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (computed on the
        raw samples, not on the binned medians), or NaN when nothing is left
        after NaN removal.
    """
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    bins = np.linspace(x_clean.min(), x_clean.max(), n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Bins are half-open [lo, hi) except the last one, which is closed so
        # that samples equal to the maximum are not silently dropped.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        # Only bins with enough samples give a trustworthy median.
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # The marker is given by keyword: a format string such as 'o-' would
        # also define a line style and clash with the ``linestyle`` argument.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    # Correlation of the original (unbinned) samples.
    return np.corrcoef(x_clean, y_clean)[0, 1]

# One colour per semantic metric, shared by every panel.
color_acc = '#e74c3c'
color_comp = '#2ecc71'
color_kcc = '#3498db'

# (dataframe column, legend label, colour) of each curve.
metrics = [
    ('acc_median_final', 'Accuracy', color_acc),
    ('comp_final_median', 'Completeness', color_comp),
    ('key_concept_coverage', 'KCC', color_kcc)
]

fig, axes = plt.subplots(2, 2, figsize=(16, 11))

# Grid layout: one column per model (GPT left, Gemini right) and one row per
# error rate (WER top, CER bottom).
for col_idx, (model_name, model_df) in enumerate([('GPT', df_gpt), ('Gemini', df_gemini)]):
    for row_idx, error_col in enumerate(['WER', 'CER']):
        ax = axes[row_idx, col_idx]
        for metric_col, metric_name, color in metrics:
            corr = plot_binned_median(ax, model_df[error_col], model_df[metric_col],
                                      color, metric_name, linestyle='-')
        ax.set_xlabel(error_col, fontsize=12, fontweight='bold')
        ax.set_ylabel('Metric Value (Median per bin)', fontsize=12, fontweight='bold')
        ax.set_title(f'{model_name} - {error_col} vs All Metrics',
                     fontsize=13, fontweight='bold')
        ax.set_ylim([0, 1.05])
        ax.grid(alpha=0.3)
        ax.legend(fontsize=11, loc='best')

plt.tight_layout()
plt.savefig("gpt_vs_gemini_all_metrics_filtered.png", dpi=150, bbox_inches='tight')
plt.show()


SYNTAX

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-model result tables for this cell (file prefix ``synt_``); the plotting
# code below reads the WER, CER and semantic-metric columns from them.
df_gpt = pd.read_csv(filepath_or_buffer="synt_gpt_ALL.csv")
df_gemini = pd.read_csv(filepath_or_buffer="synt_gemini_ALL.csv")

def plot_binned_median(ax, x, y, color, label, linestyle='-', n_bins=20, min_samples=40):
    """Plot the per-bin median of ``y`` over equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The observed range of ``x``
    is split into ``n_bins`` equal-width bins, and a bin contributes one
    point (bin centre, median of ``y``) only when it holds at least
    ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        min_samples: Minimum number of samples a bin needs to be plotted.

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (raw samples, not
        binned medians), or NaN when nothing is left after NaN removal.
    """
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    bins = np.linspace(x_clean.min(), x_clean.max(), n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Half-open [lo, hi) bins, except the last one which is closed so
        # samples equal to the maximum are kept.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        # Compute the median only if the bin holds enough samples.
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # Marker passed by keyword; an 'o-' format string would define the
        # line style a second time and clash with ``linestyle``.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    return np.corrcoef(x_clean, y_clean)[0, 1]

# One colour per semantic metric, shared by every panel.
color_acc = '#e74c3c'
color_comp = '#2ecc71'
color_kcc = '#3498db'

# (dataframe column, legend label, colour) of each curve.
metrics = [
    ('acc_median_final', 'Accuracy', color_acc),
    ('comp_final_median', 'Completeness', color_comp),
    ('key_concept_coverage', 'KCC', color_kcc)
]

fig, axes = plt.subplots(2, 2, figsize=(16, 11))

# Grid layout: one column per model (GPT left, Gemini right) and one row per
# error rate (WER top, CER bottom).
for col_idx, (model_name, model_df) in enumerate([('GPT', df_gpt), ('Gemini', df_gemini)]):
    for row_idx, error_col in enumerate(['WER', 'CER']):
        ax = axes[row_idx, col_idx]
        for metric_col, metric_name, color in metrics:
            corr = plot_binned_median(ax, model_df[error_col], model_df[metric_col],
                                      color, metric_name, linestyle='-')
        ax.set_xlabel(error_col, fontsize=12, fontweight='bold')
        ax.set_ylabel('Metric Value (Median per bin)', fontsize=12, fontweight='bold')
        ax.set_title(f'{model_name} - {error_col} vs All Metrics',
                     fontsize=13, fontweight='bold')
        ax.set_ylim([0, 1.05])
        ax.grid(alpha=0.3)
        ax.legend(fontsize=11, loc='best')

plt.tight_layout()
# The file name carries the ``synt_`` prefix of the input data: the grammar
# cell saves to "gpt_vs_gemini_all_metrics_filtered.png", and reusing that
# name here would overwrite its figure.
plt.savefig("synt_gpt_vs_gemini_all_metrics_filtered.png", dpi=150, bbox_inches='tight')
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read the data: per-model result tables (file prefix ``read_``) holding the
# readability-index and semantic-metric columns plotted below.
df_gpt = pd.read_csv(filepath_or_buffer="read_gpt_ALL.csv")
df_gemini = pd.read_csv(filepath_or_buffer="read_gemini_ALL.csv")

# Split x into equal-width bands and plot the median of y in each band.
def plot_binned_median(ax, x, y, color, label, metric_name, linestyle='-', n_bins=20,
                       x_range=(0, 100), min_samples=1):
    """Plot the per-bin median of ``y`` over fixed equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The interval ``x_range`` is
    split into ``n_bins`` equal-width bins; samples outside that interval are
    not plotted (they still enter the returned correlation). A bin
    contributes one point (bin centre, median of ``y``) when it holds at
    least ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        metric_name: Accepted for call compatibility; not used by the body.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        x_range: ``(low, high)`` interval covered by the bins.
        min_samples: Minimum number of samples a bin needs to be plotted.

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (raw samples, not
        binned medians), or NaN when nothing is left after NaN removal.
    """
    # Drop NaN rows.
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    x_min, x_max = x_range
    bins = np.linspace(x_min, x_max, n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Half-open [lo, hi) bins, except the last one which is closed so a
        # sample sitting exactly on the upper bound is kept.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # Marker passed by keyword; an 'o-' format string would define the
        # line style a second time and clash with ``linestyle``.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    return np.corrcoef(x_clean, y_clean)[0, 1]

# One colour per semantic metric, shared by every panel.
color_acc = '#e74c3c'
color_comp = '#2ecc71'
color_kcc = '#3498db'

# (dataframe column, legend label, colour) of each curve.
metrics = [
    ('acc_median_final', 'Accuracy', color_acc),
    ('comp_final_median', 'Completeness', color_comp),
    ('key_concept_coverage', 'KCC', color_kcc)
]

fig, axes = plt.subplots(3, 2, figsize=(16, 15))

# (dataframe column, axis label) of each readability index; one row each.
readability_indices = [
    ('Flesch_Reading_Ease', 'Flesch Reading Ease'),
    ('Flesch_Kincaid_Grade', 'Flesch-Kincaid Grade'),
    ('Gunning_Fog_Index', 'Gunning Fog Index')
]

# Left column is GPT, right column is Gemini.
for col_idx, (model_name, model_df) in enumerate([('GPT', df_gpt), ('Gemini', df_gemini)]):
    for i, (read_col, read_name) in enumerate(readability_indices):
        ax = axes[i, col_idx]
        for metric_col, metric_name, color in metrics:
            corr = plot_binned_median(ax, model_df[read_col], model_df[metric_col],
                                      color, metric_name, read_col, linestyle='-')
        ax.set_xlabel(read_name, fontsize=12, fontweight='bold')
        ax.set_ylabel('Metric Value (Median per bin)', fontsize=12, fontweight='bold')
        ax.set_title(f'{model_name} - {read_name} vs All Metrics',
                     fontsize=13, fontweight='bold')
        # Flesch Reading Ease is shown on 0-100, the other two indices on 0-50.
        if read_col == 'Flesch_Reading_Ease':
            ax.set_xlim([0, 100])
        else:
            ax.set_xlim([0, 50])
        ax.set_ylim([0, 1.05])
        ax.grid(alpha=0.3)
        ax.legend(fontsize=11, loc='best')

plt.tight_layout()
plt.savefig("gpt_vs_gemini_readability_metrics.png", dpi=150, bbox_inches='tight')
plt.show()


INFDENS

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-model result tables for this cell (file prefix ``infdens_``); the
# plotting code below reads the ``score`` and semantic-metric columns.
df_gpt = pd.read_csv(filepath_or_buffer="infdens_gpt_ALL.csv")
df_gemini = pd.read_csv(filepath_or_buffer="infdens_gemini_ALL.csv")

def plot_binned_median(ax, x, y, color, label, linestyle='-', n_bins=20,
                       min_samples=40, x_range=(-0.1, 1)):
    """Plot the per-bin median of ``y`` over fixed equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The interval ``x_range`` is
    split into ``n_bins`` equal-width bins; samples outside that interval are
    not plotted (they still enter the returned correlation). A bin
    contributes one point (bin centre, median of ``y``) only when it holds
    at least ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        min_samples: Minimum number of samples a bin needs to be plotted.
        x_range: ``(low, high)`` interval covered by the bins.

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (raw samples, not
        binned medians), or NaN when nothing is left after NaN removal.
    """
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    # Fixed binning range (default -0.1 to 1), independent of the data.
    x_min, x_max = x_range
    bins = np.linspace(x_min, x_max, n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Half-open [lo, hi) bins, except the last one which is closed so a
        # sample sitting exactly on the upper bound is kept.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # Marker passed by keyword; an 'o-' format string would define the
        # line style a second time and clash with ``linestyle``.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    return np.corrcoef(x_clean, y_clean)[0, 1]

# One colour per semantic metric, shared by both panels.
color_acc = '#e74c3c'
color_comp = '#2ecc71'
color_kcc = '#3498db'

# (dataframe column, legend label, colour) of each curve.
metrics = [
    ('acc_median_final', 'Accuracy', color_acc),
    ('comp_final_median', 'Completeness', color_comp),
    ('key_concept_coverage', 'KCC', color_kcc)
]

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# One panel per model: GPT on the left, Gemini on the right.
for ax, (model_name, model_df) in zip(axes, [('GPT', df_gpt), ('Gemini', df_gemini)]):
    for metric_col, metric_name, color in metrics:
        corr = plot_binned_median(ax, model_df['score'], model_df[metric_col], color, metric_name)
    ax.set_xlabel('Score', fontsize=12, fontweight='bold')
    ax.set_ylabel('Metric Value (Median per bin)', fontsize=12, fontweight='bold')
    ax.set_title(f'{model_name} - Score vs All Metrics', fontsize=13, fontweight='bold')
    ax.set_xlim([-0.2, 0.9])
    ax.set_ylim([0, 1.05])
    ax.grid(alpha=0.3)
    ax.legend(fontsize=11, loc='best')

plt.tight_layout()
plt.savefig("gpt_vs_gemini_score_metrics.png", dpi=150, bbox_inches='tight')
plt.show()


Input-output error metric

GRAMMAR

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-model result tables for this cell (file prefix ``gr_``); the plotting
# code below reads the error-count, WER and CER columns from them.
df_gpt = pd.read_csv(filepath_or_buffer="gr_gpt_ALL.csv")
df_gemini = pd.read_csv(filepath_or_buffer="gr_gemini_ALL.csv")

def plot_binned_median(ax, x, y, color, label, linestyle='-', n_bins=20, min_samples=40):
    """Plot the per-bin median of ``y`` over equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The observed range of ``x``
    is split into ``n_bins`` equal-width bins, and a bin contributes one
    point (bin centre, median of ``y``) only when it holds at least
    ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        min_samples: Minimum number of samples a bin needs to be plotted.

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (raw samples, not
        binned medians), or NaN when nothing is left after NaN removal.
    """
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    bins = np.linspace(x_clean.min(), x_clean.max(), n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Half-open [lo, hi) bins, except the last one which is closed so
        # samples equal to the maximum are kept.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # Marker passed by keyword; an 'o-' format string would define the
        # line style a second time and clash with ``linestyle``.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    return np.corrcoef(x_clean, y_clean)[0, 1]

color_wer = '#e74c3c'      # WER = red
color_cer = '#3498db'      # CER = blue

fig, axes = plt.subplots(2, 2, figsize=(16, 11))

# (dataframe column, axis label, upper x limit) of each row of panels.
error_rows = [
    ('Num_addition', 'Num Addition', 20),
    ('Num_direct_modifications', 'Num Replicate Errors', 30),
]

# Left column is GPT, right column is Gemini.
for col_idx, (model_name, model_df) in enumerate([('GPT', df_gpt), ('Gemini', df_gemini)]):
    for row_idx, (error_col, error_name, x_upper) in enumerate(error_rows):
        ax = axes[row_idx, col_idx]
        corr_wer = plot_binned_median(ax, model_df[error_col], model_df['WER'],
                                      color_wer, 'WER', linestyle='-')
        corr_cer = plot_binned_median(ax, model_df[error_col], model_df['CER'],
                                      color_cer, 'CER', linestyle='-')
        ax.set_xlabel(error_name, fontsize=12, fontweight='bold')
        ax.set_ylabel('WER/CER (Median per bin)', fontsize=12, fontweight='bold')
        ax.set_title(f'{model_name} - {error_name} vs WER/CER',
                     fontsize=13, fontweight='bold')
        ax.set_xlim([0, x_upper])
        ax.grid(alpha=0.3)
        ax.legend(fontsize=11, loc='best')

plt.tight_layout()
plt.savefig("gpt_vs_gemini_errors_vs_wer_cer_filtered.png", dpi=150, bbox_inches='tight')
plt.show()



SYNTAX

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-model result tables for this cell (file prefix ``synt_``); the plotting
# code below reads the error-count, WER and CER columns from them.
df_gpt = pd.read_csv(filepath_or_buffer="synt_gpt_ALL.csv")
df_gemini = pd.read_csv(filepath_or_buffer="synt_gemini_ALL.csv")

def plot_binned_median(ax, x, y, color, label, linestyle='-', n_bins=20, min_samples=40):
    """Plot the per-bin median of ``y`` over equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The observed range of ``x``
    is split into ``n_bins`` equal-width bins, and a bin contributes one
    point (bin centre, median of ``y``) only when it holds at least
    ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        min_samples: Minimum number of samples a bin needs to be plotted.

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (raw samples, not
        binned medians), or NaN when nothing is left after NaN removal.
    """
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    bins = np.linspace(x_clean.min(), x_clean.max(), n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Half-open [lo, hi) bins, except the last one which is closed so
        # samples equal to the maximum are kept.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # Marker passed by keyword; an 'o-' format string would define the
        # line style a second time and clash with ``linestyle``.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    return np.corrcoef(x_clean, y_clean)[0, 1]

color_wer = '#e74c3c'      # WER = red
color_cer = '#3498db'      # CER = blue

fig, axes = plt.subplots(2, 2, figsize=(16, 11))

# (dataframe column, axis label, upper x limit) of each row of panels.
error_rows = [
    ('Num_addition', 'Num Addition', 20),
    ('Num_direct_modifications', 'Num Replicate Errors', 30),
]

# Left column is GPT, right column is Gemini.
for col_idx, (model_name, model_df) in enumerate([('GPT', df_gpt), ('Gemini', df_gemini)]):
    for row_idx, (error_col, error_name, x_upper) in enumerate(error_rows):
        ax = axes[row_idx, col_idx]
        corr_wer = plot_binned_median(ax, model_df[error_col], model_df['WER'],
                                      color_wer, 'WER', linestyle='-')
        corr_cer = plot_binned_median(ax, model_df[error_col], model_df['CER'],
                                      color_cer, 'CER', linestyle='-')
        ax.set_xlabel(error_name, fontsize=12, fontweight='bold')
        ax.set_ylabel('WER/CER (Median per bin)', fontsize=12, fontweight='bold')
        ax.set_title(f'{model_name} - {error_name} vs WER/CER',
                     fontsize=13, fontweight='bold')
        ax.set_xlim([0, x_upper])
        ax.grid(alpha=0.3)
        ax.legend(fontsize=11, loc='best')

plt.tight_layout()
# The file name carries the ``synt_`` prefix of the input data: the grammar
# cell saves to "gpt_vs_gemini_errors_vs_wer_cer_filtered.png", and reusing
# that name here would overwrite its figure.
plt.savefig("synt_gpt_vs_gemini_errors_vs_wer_cer_filtered.png", dpi=150, bbox_inches='tight')
plt.show()



READ

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-model result tables for this cell (file prefix ``read_``); the plotting
# code below reads the error-count and readability-index columns from them.
df_gpt = pd.read_csv(filepath_or_buffer="read_gpt_ALL.csv")
df_gemini = pd.read_csv(filepath_or_buffer="read_gemini_ALL.csv")

def plot_binned_median(ax, x, y, color, label, linestyle='-', n_bins=20, min_samples=40):
    """Plot the per-bin median of ``y`` over equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The observed range of ``x``
    is split into ``n_bins`` equal-width bins, and a bin contributes one
    point (bin centre, median of ``y``) only when it holds at least
    ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        min_samples: Minimum number of samples a bin needs to be plotted
            (default 40).

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (raw samples, not
        binned medians), or NaN when nothing is left after NaN removal.
    """
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    bins = np.linspace(x_clean.min(), x_clean.max(), n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Half-open [lo, hi) bins, except the last one which is closed so
        # samples equal to the maximum are kept.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # Marker passed by keyword; an 'o-' format string would define the
        # line style a second time and clash with ``linestyle``.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    return np.corrcoef(x_clean, y_clean)[0, 1]

# One colour per readability index, shared by every panel.
color_fre = '#e74c3c'
color_fk = '#2ecc71'
color_gfi = '#3498db'

# (dataframe column, legend label, colour) of each curve.
readability_metrics = [
    ('Flesch_Reading_Ease', 'Flesch Reading Ease', color_fre),
    ('Flesch_Kincaid_Grade', 'Flesch-Kincaid Grade', color_fk),
    ('Gunning_Fog_Index', 'Gunning Fog Index', color_gfi)
]

fig, axes = plt.subplots(2, 2, figsize=(16, 11))

# (grid position, model name, dataframe, x column, x label, x limits, y limits).
# Left column is GPT, right column is Gemini; axis limits differ per panel
# (presumably hand-tuned to each dataset -- confirm before changing).
panels = [
    ((0, 0), 'GPT', df_gpt, 'Num_addition', 'Num Addition', [0, 8], [0, 100]),
    ((1, 0), 'GPT', df_gpt, 'Num_direct_modifications', 'Num Replicate Errors', [0, 12], [-50, 50]),
    ((0, 1), 'Gemini', df_gemini, 'Num_addition', 'Num Addition', [0, 10], [0, 60]),
    ((1, 1), 'Gemini', df_gemini, 'Num_direct_modifications', 'Num Replicate Errors', [0, 10], [0, 50]),
]

for position, model_name, model_df, error_col, error_name, x_limits, y_limits in panels:
    ax = axes[position]
    for metric_col, metric_name, color in readability_metrics:
        corr = plot_binned_median(ax, model_df[error_col], model_df[metric_col],
                                  color, metric_name, linestyle='-')
    ax.set_xlabel(error_name, fontsize=12, fontweight='bold')
    ax.set_ylabel('Readability Score (Median per bin)', fontsize=12, fontweight='bold')
    ax.set_title(f'{model_name} - {error_name} vs Readability Indices',
                 fontsize=13, fontweight='bold')
    ax.set_xlim(x_limits)
    ax.set_ylim(y_limits)
    ax.grid(alpha=0.3)
    ax.legend(fontsize=11, loc='best')

plt.tight_layout()
plt.savefig("gpt_vs_gemini_errors_vs_readability.png", dpi=150, bbox_inches='tight')
plt.show()


INFDENS

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Per-model result tables for this cell (file prefix ``infdens_``). The
# Gemini frame must come from the Gemini file: loading the GPT file twice
# would make the right-hand panels a copy of the GPT ones.
df_gpt = pd.read_csv("infdens_gpt_ALL.csv")
df_gemini = pd.read_csv("infdens_gemini_ALL.csv")

def plot_binned_median(ax, x, y, color, label, linestyle='-', n_bins=20, min_samples=40):
    """Plot the per-bin median of ``y`` over equal-width bins of ``x``.

    Rows where either value is NaN are dropped. The observed range of ``x``
    is split into ``n_bins`` equal-width bins, and a bin contributes one
    point (bin centre, median of ``y``) only when it holds at least
    ``min_samples`` samples.

    Args:
        ax: Object exposing a matplotlib-style ``plot`` method.
        x: Series-like values to bin (must support boolean-mask indexing
            and ``.values``; the callers pass DataFrame columns).
        y: Series-like values, aligned with ``x``, whose medians are plotted.
        color: Line and marker colour.
        label: Legend label of the curve.
        linestyle: Line style forwarded to ``ax.plot``.
        n_bins: Number of equal-width bins.
        min_samples: Minimum number of samples a bin needs to be plotted
            (default 40).

    Returns:
        Pearson correlation of the NaN-free ``x`` and ``y`` (raw samples, not
        binned medians), or NaN when nothing is left after NaN removal.
    """
    keep = ~(np.isnan(x) | np.isnan(y))
    x_clean = x[keep].values
    y_clean = y[keep].values

    if len(x_clean) == 0:
        return np.nan

    bins = np.linspace(x_clean.min(), x_clean.max(), n_bins + 1)
    last = len(bins) - 2

    x_centers = []
    y_medians = []
    for i, (lo, hi) in enumerate(zip(bins[:-1], bins[1:])):
        # Half-open [lo, hi) bins, except the last one which is closed so
        # samples equal to the maximum are kept.
        below_hi = (x_clean <= hi) if i == last else (x_clean < hi)
        in_bin = (x_clean >= lo) & below_hi
        if in_bin.sum() >= min_samples:
            y_medians.append(np.median(y_clean[in_bin]))
            x_centers.append((lo + hi) / 2)

    if y_medians:
        # Marker passed by keyword; an 'o-' format string would define the
        # line style a second time and clash with ``linestyle``.
        ax.plot(x_centers, y_medians, marker='o', color=color, linewidth=2.5,
                markersize=8, label=label, linestyle=linestyle, alpha=0.8)

    # Correlation of the original (unbinned) samples.
    return np.corrcoef(x_clean, y_clean)[0, 1]

# Single curve per panel, so a single colour.
color_score = '#3498db'

fig, axes = plt.subplots(2, 2, figsize=(16, 11))

# (grid position, model name, dataframe, x column, x label, upper x limit).
# Left column is GPT, right column is Gemini.
panels = [
    ((0, 0), 'GPT', df_gpt, 'Num_addition', 'Num Addition', 7),
    ((1, 0), 'GPT', df_gpt, 'Num_direct_modifications', 'Num Replicate Errors', 16),
    ((0, 1), 'Gemini', df_gemini, 'Num_addition', 'Num Addition', 7),
    ((1, 1), 'Gemini', df_gemini, 'Num_direct_modifications', 'Num Replicate Errors', 18),
]

for position, model_name, model_df, error_col, error_name, x_upper in panels:
    ax = axes[position]
    # The returned correlation is not needed here; only the curve is drawn.
    plot_binned_median(ax, model_df[error_col], model_df['score'],
                       color_score, 'score', linestyle='-')
    ax.set_xlabel(error_name, fontsize=12, fontweight='bold')
    ax.set_ylabel('score (Median per bin)', fontsize=12, fontweight='bold')
    ax.set_title(f'{model_name} - {error_name} vs score', fontsize=13, fontweight='bold')
    ax.set_xlim([0, x_upper])
    ax.set_ylim([0, 1])
    ax.grid(alpha=0.3)
    ax.legend(fontsize=11, loc='best')

plt.tight_layout()
plt.savefig("gpt_vs_gemini_errors_vs_score.png", dpi=150, bbox_inches='tight')
plt.show()
