In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
import numpy as np
import plotly.colors as pc

def hex_to_rgba(hex_code, alpha=1.0):
    """Translate a color spec into an RGBA tuple carrying the given opacity.

    Args:
        hex_code: Color to convert; the callers in this notebook pass hex strings.
        alpha: Opacity to attach to the color (defaults to fully opaque).

    Returns:
        Whatever ``matplotlib.colors.to_rgba`` returns for the color/alpha pair.
    """
    rgba = mcolors.to_rgba(hex_code, alpha=alpha)
    return rgba

# Data setup
# Seed NumPy's global RNG so the synthetic scores are the same on every run.
np.random.seed(42)
enzyme_classes = [f'EC:{i}' for i in range(1, 6)]
split_methods = ['Random (Standard)', 'Clustered (Realistic)']
data_splits = ['Train', 'Test']

# Generate synthetic data: one record per simulated F1 measurement.
data_points = []
num_samples_per_condition = 40

for enzyme_class in enzyme_classes:
    for method in split_methods:
        for split in data_splits:
            # Skip train data for EC:5 (zero-shot scenario)
            if enzyme_class == 'EC:5' and split == 'Train':
                continue

            # Set performance characteristics: the random split scores high on
            # both train and test, while the clustered split keeps a high train
            # score but a low, much noisier test score.
            if method == 'Random (Standard)':
                base_score = 0.98 if split == 'Train' else 0.97
                noise_std = 0.015
            else:  # Clustered (Realistic)
                base_score = 0.96 if split == 'Train' else 0.45
                noise_std = 0.02 if split == 'Train' else 0.15

            # The label is identical for every sample of this condition.
            condition_label = f"{method} - {split}"

            # Generate samples
            for _ in range(num_samples_per_condition):
                score = base_score + np.random.normal(0, noise_std)

                # Add occasional outliers (5% of samples lose 0.3)
                if np.random.rand() < 0.05:
                    score -= 0.3

                # Clamp AFTER the outlier penalty: clamping first let penalised
                # samples drop below 0, which is not a valid F1 score.
                score = min(1.0, max(0.0, score))

                data_points.append({
                    'EC_Class': enzyme_class,
                    'Condition': condition_label,
                    'F1_Score': score
                })

df = pd.DataFrame(data_points)

# Colors and palette
# One base hue per split method, taken from Plotly's qualitative palette.
COLOR_RANDOM = pc.qualitative.Plotly[0]  # Blue
COLOR_CLUSTERED = pc.qualitative.Plotly[1]  # Red

# Order in which the conditions are passed to the plots as ``hue_order``.
condition_order = [
    "Random (Standard) - Train",
    "Random (Standard) - Test",
    "Clustered (Realistic) - Train",
    "Clustered (Realistic) - Test"
]

# (base color, alpha) for each entry of ``condition_order``, in the same order:
# train conditions are faded (alpha 0.3), test conditions are fully opaque.
_condition_styles = [
    (COLOR_RANDOM, 0.3),
    (COLOR_RANDOM, 1.0),
    (COLOR_CLUSTERED, 0.3),
    (COLOR_CLUSTERED, 1.0),
]

color_palette = {
    label: hex_to_rgba(base_hex, alpha=alpha)
    for label, (base_hex, alpha) in zip(condition_order, _condition_styles)
}

# Plot setup
sns.set_theme(style="white", rc={"axes.spines.left": False, "axes.spines.bottom": True})
fig, ax = plt.subplots(figsize=(10, 6))

# Arguments shared by both layers so boxes and points use the same grouping,
# ordering and colors on the same axes.
layer_kwargs = dict(
    data=df,
    x="EC_Class",
    y="F1_Score",
    hue="Condition",
    hue_order=condition_order,
    palette=color_palette,
    dodge=True,
    ax=ax,
)

# Boxplot (flier markers are sized 0; individual points come from the strip layer)
sns.boxplot(
    fliersize=0,
    linewidth=1.5,
    width=0.6,
    saturation=1,
    boxprops={"edgecolor": None},
    **layer_kwargs,
)

# Stripplot overlay
sns.stripplot(
    size=5,
    linewidth=1,
    edgecolor='white',
    jitter=0.2,
    **layer_kwargs,
)

# Styling: keep only the bottom spine and draw light horizontal grid lines
# behind the data.
sns.despine(ax=ax, left=True, bottom=False, top=True, right=True)
ax.yaxis.grid(True, color='#E0E0E0', linestyle='-', linewidth=0.5)
ax.set_axisbelow(True)

# Titles
# NOTE(review): x/y below are data coordinates (no transform is passed), so the
# heading position depends on the axis limits.
heading_lines = (
    (1.12, "Structure-aware splitting could reveal true model performance", 16, '#333333'),
    (1.07, "F1 Score comparison: Standard Random Split vs. Realistic Clustered Split", 11, '#666666'),
)
for text_y, text, font_size, font_color in heading_lines:
    ax.text(-0.5, text_y, text, fontsize=font_size, color=font_color, ha='left')

# Axes
ax.set_ylabel("F1 Score", fontsize=10)
for axis_name, tick_color in (('x', '#333333'), ('y', '#666666')):
    ax.tick_params(axis=axis_name, colors=tick_color, length=0)

# Legend: keep only the first four handles/labels (one per condition); the
# second layer presumably contributes a duplicate set.
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    handles[:4], labels[:4],
    loc='center left',
    bbox_to_anchor=(1, 0.5),
    ncol=1,
    frameon=False,
    title=None
)

# Reserve the right 15% of the figure for the legend placed outside the axes.
fig.tight_layout(rect=[0, 0, 0.85, 1])
fig.savefig('f1_score_comparison_enzyme_split.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
import altair as alt
import pandas as pd
import numpy as np

# Data setup
# Seed NumPy's global RNG so the synthetic scores are the same on every run.
np.random.seed(42)
enzyme_classes = [f'EC:{i}' for i in range(1, 6)]
split_methods = ['Random (Standard)', 'Clustered (Realistic)']
data_splits = ['Train', 'Test']

# Generate synthetic data: one record per simulated F1 measurement.
data_points = []
num_samples_per_condition = 40

for enzyme_class in enzyme_classes:
    for method in split_methods:
        for split in data_splits:
            # Skip train data for EC:5 (zero-shot scenario)
            if enzyme_class == 'EC:5' and split == 'Train':
                continue

            # Set performance characteristics: the random split scores high on
            # both train and test, while the clustered split keeps a high train
            # score but a low, much noisier test score.
            if method == 'Random (Standard)':
                base_score = 0.98 if split == 'Train' else 0.97
                noise_std = 0.015
            else:  # Clustered (Realistic)
                base_score = 0.96 if split == 'Train' else 0.45
                noise_std = 0.02 if split == 'Train' else 0.15

            # Generate samples
            for _ in range(num_samples_per_condition):
                score = base_score + np.random.normal(0, noise_std)

                # Add occasional outliers (5% of samples lose 0.3)
                if np.random.rand() < 0.05:
                    score -= 0.3

                # Clamp AFTER the outlier penalty: clamping first let penalised
                # samples drop below 0, which is not a valid F1 score and falls
                # outside the chart's [0, 1.05] y-domain.
                score = min(1.0, max(0.0, score))

                data_points.append({
                    'EC_Class': enzyme_class,
                    'Method': method,
                    'Split': split,
                    'F1_Score': score
                })

# Build the frame and derive the condition column used for coloring in one
# expression, so `df` is never observed without it.
df = pd.DataFrame(data_points).assign(
    Condition=lambda frame: frame['Method'] + ' - ' + frame['Split']
)

# Display order of the four conditions; shared by the color legend and by the
# side-by-side (xOffset) placement so both agree.
condition_domain = [
    'Random (Standard) - Train',
    'Random (Standard) - Test',
    'Clustered (Realistic) - Train',
    'Clustered (Realistic) - Test'
]

# Define colors with opacity: the train entries carry a trailing '80' alpha
# byte, the test entries are the plain hex colors.
color_scale = alt.Scale(
    domain=condition_domain,
    range=['#636EFA80', '#636EFA', '#EF553B80', '#EF553B']
)

# Without an explicit domain a nominal offset is laid out in sorted order
# (Clustered before Random, Test before Train), which contradicts the legend.
# Pinning the domain keeps the dodge order equal to `condition_domain`.
condition_offset = alt.XOffset(
    'Condition:N',
    scale=alt.Scale(domain=condition_domain)
)

# Create boxplot
boxplot = alt.Chart(df).mark_boxplot(
    size=50,
    opacity=0.7
).encode(
    x=alt.X('EC_Class:N', title='', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('F1_Score:Q', title='F1 Score', scale=alt.Scale(domain=[0, 1.05])),
    color=alt.Color('Condition:N', scale=color_scale, legend=alt.Legend(title=None)),
    xOffset=condition_offset
)

# Create stripplot (points overlaid on the boxes)
# NOTE(review): the 'jitter' field is calculated below but no encoding channel
# reads it, so the points of each condition are drawn in a single column. The
# offset channel is already used for dodging by condition; wiring the jitter in
# needs a separate design decision.
stripplot = alt.Chart(df).mark_circle(
    size=30,
    opacity=0.6
).encode(
    x=alt.X('EC_Class:N'),
    y=alt.Y('F1_Score:Q'),
    color=alt.Color('Condition:N', scale=color_scale, legend=None),
    xOffset=condition_offset
).transform_calculate(
    jitter='random() * 0.4 - 0.2'
)

# Combine layers
chart = (boxplot + stripplot).properties(
    width=700,
    height=400,
    title={
        'text': 'Structure-aware splitting could reveal true model performance',
        'subtitle': 'F1 Score comparison: Standard Random Split vs. Realistic Clustered Split',
        'fontSize': 16,
        'subtitleFontSize': 11,
        'anchor': 'start'
    }
).configure_view(
    strokeWidth=0
).configure_axis(
    gridColor='#E0E0E0',
    gridOpacity=0.5,
    domainWidth=0.5
).configure_legend(
    orient='right',
    direction='vertical',
    strokeColor='white'
)

# Write a standalone HTML copy, then leave `chart` as the last expression so
# the notebook renders it.
chart.save('f1_score_comparison_enzyme_split.html')
chart

In [None]:
# Install Altair into the environment of the running kernel (%pip, not !pip).
# NOTE(review): this cell sits after the cell that runs `import altair`, so on a
# fresh kernel it has to be executed before that cell (consider moving it to the
# top of the notebook and pinning a version).
%pip install altair