# CI/CD Carbon Emissions — Statistical Analysis

**Research:** *Towards Greener Pipelines: Measuring and Optimising Carbon Emissions in CI/CD Workflows*  
**Subject project:** HTTPie CLI (fork of `httpie/cli`)  
**Experiment configurations:**
- **C1** — Baseline (3 original workflow files + Eco-CI)
- **C2** — Pip Caching added
- **C3** — Consolidated single workflow file
- **C4** — Combined (cache + path filters + consolidation)

Energy grid intensities used for SCI scoring:
- **Ireland** — 345 gCO₂eq/kWh
- **Norway** — 25 gCO₂eq/kWh

In [None]:
# ── Dependencies ─────────────────────────────────────────────────────────────
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from scipy import stats
from pathlib import Path

# Global Matplotlib styling: 150-dpi figures, DejaVu Sans text, and no
# top/right axis spines on any plot created below.
plt.rcParams['figure.dpi'] = 150
plt.rcParams['font.family'] = 'DejaVu Sans'
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False

# Folder that receives every saved figure; it is created (together with any
# missing parent folders) if it does not exist yet.
FIGURES_DIR = Path('../results/figures')
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# Grid carbon intensity, converted from gCO2eq/kWh to gCO2eq/J.
# 1 Wh = 3600 J  →  1 kWh = 3_600_000 J
# Only the energy unit is converted, so the result stays in *grams*
# (not kilograms) of CO2eq per joule.
GRID_IE = 345 / 3_600_000   # g CO2eq per joule, Ireland
GRID_NO = 25  / 3_600_000   # g CO2eq per joule, Norway

# Canonical ordering and colour of the four experiment configurations.
CONFIG_ORDER = ['C1', 'C2', 'C3', 'C4']
CONFIG_COLOURS = {'C1': '#4e79a7', 'C2': '#f28e2b', 'C3': '#59a14f', 'C4': '#e15759'}

# Bonferroni-corrected significance level for the three comparisons against
# C1. Kept unrounded: a rounded 0.017 is larger than 0.05 / 3 and would flag
# p-values between 0.01667 and 0.017 as significant when they are not.
BONFERRONI_ALPHA = 0.05 / 3

print('Libraries loaded ✓')

## 1. Load Data

In [None]:
# Read the raw measurement export (path is relative to the working directory).
df = pd.read_csv('../results/raw_data.csv')
rows_read = len(df)

# Ensure numeric types; values that cannot be parsed become NaN.
for column in ('energy_joules', 'duration_seconds'):
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Drop incomplete rows, and report how many were lost so that a malformed
# export cannot silently shrink the sample.
df = df.dropna(subset=['energy_joules', 'duration_seconds', 'config'])
rows_dropped = rows_read - len(df)

print(f'Loaded {len(df)} measurement rows')
if rows_dropped:
    print(f'Dropped {rows_dropped} incomplete or non-numeric rows')
print(f'Configs: {sorted(df["config"].unique())}')
# 'stage' is not part of the dropna subset above, so it may still hold NaN;
# sorted() cannot order NaN against strings, hence the explicit dropna().
print(f'Stages:  {sorted(df["stage"].dropna().unique())}')
df.head()

## 2. Descriptive Statistics

In [None]:
# Statistics to compute, mapped to the column names they are reported under.
_STAT_LABELS = {'mean': 'mean_J', 'median': 'median_J', 'std': 'std_J', 'count': 'n'}


def _energy_summary(keys):
    """Return mean/median/SD/count of energy_joules grouped by *keys*, rounded to 4 dp."""
    grouped_energy = df.groupby(keys)['energy_joules']
    table = grouped_energy.agg(list(_STAT_LABELS))
    return table.rename(columns=_STAT_LABELS).round(4)


# One row per (config, stage) pair.
desc = _energy_summary(['config', 'stage'])

# One row per config, pooling the measurement rows of all its stages.
# NOTE(review): this summarises individual rows, not per-run sums — if the CSV
# holds one row per stage of a run, "total" here is a row-level mean; confirm.
total_per_config = _energy_summary('config')

print('=== Per config/stage ===')
print(desc.to_string())
print('\n=== Total per config (all stages) ===')
print(total_per_config.to_string())

## 3. Normality Tests (Shapiro-Wilk)

In [None]:
# Table header. Inner literals use double quotes: reusing the outer quote
# character inside an f-string field is a SyntaxError before Python 3.12.
print(f'{"Config":<6} {"Stage":<30} {"n":>5} {"W-stat":>10} {"p-value":>12} {"Normal (p>0.05)?":>18}')
print('-' * 80)

# (config, stage) -> {'W': statistic, 'p': p-value, 'normal': p > 0.05}
normality_results = {}

for (config, stage), grp in df.groupby(['config', 'stage']):
    vals = grp['energy_joules'].dropna().values
    # Groups with fewer than three observations are skipped, not tested.
    if len(vals) < 3:
        continue
    stat, p = stats.shapiro(vals)
    is_normal = p > 0.05
    normality_results[(config, stage)] = {'W': stat, 'p': p, 'normal': is_normal}
    normal_str = 'YES' if is_normal else 'NO'
    print(f'{config:<6} {stage:<30} {len(vals):>5} {stat:>10.4f} {p:>12.4f} {normal_str:>18}')

## 4. Wilcoxon Signed-Rank Tests (C2, C3, C4 vs C1)

In [None]:
def cliffs_delta(a, b):
    """Compute Cliff's delta effect size between two samples.

    Delta is (#pairs with a_i > b_j  -  #pairs with a_i < b_j) / (len(a) * len(b)),
    taken over every cross pair, so it lies in [-1, 1]; ties count for neither side.

    Args:
        a: First sample (any sequence of numbers).
        b: Second sample (any sequence of numbers).

    Returns:
        The effect size as a Python float.

    Raises:
        ValueError: If either sample is empty (delta is undefined).
    """
    a = np.asarray(a, dtype=float).ravel()
    b = np.asarray(b, dtype=float).ravel()
    if a.size == 0 or b.size == 0:
        raise ValueError("cliffs_delta requires two non-empty samples")

    # Broadcast to a len(a) x len(b) comparison grid instead of two nested
    # Python loops. NaN compares False both ways, so it counts for neither side.
    greater = int(np.count_nonzero(a[:, None] > b[None, :]))
    lesser = int(np.count_nonzero(a[:, None] < b[None, :]))
    return (greater - lesser) / (a.size * b.size)

def interpret_delta(d):
    """Map a Cliff's delta value to a qualitative magnitude label.

    Args:
        d: Effect size; only its absolute value is used.

    Returns:
        'negligible' (|d| < 0.147), 'small' (|d| < 0.330), 'medium'
        (|d| < 0.474), otherwise 'large'. A NaN input yields 'undefined'.
    """
    ad = abs(d)
    # NaN fails every "<" comparison below and would otherwise be reported
    # as a large effect.
    if np.isnan(ad):
        return 'undefined'
    if ad < 0.147:
        return 'negligible'
    if ad < 0.330:
        return 'small'
    if ad < 0.474:
        return 'medium'
    return 'large'

# Baseline sample: every C1 energy reading, pooled across stages.
c1_total = df[df['config'] == 'C1']['energy_joules'].dropna().values

wilcoxon_results = []
truncation_notes = []   # printed after the table so the rows stay aligned

# Bound to a name because an apostrophe (or a backslash escape) inside an
# f-string replacement field is a SyntaxError before Python 3.12.
cliff_hdr = "Cliff's d"

print(f'Bonferroni-corrected α = {BONFERRONI_ALPHA:.3g}\n')
print(f'{"Comparison":<15} {"n_C1":>5} {"n_Cx":>5} {"statistic":>12} {"p-value":>12} {"Sig?":>6} {cliff_hdr:>10} {"Effect":>12}')
print('-' * 85)

for config in ['C2', 'C3', 'C4']:
    cx_total = df[df['config'] == config]['energy_joules'].dropna().values
    if len(cx_total) == 0 or len(c1_total) == 0:
        print(f'  {config} vs C1: insufficient data')
        continue

    # The signed-rank test needs equal-length paired samples, so both arrays
    # are cut to the shorter length and paired by position.
    # NOTE(review): positional pairing is only meaningful if row i of C1 and
    # row i of this config describe matching observations — confirm against
    # the CSV row order.
    min_len = min(len(c1_total), len(cx_total))
    try:
        stat, p = stats.wilcoxon(c1_total[:min_len], cx_total[:min_len])
        test_used = 'wilcoxon'
    except ValueError:
        # SciPy rejected the paired input; fall back to the unpaired
        # rank test on the full samples.
        stat, p = stats.mannwhitneyu(c1_total, cx_total, alternative='two-sided')
        test_used = 'mannwhitneyu'

    if test_used == 'mannwhitneyu':
        truncation_notes.append(f'  {config} vs C1: Mann-Whitney U fallback used')
    elif len(c1_total) != len(cx_total):
        discarded = len(c1_total) + len(cx_total) - 2 * min_len
        truncation_notes.append(
            f'  {config} vs C1: samples truncated to {min_len} pairs '
            f'({discarded} observations not tested)'
        )

    # Effect size always uses the full, untruncated samples.
    d = cliffs_delta(c1_total, cx_total)
    sig = 'YES' if p < BONFERRONI_ALPHA else 'NO'
    label = interpret_delta(d)

    wilcoxon_results.append({'comparison': f'{config} vs C1', 'stat': stat, 'p': p,
                             'significant': sig, 'cliffs_d': d, 'effect': label,
                             'test': test_used})
    print(f'{config + " vs C1":<15} {len(c1_total):>5} {len(cx_total):>5} {stat:>12.4f} {p:>12.4f} {sig:>6} {d:>10.4f} {label:>12}')

if truncation_notes:
    print('\nNotes:')
    for note in truncation_notes:
        print(note)

## 5. SCI Score Calculation

In [None]:
# SCI = (E × I) / R
# E = mean of energy_joules over the config's measurement rows, converted to kWh
# I = grid carbon intensity in gCO2eq/kWh
# R = 1 functional unit (one full CI run)
# kWh × gCO2eq/kWh is already gCO2eq, so no further scaling is applied
# (an extra ×1000 would report milligrams under a gCO2eq label).
# NOTE(review): E averages individual rows; if the CSV stores one row per
# stage rather than one per run, this is not a per-run total — confirm.

JOULES_PER_KWH = 3_600_000
GRID_G_PER_KWH = {'Ireland': 345, 'Norway': 25}

sci_rows = []

for config in CONFIG_ORDER:
    mean_J = df[df['config'] == config]['energy_joules'].mean()
    # A config with no rows has a NaN mean; leave it out of the table.
    if not np.isfinite(mean_J):
        continue

    mean_kWh = mean_J / JOULES_PER_KWH
    sci_ie_g = mean_kWh * GRID_G_PER_KWH['Ireland']   # gCO2eq, Ireland
    sci_no_g = mean_kWh * GRID_G_PER_KWH['Norway']    # gCO2eq, Norway

    sci_rows.append({
        'config': config,
        'mean_energy_J': round(mean_J, 4),
        'SCI_Ireland_gCO2eq': round(sci_ie_g, 6),
        'SCI_Norway_gCO2eq':  round(sci_no_g, 6),
    })

sci_df = pd.DataFrame(sci_rows)
print('=== SCI Scores (gCO₂eq per CI run) ===')
print(sci_df.to_string(index=False))

## 6. Chart 1 — Mean Energy per Config (Bar Chart with Error Bars)

In [None]:
# Mean and standard deviation of the energy readings per configuration,
# re-indexed so the bars always follow CONFIG_ORDER.
agg = df.groupby('config')['energy_joules'].agg(['mean', 'std']).reindex(CONFIG_ORDER)
bar_colours = [CONFIG_COLOURS.get(name, '#999') for name in agg.index]

fig, ax = plt.subplots(figsize=(7, 4))
bars = ax.bar(
    agg.index,
    agg['mean'],
    width=0.55,
    yerr=agg['std'],
    capsize=6,
    error_kw={'ecolor': '#333', 'lw': 1.2},
    color=bar_colours,
    edgecolor='white',
    linewidth=0.8,
)

# Titles, axis labels and tick styling
ax.set_title('Mean Energy per CI Configuration\n(error bars = ±1 SD)', fontsize=12, fontweight='bold')
ax.set_xlabel('Experiment Configuration', fontsize=11)
ax.set_ylabel('Mean Energy Consumption (J)', fontsize=11)
ax.tick_params(axis='both', which='major', labelsize=10)
ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())

# Write each mean just above the upper end of its error bar
for bar, mean_j, sd_j in zip(bars, agg['mean'], agg['std']):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + sd_j * 1.05
    ax.text(label_x, label_y, f"{mean_j:.2f}J", ha='center', va='bottom', fontsize=9)

fig.tight_layout()
out = FIGURES_DIR / 'fig1_mean_energy_bar.png'
fig.savefig(out, bbox_inches='tight')
print(f'Saved → {out}')
plt.show()

## 7. Chart 2 — Energy Distribution per Config (Box Plot)

In [None]:
# One array of energy readings per configuration, in CONFIG_ORDER.
data_by_config = [df[df['config'] == c]['energy_joules'].dropna().values for c in CONFIG_ORDER]

fig, ax = plt.subplots(figsize=(7, 4))
bp = ax.boxplot(
    data_by_config,
    patch_artist=True,   # boxes become patches so they can be filled below
    medianprops={'color': 'black', 'linewidth': 2},
    whiskerprops={'linewidth': 1.2},
    capprops={'linewidth': 1.5},
    flierprops={'marker': 'o', 'markersize': 4, 'alpha': 0.5},
)

# Tick labels are set explicitly rather than through boxplot(labels=...):
# that keyword is deprecated since Matplotlib 3.9 (renamed tick_labels), and
# this form works on either side of the rename. Boxes sit at x = 1..N.
ax.set_xticks(range(1, len(CONFIG_ORDER) + 1))
ax.set_xticklabels(CONFIG_ORDER)

# Fill each box with its configuration colour
for patch, config in zip(bp['boxes'], CONFIG_ORDER):
    patch.set_facecolor(CONFIG_COLOURS.get(config, '#aaa'))
    patch.set_alpha(0.75)

ax.set_xlabel('Experiment Configuration', fontsize=11)
ax.set_ylabel('Energy Consumption (J)', fontsize=11)
ax.set_title('Energy Distribution per CI Configuration', fontsize=12, fontweight='bold')
ax.tick_params(axis='both', which='major', labelsize=10)

fig.tight_layout()
out = FIGURES_DIR / 'fig2_energy_boxplot.png'
fig.savefig(out, bbox_inches='tight')
print(f'Saved → {out}')
plt.show()

## 8. Chart 3 — SCI Comparison: C1 vs C4 in Ireland and Norway

In [None]:
# Configurations shown in this figure, left to right, with their tick labels.
compare = ['C1', 'C4']
tick_text = {'C1': 'C1 (Baseline)', 'C4': 'C4 (Combined)'}

# Select the rows explicitly (and in this order) so bars and tick labels
# cannot drift apart, and fail with a clear message if a score is missing
# instead of a shape-mismatch error from ax.bar.
sci_by_config = sci_df.set_index('config')
missing = [c for c in compare if c not in sci_by_config.index]
if missing:
    raise ValueError(f'No SCI score available for {missing}; cannot draw the C1 vs C4 comparison')
sci_plot = sci_by_config.loc[compare]

fig, ax = plt.subplots(figsize=(7, 4))
x = np.arange(len(compare))
width = 0.32

# Two bars per configuration: Ireland to the left of the tick, Norway to the right
bars_ie = ax.bar(x - width/2, sci_plot['SCI_Ireland_gCO2eq'],
                 width, label='Ireland (345 gCO₂eq/kWh)',
                 color='#d94f3d', edgecolor='white')
bars_no = ax.bar(x + width/2, sci_plot['SCI_Norway_gCO2eq'],
                 width, label='Norway (25 gCO₂eq/kWh)',
                 color='#5ba85d', edgecolor='white')

ax.set_xticks(x)
ax.set_xticklabels([tick_text[c] for c in compare], fontsize=11)
ax.set_ylabel('SCI Score (gCO₂eq per CI run)', fontsize=11)
ax.set_title('Carbon Intensity: C1 vs C4\nIreland vs Norway Grid', fontsize=12, fontweight='bold')
ax.legend(fontsize=10)

# Print each bar's value slightly above its top edge
for bar_ie, bar_no in zip(bars_ie, bars_no):
    ax.text(bar_ie.get_x() + bar_ie.get_width()/2, bar_ie.get_height() * 1.02,
            f"{bar_ie.get_height():.4f}", ha='center', fontsize=8)
    ax.text(bar_no.get_x() + bar_no.get_width()/2, bar_no.get_height() * 1.02,
            f"{bar_no.get_height():.4f}", ha='center', fontsize=8)

fig.tight_layout()
out = FIGURES_DIR / 'fig3_sci_comparison.png'
fig.savefig(out, bbox_inches='tight')
print(f'Saved → {out}')
plt.show()

## 9. Paper-Ready Results Summary

In [None]:
# Plain-text versions of the three result tables, formatted for copying.
print('=' * 70)
print('  RESULTS SUMMARY — CI/CD Carbon Emissions Study')
print('  Ready to copy into IEEE paper')
print('=' * 70)

print('\n--- Table I: Descriptive Statistics (Total Energy per Configuration) ---')
print(f'{"Config":<8} {"Mean (J)":>10} {"Median (J)":>12} {"SD (J)":>10} {"N":>6}')
print('-' * 50)
for config in CONFIG_ORDER:
    grp = df[df['config'] == config]['energy_joules'].dropna()
    # Configurations without any readings are left out of the table.
    if len(grp) == 0:
        continue
    print(f'{config:<8} {grp.mean():>10.4f} {grp.median():>12.4f} {grp.std():>10.4f} {len(grp):>6}')

# The significance level is read from BONFERRONI_ALPHA so the title always
# matches the threshold the tests actually used.
print(f'\n--- Table II: Wilcoxon Tests vs C1 (Bonferroni α = {BONFERRONI_ALPHA:.3g}) ---')
# Bound to a name because an apostrophe inside a single-quoted f-string
# field is a SyntaxError before Python 3.12.
cliff_hdr = "Cliff's d"
print(f'{"Comparison":<15} {"p-value":>10} {"Sig?":>6} {cliff_hdr:>10} {"Effect":>12}')
print('-' * 58)
for r in wilcoxon_results:
    print(f"{r['comparison']:<15} {r['p']:>10.4f} {r['significant']:>6} {r['cliffs_d']:>10.4f} {r['effect']:>12}")

print('\n--- Table III: SCI Scores ---')
print(f'{"Config":<8} {"Mean E (J)":>12} {"SCI IE (gCO2eq)":>20} {"SCI NO (gCO2eq)":>20}')
print('-' * 64)
for _, row in sci_df.iterrows():
    print(f"{row['config']:<8} {row['mean_energy_J']:>12.4f} {row['SCI_Ireland_gCO2eq']:>20.6f} {row['SCI_Norway_gCO2eq']:>20.6f}")

print('\n' + '=' * 70)