# Windows CI time analyses

Across three groups: see how long Windows CI jobs are taking for conda/conda over the last three months.

I removed a few outliers but not sure if that was necessary, as there are so many extreme values, still.
After that, we can see the mean times for each group split by pytest-split, channel, Python version, and
type of test (unit v.s. integration).


In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("../data/windows_ci_times.csv")

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)

display(df.head(10))

Unnamed: 0,run_id,run_date,job_name,duration_seconds,duration_minutes
0,19746320712,2025-11-27T19:32:27Z,"windows (3.10, defaults, unit, 1)",1142.0,19.033333
1,19746320712,2025-11-27T19:32:27Z,"windows (3.10, defaults, integration, 1)",1478.0,24.633333
2,19746320712,2025-11-27T19:32:27Z,"windows (3.13, defaults, unit, 1)",1030.0,17.166667
3,19746320712,2025-11-27T19:32:27Z,"windows (3.10, defaults, integration, 2)",1142.0,19.033333
4,19746320712,2025-11-27T19:32:27Z,"windows (3.13, defaults, integration, 3)",1094.0,18.233333
5,19746320712,2025-11-27T19:32:27Z,"windows (3.10, defaults, integration, 3)",1432.0,23.866667
6,19746320712,2025-11-27T19:32:27Z,"windows (3.13, defaults, integration, 1)",1316.0,21.933333
7,19746320712,2025-11-27T19:32:27Z,"windows (3.10, defaults, unit, 2)",919.0,15.316667
8,19746320712,2025-11-27T19:32:27Z,"windows (3.13, defaults, unit, 2)",939.0,15.65
9,19746320712,2025-11-27T19:32:27Z,"windows (3.13, conda-forge, unit, 1)",3268.0,54.466667


In [3]:
df["group"] = df["job_name"].str.extract(r", (\d+)\)$")[0]
df = df.dropna(subset=["group"])
df["group"] = df["group"].astype(int)

df["job_type"] = df["job_name"].apply(
    lambda x: "unit" if "unit" in x else "integration"
)
df["channel"] = df["job_name"].apply(
    lambda x: "conda-forge" if "conda-forge" in x else "defaults"
)
df["python_version"] = df["job_name"].str.extract(r"windows \((\d+\.\d+),")[0]

In [4]:
def remove_outliers(group_df):
    Q1 = group_df["duration_minutes"].quantile(0.25)
    Q3 = group_df["duration_minutes"].quantile(0.75)
    IQR = Q3 - Q1

    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    return group_df[
        (group_df["duration_minutes"] >= lower_bound)
        & (group_df["duration_minutes"] <= upper_bound)
    ]


df_cleaned = pd.concat(
    [
        remove_outliers(group)
        for _, group in df.groupby(["group", "job_type", "channel", "python_version"])
    ]
)

print(f"Removed {len(df) - len(df_cleaned)} outliers from {len(df)} total runs")

Removed 205 outliers from 4425 total runs


In [5]:
def format_time(minutes):
    mins = int(minutes)
    secs = int((minutes - mins) * 60)
    return f"{mins}m {secs}s"


stats_full = df_cleaned.groupby(["group", "job_type", "channel", "python_version"])[
    "duration_minutes"
].agg(["mean", "min", "max", "std", "count"])

for group in sorted(df_cleaned["group"].unique()):
    print(f"group {group}")

    for job_type in ["unit", "integration"]:
        for channel in ["defaults", "conda-forge"]:
            for py_ver in sorted(df_cleaned["python_version"].unique()):
                if (group, job_type, channel, py_ver) in stats_full.index:
                    row = stats_full.loc[(group, job_type, channel, py_ver)]
                    print(f"\n  {job_type}; {channel}; Python {py_ver}:")
                    print(f"    Mean: {format_time(row['mean'])}")
                    print(f"    Min:  {format_time(row['min'])}")
                    print(f"    Max:  {format_time(row['max'])}")
                    print(f"    Std:  {format_time(row['std'])}")
                    print(f"    Runs: {int(row['count'])}\n")

group 1

  unit; defaults; Python 3.10:
    Mean: 34m 19s
    Min:  18m 21s
    Max:  56m 1s
    Std:  12m 42s
    Runs: 74


  unit; defaults; Python 3.13:
    Mean: 17m 8s
    Min:  14m 7s
    Max:  26m 47s
    Std:  2m 21s
    Runs: 248


  unit; defaults; Python 3.9:
    Mean: 17m 13s
    Min:  14m 15s
    Max:  22m 26s
    Std:  1m 56s
    Runs: 219


  unit; conda-forge; Python 3.13:
    Mean: 55m 56s
    Min:  49m 31s
    Max:  65m 29s
    Std:  3m 31s
    Runs: 294


  integration; defaults; Python 3.10:
    Mean: 36m 47s
    Min:  20m 27s
    Max:  54m 24s
    Std:  11m 51s
    Runs: 58


  integration; defaults; Python 3.13:
    Mean: 26m 25s
    Min:  16m 42s
    Max:  53m 4s
    Std:  9m 48s
    Runs: 277


  integration; defaults; Python 3.9:
    Mean: 23m 30s
    Min:  15m 15s
    Max:  41m 6s
    Std:  8m 0s
    Runs: 221


  integration; conda-forge; Python 3.13:
    Mean: 45m 10s
    Min:  25m 33s
    Max:  106m 13s
    Std:  22m 56s
    Runs: 295

group 2

  unit; def