In [1]:
import os
import sys
import logging
from tqdm import tqdm
from dotenv import load_dotenv
import requests

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

class GithubAPI:
    def __init__(self, token):
        self.token = token
        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "Authorization": f"Bearer {self.token}",
            "X-Github-Api-Version": "2022-11-28",
        }
        self.base_url = "https://api.github.com/repos/apache/beam"

    def fetchData(self, suburl):
        url = f"{self.base_url}/{suburl}"
        response = requests.get(url, headers=self.headers)
        return response.json()

load_dotenv()
api = GithubAPI(os.getenv("GITHUB_TOKEN"))

logger.info("Fetching and uploading push and schedule workflow runs...")
workflow_runs = []
with open('../merge-queues/commit_shas.txt', 'r') as file:
    # Read all lines from the file into a list
    commit_shas = file.readlines()
for sha in tqdm(commit_shas[0:1000]):
    push_runs = api.fetchData(f"actions/runs?event=push&head_sha={sha}")
    schedule_runs = api.fetchData(f"actions/runs?event=schedule&head_sha={sha}")
    file = open("jobs_urls.txt", "a")
    for run in (push_runs['workflow_runs'] + schedule_runs['workflow_runs']):
        if run['name'] == 'Build python source distribution and wheels':
            file.write(f"{run['jobs_url']}\n")
    file.close()

INFO:__main__:Fetching and uploading push and schedule workflow runs...


  0%|          | 1/1000 [00:01<26:51,  1.61s/it]

https://api.github.com/repos/apache/beam/actions/runs/7117415845/jobs



  0%|          | 2/1000 [00:02<18:21,  1.10s/it]

https://api.github.com/repos/apache/beam/actions/runs/8023036513/jobs



  0%|          | 3/1000 [00:03<21:03,  1.27s/it]

https://api.github.com/repos/apache/beam/actions/runs/7119612852/jobs



  0%|          | 5/1000 [00:05<16:06,  1.03it/s]

https://api.github.com/repos/apache/beam/actions/runs/7630543901/jobs



  1%|          | 6/1000 [00:06<18:08,  1.10s/it]

https://api.github.com/repos/apache/beam/actions/runs/7352042128/jobs

https://api.github.com/repos/apache/beam/actions/runs/7361469177/jobs



  1%|          | 7/1000 [00:07<17:42,  1.07s/it]

https://api.github.com/repos/apache/beam/actions/runs/7453405220/jobs



  1%|          | 8/1000 [00:08<17:51,  1.08s/it]

https://api.github.com/repos/apache/beam/actions/runs/8346553030/jobs



  1%|          | 9/1000 [00:09<18:16,  1.11s/it]

https://api.github.com/repos/apache/beam/actions/runs/7425734350/jobs



  1%|          | 10/1000 [00:11<21:11,  1.28s/it]

https://api.github.com/repos/apache/beam/actions/runs/7643379512/jobs



  1%|          | 11/1000 [00:13<22:01,  1.34s/it]

https://api.github.com/repos/apache/beam/actions/runs/7145921057/jobs



  1%|          | 12/1000 [00:13<19:21,  1.18s/it]

https://api.github.com/repos/apache/beam/actions/runs/7116528478/jobs



  1%|▏         | 13/1000 [00:15<21:35,  1.31s/it]

https://api.github.com/repos/apache/beam/actions/runs/7063102196/jobs



  2%|▏         | 15/1000 [00:16<14:58,  1.10it/s]

https://api.github.com/repos/apache/beam/actions/runs/7196564678/jobs



  2%|▏         | 16/1000 [00:18<18:37,  1.14s/it]

https://api.github.com/repos/apache/beam/actions/runs/7977480701/jobs



  2%|▏         | 17/1000 [00:18<16:22,  1.00it/s]

https://api.github.com/repos/apache/beam/actions/runs/7143486810/jobs



  2%|▏         | 18/1000 [00:20<17:30,  1.07s/it]

https://api.github.com/repos/apache/beam/actions/runs/7892464559/jobs



  2%|▏         | 20/1000 [00:21<16:32,  1.01s/it]

https://api.github.com/repos/apache/beam/actions/runs/6973656923/jobs



  2%|▏         | 21/1000 [00:23<18:08,  1.11s/it]

https://api.github.com/repos/apache/beam/actions/runs/7183966271/jobs



  2%|▏         | 22/1000 [00:24<18:34,  1.14s/it]

https://api.github.com/repos/apache/beam/actions/runs/7662373792/jobs



  2%|▏         | 23/1000 [00:25<16:54,  1.04s/it]

https://api.github.com/repos/apache/beam/actions/runs/7918710986/jobs



  2%|▏         | 24/1000 [00:26<17:08,  1.05s/it]

https://api.github.com/repos/apache/beam/actions/runs/7341665686/jobs



  2%|▎         | 25/1000 [00:27<17:59,  1.11s/it]

https://api.github.com/repos/apache/beam/actions/runs/7289593874/jobs



  3%|▎         | 26/1000 [00:28<15:52,  1.02it/s]

https://api.github.com/repos/apache/beam/actions/runs/7182215595/jobs



  3%|▎         | 27/1000 [00:29<18:56,  1.17s/it]

https://api.github.com/repos/apache/beam/actions/runs/7467361052/jobs



  3%|▎         | 28/1000 [00:31<20:23,  1.26s/it]

https://api.github.com/repos/apache/beam/actions/runs/7210830638/jobs



  3%|▎         | 29/1000 [00:32<21:58,  1.36s/it]

https://api.github.com/repos/apache/beam/actions/runs/7631348835/jobs



  3%|▎         | 30/1000 [00:34<21:34,  1.33s/it]

https://api.github.com/repos/apache/beam/actions/runs/7255249134/jobs



  3%|▎         | 31/1000 [00:35<21:31,  1.33s/it]

https://api.github.com/repos/apache/beam/actions/runs/7182282892/jobs



  3%|▎         | 32/1000 [00:35<17:38,  1.09s/it]

https://api.github.com/repos/apache/beam/actions/runs/7132601708/jobs



  3%|▎         | 33/1000 [00:37<19:43,  1.22s/it]

https://api.github.com/repos/apache/beam/actions/runs/8205853690/jobs



  3%|▎         | 34/1000 [00:38<19:32,  1.21s/it]

https://api.github.com/repos/apache/beam/actions/runs/6825816511/jobs



  4%|▎         | 36/1000 [00:39<13:01,  1.23it/s]

https://api.github.com/repos/apache/beam/actions/runs/6961521680/jobs



  4%|▎         | 37/1000 [00:40<16:00,  1.00it/s]

https://api.github.com/repos/apache/beam/actions/runs/8267504948/jobs



  4%|▍         | 38/1000 [00:42<17:11,  1.07s/it]

https://api.github.com/repos/apache/beam/actions/runs/7453051298/jobs



  4%|▍         | 39/1000 [00:43<18:26,  1.15s/it]

https://api.github.com/repos/apache/beam/actions/runs/7279115091/jobs



  4%|▍         | 40/1000 [00:45<21:17,  1.33s/it]

https://api.github.com/repos/apache/beam/actions/runs/8087949273/jobs



  4%|▍         | 41/1000 [00:46<22:07,  1.38s/it]

https://api.github.com/repos/apache/beam/actions/runs/6936307209/jobs



  4%|▍         | 42/1000 [00:48<21:54,  1.37s/it]

https://api.github.com/repos/apache/beam/actions/runs/7174596029/jobs



  4%|▍         | 43/1000 [00:49<20:41,  1.30s/it]

https://api.github.com/repos/apache/beam/actions/runs/7132736553/jobs



  4%|▍         | 44/1000 [00:50<21:33,  1.35s/it]

https://api.github.com/repos/apache/beam/actions/runs/7277931685/jobs



  4%|▍         | 45/1000 [00:52<23:15,  1.46s/it]

https://api.github.com/repos/apache/beam/actions/runs/6772814462/jobs



  5%|▍         | 48/1000 [00:55<19:20,  1.22s/it]

https://api.github.com/repos/apache/beam/actions/runs/7547728618/jobs



  5%|▍         | 49/1000 [00:56<20:04,  1.27s/it]

https://api.github.com/repos/apache/beam/actions/runs/6791397375/jobs



  5%|▌         | 50/1000 [00:57<17:37,  1.11s/it]

https://api.github.com/repos/apache/beam/actions/runs/7804351657/jobs



  5%|▌         | 51/1000 [00:59<19:58,  1.26s/it]

https://api.github.com/repos/apache/beam/actions/runs/7504048170/jobs



  5%|▌         | 53/1000 [01:00<17:58,  1.14s/it]

https://api.github.com/repos/apache/beam/actions/runs/8149866908/jobs



  6%|▌         | 55/1000 [01:01<12:51,  1.22it/s]

https://api.github.com/repos/apache/beam/actions/runs/6880996620/jobs



  6%|▌         | 56/1000 [01:02<13:17,  1.18it/s]

https://api.github.com/repos/apache/beam/actions/runs/6880755436/jobs



  6%|▌         | 57/1000 [01:03<14:47,  1.06it/s]

https://api.github.com/repos/apache/beam/actions/runs/8367308615/jobs



  6%|▌         | 58/1000 [01:04<12:56,  1.21it/s]

https://api.github.com/repos/apache/beam/actions/runs/7007899269/jobs



  6%|▌         | 59/1000 [01:05<14:18,  1.10it/s]

https://api.github.com/repos/apache/beam/actions/runs/6828900979/jobs



  6%|▌         | 60/1000 [01:07<17:47,  1.14s/it]

https://api.github.com/repos/apache/beam/actions/runs/8365619685/jobs



  6%|▌         | 61/1000 [01:08<19:14,  1.23s/it]

https://api.github.com/repos/apache/beam/actions/runs/8397176093/jobs



  6%|▌         | 62/1000 [01:10<19:30,  1.25s/it]

https://api.github.com/repos/apache/beam/actions/runs/7571793546/jobs



  6%|▋         | 64/1000 [01:11<13:58,  1.12it/s]

https://api.github.com/repos/apache/beam/actions/runs/7890698200/jobs



  7%|▋         | 66/1000 [01:12<11:46,  1.32it/s]

https://api.github.com/repos/apache/beam/actions/runs/7133156418/jobs



  7%|▋         | 67/1000 [01:13<12:22,  1.26it/s]

https://api.github.com/repos/apache/beam/actions/runs/7731342625/jobs



  7%|▋         | 68/1000 [01:13<11:43,  1.32it/s]

https://api.github.com/repos/apache/beam/actions/runs/7631691739/jobs



  7%|▋         | 69/1000 [01:14<12:25,  1.25it/s]

https://api.github.com/repos/apache/beam/actions/runs/6831599480/jobs



  7%|▋         | 70/1000 [01:15<14:27,  1.07it/s]

https://api.github.com/repos/apache/beam/actions/runs/6695365073/jobs



  7%|▋         | 71/1000 [01:17<15:55,  1.03s/it]

https://api.github.com/repos/apache/beam/actions/runs/7450803243/jobs



  7%|▋         | 72/1000 [01:18<16:09,  1.04s/it]

https://api.github.com/repos/apache/beam/actions/runs/6894491151/jobs



  7%|▋         | 73/1000 [01:19<17:04,  1.10s/it]

https://api.github.com/repos/apache/beam/actions/runs/7589350229/jobs



  8%|▊         | 75/1000 [01:20<14:06,  1.09it/s]

https://api.github.com/repos/apache/beam/actions/runs/7508773935/jobs



  8%|▊         | 76/1000 [01:22<15:42,  1.02s/it]

https://api.github.com/repos/apache/beam/actions/runs/7616609339/jobs



  8%|▊         | 77/1000 [01:23<16:36,  1.08s/it]

https://api.github.com/repos/apache/beam/actions/runs/6834189781/jobs



  8%|▊         | 78/1000 [01:24<16:49,  1.09s/it]

https://api.github.com/repos/apache/beam/actions/runs/8394443476/jobs



  8%|▊         | 80/1000 [01:26<15:53,  1.04s/it]

https://api.github.com/repos/apache/beam/actions/runs/7889937534/jobs



  8%|▊         | 81/1000 [01:27<18:55,  1.24s/it]

https://api.github.com/repos/apache/beam/actions/runs/7856184041/jobs



  8%|▊         | 82/1000 [01:29<18:43,  1.22s/it]

https://api.github.com/repos/apache/beam/actions/runs/7168863518/jobs



  8%|▊         | 83/1000 [01:29<16:58,  1.11s/it]

https://api.github.com/repos/apache/beam/actions/runs/7105709691/jobs



  8%|▊         | 83/1000 [01:31<16:46,  1.10s/it]


KeyboardInterrupt: 

In [2]:
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt

with open('jobs_urls.txt', 'r') as file:
    # Read all lines into a list
    jobs_urls = file.readlines()
    
load_dotenv()
api = GithubAPI(os.getenv("GITHUB_TOKEN"))

def calculate_build_mins(job):
    start = job['started_at']
    complete = job['completed_at']
    timestamp1 = datetime.fromisoformat(start.replace("Z", "+00:00"))
    timestamp2 = datetime.fromisoformat(complete.replace("Z", "+00:00"))
    difference_in_minutes = (timestamp2 - timestamp1).total_seconds() / 60
    return difference_in_minutes

python_wheel_build_jobs = [
    'Build python wheels on auto for macos-latest',
    'Build python wheels on auto for windows-latest',
    'Build python wheels on aarch64 for ubuntu-latest',
    'Build python wheels on auto for ubuntu-latest'
]

parallelized_build_times = []
current_build_times = []
for jobs in tqdm(jobs_urls):
    sub_url = jobs.split('https://api.github.com/repos/apache/beam/')[1].rstrip('\n')
    run = api.fetchData(sub_url)
    python_wheel_build_job_mins = []
    for job in run['jobs']:
        if job['name'] in python_wheel_build_jobs:
            python_wheel_build_job_mins.append(calculate_build_mins(job))
    if python_wheel_build_job_mins:
        parallelized_build_time = max(python_wheel_build_job_mins)
        current_build_time = sum(python_wheel_build_job_mins)
        parallelized_build_times.append(parallelized_build_time)
        current_build_times.append(current_build_time)

hist, bins = np.histogram(parallelized_build_times, bins=20, range=(0, max(current_build_times)+1))
plt.bar(bins[:-1], hist, width=bins[1]-bins[0], align='edge')
plt.xlabel('Build Minutes')
plt.ylabel('# of Jobs')
plt.title('Histogram of Build Minutes With Parallelized Jobs')
plt.show()
hist, bins = np.histogram(current_build_times, bins=20, range=(0, max(current_build_times)+1))
plt.bar(bins[:-1], hist, width=bins[1]-bins[0], align='edge')
plt.xlabel('Build Minutes')
plt.ylabel('# of Jobs')
plt.title('Histogram of Build Minutes With Current Sequential Jobs')
plt.show()


 11%|█▏        | 97/845 [01:08<06:51,  1.82it/s]

In [70]:
print(parallelized_build_times)
print(current_build_times)

mean_value = np.mean(parallelized_build_times)
std_deviation = np.std(parallelized_build_times)

print(f"Mean: {mean_value}")
print(f"Standard Deviation: {std_deviation}")

mean_value = np.mean(current_build_times)
std_deviation = np.std(current_build_times)

print(f"Mean: {mean_value}")
print(f"Standard Deviation: {std_deviation}")

[13.05, 39.666666666666664, 34.68333333333333, 4.216666666666667, 89.66666666666667, 90.08333333333333, 87.41666666666667, 13.85, 43.61666666666667, 67.73333333333333, 70.51666666666667, 45.53333333333333, 9.15, 95.56666666666666, 44.1, 89.06666666666666, 66.71666666666667, 89.78333333333333, 10.45, 38.8, 14.866666666666667, 4.233333333333333, 90.53333333333333, 89.43333333333334, 33.38333333333333, 89.11666666666666, 91.85, 8.616666666666667, 48.31666666666667, 130.0, 0.4666666666666667, 93.36666666666666, 35.95, 88.38333333333334, 93.71666666666667, 88.73333333333333, 22.383333333333333, 15.816666666666666, 45.516666666666666, 13.516666666666667, 99.3, 126.3, 40.016666666666666, 92.53333333333333, 89.85, 10.65, 24.083333333333332, 30.5, 1.1666666666666667, 44.68333333333333, 92.8, 89.3, 93.83333333333333, 19.866666666666667, 15.216666666666667, 4.9, 13.116666666666667, 96.6, 89.08333333333333, 65.06666666666666, 19.183333333333334, 94.28333333333333, 89.35, 90.26666666666667, 2.11666

In [71]:
from scipy.stats import kstest

ks_statistic, p_value = kstest(parallelized_build_times, current_build_times)

print(f"KS Statistic: {ks_statistic}")
print(f"P-value: {p_value}")

if p_value < 0.05:
    print("Reject null hypothesis: Distributions are significantly different.")
else:
    print("Fail to reject null hypothesis: Distributions are not significantly different.")

KS Statistic: 0.6466165413533834
P-value: 1.6240583186800187e-157
Reject null hypothesis: Distributions are significantly different.
