In [123]:
import pandas as pd
from dateutil.parser import isoparse


# Analysis window, expressed in UTC. `start` and `stop` are used as inclusive
# bounds when the successful-request rows are filtered further down.
start = isoparse("2022-12-01 00:00:00+0000")
stop = isoparse("2023-03-07 23:59:59+0000")

# Cut-over instant between the two periods being compared: rows strictly earlier
# than this go to `before`, rows at or after it go to `after`.
# NOTE(review): this name shadows Python's builtin `breakpoint()` for the rest of
# the kernel session. It is left as-is because other cells may refer to it;
# consider renaming it across the whole notebook.
breakpoint = isoparse("2023-02-08 00:00:00+0000")

def sec_to_day(x):
    """Scale a per-second rate to the equivalent count over one 24-hour day.

    Parameters
    ----------
    x : number or array-like
        Anything that supports multiplication by an int. A pandas Series is
        scaled element-wise.

    Returns
    -------
    Same type as ``x``, multiplied by 60 * 60 * 24 (seconds per day).
    """
    return x * 60 * 60 * 24

# DataDog was being weird, probably on account of the incredibly small amount of data for failed requests,
# so the export is split into successful (ok) and unsuccessful (not_ok) requests.
ok = pd.read_csv("data/adzerk/ok.csv")
not_ok = pd.read_csv("data/adzerk/not_ok.csv")


def _errors_for_status(frame, status):
    """Return the rows of ``frame`` for one HTTP status that actually carry a value.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must have ``group`` and ``value`` columns.
    status : int or str
        Status code; rows are matched on ``group == "bifrost.status:<status>"``.

    Returns
    -------
    pandas.DataFrame
        The matching rows whose ``value`` is not null, re-indexed from 0 (the
        previous index is kept as an ``index`` column, as ``reset_index()``
        does by default), with ``value`` renamed to ``"<status>s/sec"``.
    """
    # Build one mask on the frame being indexed, rather than chaining two
    # `.loc` calls and relying on pandas re-aligning the second mask.
    wanted = (frame["group"] == f"bifrost.status:{status}") & frame["value"].notna()
    return frame.loc[wanted].reset_index().rename(columns={"value": f"{status}s/sec"})


# All the bad requests that showed up in DataDog, one frame per error code
fivehundreds = _errors_for_status(not_ok, 500)
fourtwonines = _errors_for_status(not_ok, 429)
fourzeroones = _errors_for_status(not_ok, 401)

# All the normal requests, restricted to the [start, stop] window (both ends inclusive)
ok = ok.rename(columns={"value": "200s/sec"})
# Parse the timestamp column once and reuse it for both bounds; the combined mask
# is built on the same frame it indexes, so no index re-alignment is involved.
ok_times = ok["time"].apply(isoparse)
ok = ok.loc[(ok_times >= start) & (ok_times <= stop)].reset_index()


# Build one frame keyed on "time": start from the successful-request rate and
# left-join each error-rate column onto it in turn (401, then 429, then 500).
df = ok[["time", "200s/sec"]]
for _error_frame, _rate_column in (
    (fourzeroones, "401s/sec"),
    (fourtwonines, "429s/sec"),
    (fivehundreds, "500s/sec"),
):
    df = df.merge(_error_frame[["time", _rate_column]], on="time", how="left")

# Timestamps without a matching error row come out of the left joins as NaN;
# every NaN in the frame is replaced by 0.
df = df.fillna(0)

# Add daily values: scale each per-second rate by the number of seconds in a day
# and round to a whole-number request count.
# NOTE(review): this treats every row as one full day of traffic - confirm the
# DataDog export really uses a one-day rollup.
for _status in ("200", "401", "429", "500"):
    # sec_to_day is plain multiplication, so it can be applied to the whole
    # column at once instead of element by element.
    df[f"total {_status}s"] = sec_to_day(df[f"{_status}s/sec"]).round(0).astype("int64")

# Everything that is not a 200 counts as an error.
df["total errors"] = df[["total 401s", "total 429s", "total 500s"]].sum(axis=1)

# failure rate per row: errors as a percentage of all requests (errors + 200s).
_errors_per_row = df["total errors"]
_requests_per_row = _errors_per_row + df["total 200s"]
df["failure %"] = _errors_per_row / _requests_per_row * 100

# A row with no requests at all yields 0/0 = NaN above; report it as 0.
df = df.fillna(0)

# divide the dataset using Mozilla's initial division: Dec 1 - Feb 7, Feb 8 - Mar 7
# Parse the timestamps once; `breakpoint` is the first instant of the second period,
# so it belongs to `after`.
row_times = df["time"].apply(isoparse)
before = df.loc[row_times < breakpoint].reset_index()
after = df.loc[row_times >= breakpoint].reset_index()

In [158]:
def _failure_pct(frame):
    """Return errors as a percentage of all requests recorded in ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must have ``total errors`` and ``total 200s`` columns.

    Returns
    -------
    float
        ``sum(total errors) / (sum(total errors) + sum(total 200s)) * 100``,
        or ``0.0`` when the frame holds no requests at all (avoids 0/0).
    """
    errors = frame["total errors"].sum()
    requests = errors + frame["total 200s"].sum()
    return errors / requests * 100 if requests else 0.0


fail_rate_before = _failure_pct(before)
fail_rate_after = _failure_pct(after)

# `before` holds every row earlier than Feb 8 00:00 UTC, i.e. up to and including
# Feb 7, so the first label says "through" rather than "before".
print(f"Total failure percentage through Feb 7 {fail_rate_before:.6f}%")
print(f"Total failure percentage after Feb 7 {fail_rate_after:.6f}%")

Failure percentage before Feb 7 0.000002%
Failure percentage after Feb 7  0.000029%
