In [None]:
import pandas as pd
import glob

# Stability analysis
- How many of all channels have more than one result? Those channels are the ones in the stability frame.
- How many of them are stable?
- "Problem": some channels only have more than one result while the unstable observations are included; once those are removed, they have only one observation left!

In [None]:
# All possible channels
# NOTE(review): the three factors are not named anywhere in this notebook; presumably
# browsers x inclusion methods x observation methods — confirm. The `obs_methods` list
# defined further down has 35 entries, not 34.
all_channels = 3 * 20 * 34
all_channels

In [None]:
# Load every CSV found under stab/ and stack them into a single frame.
# Sorting the paths makes the row order independent of the filesystem's listing order.
stability_files = sorted(glob.glob("trees/2022-04-22-2/stab/*"))
# Read all files first and concatenate once: calling pd.concat inside the loop
# re-copies the accumulated frame on every iteration (quadratic in the file count).
stability_frames = [pd.read_csv(file, index_col=0) for file in stability_files]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the directory has no files.
df = pd.concat(stability_frames) if stability_frames else pd.DataFrame()

In [None]:
# Order the stability rows by "ratio", largest first, and show the top five.
df = df.sort_values("ratio", ascending=False)
df.head(5)

In [None]:
# Summary statistics for the combined stability frame.
df.describe()

In [None]:
# Rows whose "ratio" is strictly below 0.01.
# NOTE(review): presumably these are the channels treated as stable — confirm.
# A row with ratio exactly 0.01 lands in neither `a` nor `b` (the next cell uses `>`).
a = df.loc[df["ratio"] < 0.01]
a.describe()

In [None]:
# Rows whose "ratio" is strictly above 0.01.
# NOTE(review): presumably these are the channels treated as unstable — confirm.
# A row with ratio exactly 0.01 lands in neither `a` (which uses `<`) nor `b`.
b = df.loc[df["ratio"] > 0.01]
b.describe()

In [None]:
# Display the rows selected into `b` (ratio > 0.01).
b

In [None]:
# For each browser, list the distinct observation methods that occur in `b`.
# max_colwidth=None stops pandas from truncating the long list cells.
with pd.option_context("display.max_colwidth", None):
    display(b.groupby("browser")["observation_method"].unique().to_frame())

## Number of observations analysis
- Restricted to the stable channels
- How many observations remain?

In [None]:
import json
import os

# Observation-method names that themselves contain "_", the field separator used in
# the file names. They are hyphenated before splitting and restored afterwards.
UNDERSCORE_OBSERVATION_METHODS = ("fetch_errormessage", "fetch_response", "fetch_events")


def parse_channel_filename(path):
    """Split an observation file path into its channel components.

    The file name is expected to look like
    ``<inc_method>_<observation_method>_<browser>_...``; any fields after the
    third are ignored.

    Args:
        path: Path of a file from the ``obs/`` directory.

    Returns:
        Tuple ``(inc_method, observation_method, browser)``.

    Raises:
        ValueError: If the file name has fewer than three "_"-separated fields.
    """
    # basename() instead of splitting on "obs/" so the OS path separator does not matter.
    name = os.path.basename(path)
    for method in UNDERSCORE_OBSERVATION_METHODS:
        name = name.replace(method, method.replace("_", "-"))
    inc_method, observation_method, browser, *_ = name.split("_")
    for method in UNDERSCORE_OBSERVATION_METHODS:
        observation_method = observation_method.replace(method.replace("_", "-"), method)
    return inc_method, observation_method, browser


channel_files = glob.glob("trees/2022-04-22-2/obs/*")
# Collect plain records and build the frame once; appending with df.loc[len(df)]
# re-allocates the frame for every single row.
observation_rows = []
for file in channel_files:
    inc_method, observation_method, browser = parse_channel_filename(file)
    with open(file, encoding="utf-8") as f:
        observations = json.load(f)
    # Only the values of the top-level JSON object are used; its keys are ignored.
    for val in observations.values():
        observation_rows.append({
            "browser": browser,
            "inc_method": inc_method,
            "observation_method": observation_method,
            "observation": val["observation"],
            "count": val["count"],
        })

# Passing `columns` keeps the column order and yields an empty frame with the
# expected columns when no files are found.
df = pd.DataFrame(
    observation_rows,
    columns=["browser", "inc_method", "observation_method", "observation", "count"],
)

In [None]:
# Number of non-null observations per (browser, inc_method, observation_method) channel.
df.groupby(["browser", "inc_method", "observation_method"])["observation"].count().to_frame()

In [None]:
# Rows whose observation, rendered as a string, contains the substring "echo".
df.loc[df["observation"].apply(str).str.contains("echo")]

In [None]:
# Distribution of the per-channel observation counts: how many channels have
# 1, 2, ... observations.
df.groupby(["browser", "inc_method", "observation_method"])["observation"].count().to_frame().value_counts()

In [None]:
from pandas.api.types import CategoricalDtype
# Observation-method names used as the categories of `obs_type` below.
# The categories are registered in exactly this order.
obs_methods = [
  "height",
  "width",
  "naturalHeight",
  "naturalWidth",
  "videoHeight",
  "videoWidth",
  "duration",
  "networkState",
  "readyState",
  "buffered",
  "paused",
  "seekable",
  "sheet",
  "error",
  "contentDocument",
  "length",
  "window.name",
  "CSS2Properties",
  "origin",
  "opener",
  "el-error",
  "el-blur",
  "el-message",
  "el-securitypolicyviolation.smooth",
  "history.length",
  "getComputedStyle",
  "hasOwnProperty-a",
  "windowHeight",
  "events-fired",
  "events-fired-all",
  "performanceAPI.smooth",
  "win.performanceAPI.smooth",
  "fetch_events",
  "fetch_errormessage",
  "fetch_response"
]
# Categorical dtype over the names above; a value that is not in the list becomes
# NaN when a column is cast to this dtype.
obs_type = CategoricalDtype(categories=obs_methods)

In [None]:
# Keep one row per (browser, inc_method, observation_method) channel and make the
# two method columns categorical. astype() returns a new frame, so nothing is
# assigned into the result of drop_duplicates (which raises SettingWithCopyWarning).
d = df.drop_duplicates(subset=["browser", "inc_method", "observation_method"]).astype(
    {"inc_method": "category", "observation_method": obs_type}
)
for grouping in [["inc_method", "observation_method"], ["inc_method"], ["observation_method"]]:
    # observed=False is spelled out so that category combinations without data are
    # still part of the grouping regardless of the installed pandas version's default.
    channels = d.groupby(grouping, observed=False)["browser"].unique()
    print(f"Unique channels: {len(channels.index.unique())}")
    # Drop groups without any browser and sort each browser list for stable display.
    channels = channels.dropna().apply(sorted).to_frame()
    print(f"Unique channels (existing): {len(channels.index.unique())}, broken?")
    # Number of browsers per channel, computed once and reused for the three splits.
    browser_counts = channels["browser"].apply(len)
    channels_all = channels.loc[browser_counts == 3]
    channels_two = channels.loc[browser_counts == 2]
    channels_one = channels.loc[browser_counts == 1]
    for channels_x in [channels_all, channels_two, channels_one]:
        with pd.option_context("display.max_rows", 100):
            display(len(channels_x))
            display(channels_x)

In [None]:
# Frequency tables for each channel-key column of `d`: first split by browser,
# then over the whole frame. Up to 100 rows are shown per table.
with pd.option_context("display.max_rows", 100):
    for column in ("browser", "inc_method", "observation_method"):
        per_browser_counts = d.groupby("browser")[column].value_counts()
        display(per_browser_counts.to_frame())
        overall_counts = d[column].value_counts()
        display(overall_counts.to_frame())

In [None]:
# Show the rows whose (browser, inc_method, observation_method) key occurs exactly
# once across `a` and the de-duplicated observation frame; keep=False drops every
# key that occurs more than once.
# Author's reading of the result:
# - channels that only have one observation left after removing the tests with
#   different results (e.g., chromium link-stylesheet.sheet), or
# - channels that have only one observation left after removing observations with
#   count less than 32 (e.g., firefox link-prefetch-events-fired-all).
channel_key = ["browser", "inc_method", "observation_method"]
one_row_per_channel = df.drop_duplicates(subset=channel_key)
unmatched_channels = pd.concat([a, one_row_per_channel]).drop_duplicates(subset=channel_key, keep=False)
with pd.option_context("display.max_rows", 78):
    display(unmatched_channels)

## Sameness analysis
- All rows with fewer than 32 observations were removed!

In [None]:
# Load every CSV found under same/ into one frame and, along the way, show the rows
# of each file whose "0" column lies strictly between 0 and 200.
# Sorting the paths makes the printed order and the row order deterministic.
same_files = sorted(glob.glob("trees/2022-04-22-2/same/*"))
same_frames = []
for file in same_files:
    same_frame = pd.read_csv(file)
    same_frames.append(same_frame)
    # NOTE(review): the meaning of column "0" and of the 200 bound is not visible
    # in this notebook — confirm.
    in_range = same_frame.loc[(same_frame["0"] > 0) & (same_frame["0"] < 200)]
    if len(in_range) > 0:
        print(file)
        display(in_range)

# Concatenate once instead of inside the loop (which re-copies the accumulated frame
# on every iteration); pd.concat raises ValueError on an empty list, hence the fallback.
df = pd.concat(same_frames) if same_frames else pd.DataFrame()

In [None]:
# Display the combined frame built from the same/ files.
df