### GC Reasons in Firefox Beta46apz

In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import IPython
import re

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, Histogram
from montecarlino import grouped_permutation_test

%pylab inline
IPython.core.pylabtools.figsize(16, 7)

Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.
Populating the interactive namespace from numpy and matplotlib




In [2]:
def chi2_distance(xs, ys, eps = 1e-10, normalize = True):
    histA = xs.sum(axis=0)
    histB = ys.sum(axis=0)
    
    if normalize:
        histA = histA/histA.sum()
        histB = histB/histB.sum()
    
    d = 0.5 * np.sum([((a - b) ** 2) / (a + b + eps)
        for (a, b) in zip(histA, histB)])

    return d

def median_diff(xs, ys):
    return np.median(xs) - np.median(ys)

def compare_histogram(histogram, e10s, none10s):
    # Normalize individual histograms
    e10s = e10s.map(lambda x: x/x.sum())
    none10s = none10s.map(lambda x: x/x.sum())
    if "BLOCKED_ON_PLUGIN_MODULE_INIT_MS" in histogram:
        e10s = e10s.map(lambda x: x[x.index > 0])
        none10s = none10s.map(lambda x: x[x.index > 0])
        if e10s.sum().sum() <= 0 or none10s.sum().sum() <= 0:
            return
    
    pvalue = grouped_permutation_test(chi2_distance, [e10s, none10s], num_samples=100)
    
    eTotal = e10s.sum()
    nTotal = none10s.sum()
        
    eTotal = 100*eTotal/eTotal.sum()
    nTotal = 100*nTotal/nTotal.sum()
        
    fig = plt.figure()
    fig.subplots_adjust(hspace=0.3)
        
    ax = fig.add_subplot(1, 1, 1)
    ax2 = ax.twinx()
    width = 0.4
    ylim = max(eTotal.max(), nTotal.max())
        
    eTotal.plot(kind="bar", alpha=0.5, color="green", label="e10s", ax=ax, width=width, position=0, ylim=(0, ylim + 1))
    nTotal.plot(kind="bar", alpha=0.5, color="blue", label="non e10s", ax=ax2, width=width, position=1, grid=False, ylim=ax.get_ylim())
        
    ax.legend(ax.get_legend_handles_labels()[0] + ax2.get_legend_handles_labels()[0],
              ["e10s ({} samples".format(len(e10s)), "non e10s ({} samples)".format(len(none10s))])

    plt.title(histogram)
    plt.xlabel(histogram)
    plt.ylabel("Frequency %")
    plt.show()
        
    print "The probability that the distributions for {} are differing by chance is {:.2f}.".format(histogram, pvalue)

def normalize_uptime_hour(frame):
    frame = frame[frame["payload/simpleMeasurements/uptime"] > 0]
    frame = 60 * frame.apply(lambda x: x/frame["payload/simpleMeasurements/uptime"]) # Metric per hour
    frame.drop('payload/simpleMeasurements/uptime', axis=1, inplace=True)
    return frame
    
def compare_count_histograms(pings, *histograms_names):
    properties = histograms_names + ("payload/simpleMeasurements/uptime", "e10s")

    frame = pd.DataFrame(get_pings_properties(pings, properties).collect())

    e10s = frame[frame["e10s"] == True]
    e10s = normalize_uptime_hour(e10s)
    
    none10s = frame[frame["e10s"] == False]
    none10s = normalize_uptime_hour(none10s)
    
    for histogram in e10s.columns:
        if histogram == "e10s" or histogram.endswith("_parent") or histogram.endswith("_children"):
            continue
            
        compare_scalars(histogram + " per hour", e10s[histogram].dropna(), none10s[histogram].dropna())

        
def compare_histograms(pings, *histogram_names):
    frame = pd.DataFrame(get_pings_properties(pings, histogram_names + ("e10s",) , with_processes=True).collect())
    compare_df(frame)
    
def compare_df(frame):
    e10s = frame[frame["e10s"] == True]
    none10s = frame[frame["e10s"] == False]
    for histogram in none10s.columns:
        if histogram == "e10s" or histogram.endswith("_parent") or histogram.endswith("_children"):
            continue
            
        has_children = np.sum(e10s[histogram + "_children"].notnull()) > 0
        has_parent = np.sum(e10s[histogram + "_parent"].notnull()) > 0
        
        if has_children and has_parent:
            compare_histogram(histogram + " (parent + children)", e10s[histogram].dropna(), none10s[histogram].dropna())
            
        if has_parent:
            compare_histogram(histogram + " (parent)", e10s[histogram + "_parent"].dropna(), none10s[histogram].dropna())
            
        if has_children:
            compare_histogram(histogram + " (children)", e10s[histogram + "_children"].dropna(), none10s[histogram].dropna())
                    

                
def compare_scalars(metric, *groups):
    print "Median difference in {} is {:.2f}, ({:.2f}, {:.2f}).".format(metric,
                                                                        median_diff(*groups), 
                                                                        np.median(groups[0]),
                                                                        np.median(groups[1]))
    print "The probablity of this effect being purely by chance is {:.2f}.\n". \
        format(grouped_permutation_test(median_diff, groups, num_samples=10000))

In [3]:
sc.defaultParallelism

64

In [4]:
dataset = sqlContext.read.load("s3://telemetry-parquet/e10s_experiment/e10s_beta46_cohorts/v20160405", "parquet")

In [5]:
sampled = dataset.filter(dataset.sampleId <= 30)

In [6]:
def row_2_ping(row):
    ping = {"payload": {"simpleMeasurements": json.loads(row.simpleMeasurements) if row.simpleMeasurements else {},
                        "histograms": json.loads(row.histograms) if row.histograms else {},
                        "keyedHistograms": json.loads(row.keyedHistograms) if row.keyedHistograms else {},
                        "childPayloads": json.loads(row.childPayloads) if row.childPayloads else {},
                        "threadHangStats": json.loads(row.threadHangStats)} if row.threadHangStats else {},
           "e10s": True if row.e10sCohort == "test" else False}
    return ping

In [7]:
subset = sampled.rdd.filter(lambda r: r.e10sCohort in ["test", "control"]).map(row_2_ping)
subset_count = subset.count()
subset_count

428576

For e10s use sum of child payloads' uptimes and reasons. For !e10s, use parent's

In [8]:
def reason(p):
    out = {
        "e10s": p["e10s"],
    }
    if p["e10s"]:
        out["payload/simpleMeasurements/uptime"] = 0;
        for childPayload in p["payload"]["childPayloads"]:
            for r in childPayload.get("histograms", {}).get("GC_REASON_2", {}).get("values", {}).iteritems():
                if out.get(r[0], None) is not None:
                    out[r[0]] += r[1]
                else:
                    out[r[0]] = r[1]
            out["payload/simpleMeasurements/uptime"] += childPayload["simpleMeasurements"]["uptime"]
    else:
        out["payload/simpleMeasurements/uptime"] = p["payload"]["simpleMeasurements"]["uptime"]
        for r in p["payload"].get("histograms", {}).get("GC_REASON_2", {}).get("values", {}).iteritems():
            out[r[0]] = r[1]
    
    return out;

reasons = subset.map(reason)

In [9]:
reason_df = pd.DataFrame(reasons.collect())

In [10]:
e10s_reason_df = normalize_uptime_hour(reason_df[reason_df["e10s"] == True])
none10s_reason_df = normalize_uptime_hour(reason_df[reason_df["e10s"] == False])

In [19]:
reason_nums = set()
for reason in e10s_reason_df.columns:
    try:
        reason_nums.add(int(reason))
    except:
        pass
for r in sorted(reason_nums):
    reason = str(r)
    if len(e10s_reason_df[reason].dropna()) == 0:
        print "E10s has no reports of reason {}. Median of !e10s' {} per hour is {:.2f}\n".format(reason, reason, np.median(none10s_reason_df[reason].dropna()))
    elif len(none10s_reason_df[reason].dropna()) == 0:
        print "!E10s has no reports of reason {}. Median of e10s' {} per hour is {:.2f}\n".format(reason, reason, np.median(e10s_reason_df[reason].dropna()))                                                                                            
    else:
        compare_scalars(reason + " per hour", e10s_reason_df[reason].dropna(), none10s_reason_df[reason].dropna())

Median difference in 0 per hour is -2.22, (0.00, 2.22).
The probablity of this effect being purely by chance is 0.00.

Median difference in 1 per hour is 0.06, (1.03, 0.98).
The probablity of this effect being purely by chance is 0.00.

!E10s has no reports of reason 3. Median of e10s' 3 per hour is 0.00

Median difference in 4 per hour is 0.00, (0.00, 0.00).
The probablity of this effect being purely by chance is 1.00.

Median difference in 5 per hour is -1.22, (0.00, 1.22).
The probablity of this effect being purely by chance is 0.00.

Median difference in 6 per hour is 0.13, (0.98, 0.85).
The probablity of this effect being purely by chance is 0.00.

E10s has no reports of reason 7. Median of !e10s' 7 per hour is nan

Median difference in 13 per hour is 0.00, (0.00, 0.00).



Mean of empty slice.



The probablity of this effect being purely by chance is 1.00.

Median difference in 14 per hour is 0.43, (4.18, 3.75).
The probablity of this effect being purely by chance is 0.00.

Median difference in 15 per hour is -0.07, (0.37, 0.44).
The probablity of this effect being purely by chance is 0.17.

Median difference in 16 per hour is 0.01, (0.17, 0.16).
The probablity of this effect being purely by chance is 0.23.

!E10s has no reports of reason 33. Median of e10s' 33 per hour is 0.00

Median difference in 34 per hour is 2.08, (5.83, 3.75).
The probablity of this effect being purely by chance is 0.19.

Median difference in 35 per hour is -7.66, (0.00, 7.66).
The probablity of this effect being purely by chance is 0.00.

Median difference in 36 per hour is -18.57, (71.43, 90.00).
The probablity of this effect being purely by chance is 0.00.

Median difference in 37 per hour is -0.20, (1.30, 1.50).
The probablity of this effect being purely by chance is 0.00.

Median difference in 38 p