### SHIELD Variations, Data Pull and Analysis

In [14]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history

from pprint import pprint as pp

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [15]:
# Show the context's default parallelism as a quick check of the cluster size.
# NOTE(review): `sc` is never defined in this notebook -- presumably the
# SparkContext injected by the hosting kernel; confirm before a fresh run.
sc.defaultParallelism

16

In [35]:
# helpers and utils: date window and ping name shared by the cells below

import datetime as DT

DAYS = 14                      # length of the look-back window, in days
PINGNAME = 'x-shield-trials'   # docType value used to select pings

today = DT.date.today()
# NOTE: despite the name, this is DAYS days ago (currently two weeks).
week_ago = today - DT.timedelta(days=DAYS)

# Both window bounds rendered as YYYYMMDD strings.
today_fmt, week_ago_fmt = (d.strftime("%Y%m%d") for d in (today, week_ago))


### Extract, Transform, Analyze

0.  [python_moztelemetry Spark API docs](https://github.com/mozilla/python_moztelemetry/blob/master/moztelemetry/spark.py)
1.  Get all SHIELD trial (`x-shield-trials`) pings


In [41]:
# Query filters shared by every per-channel get_pings() call.
kwargs = {
    "doc_type": "OTHER",
    # Window bounds as YYYYMMDD strings: (oldest day, today).
    "submission_date": (week_ago_fmt, today_fmt),
    "app": "Firefox",
}

def pingVersion(ping):
    """Return the payload ``version`` of a ping as a tuple of ints.

    The dotted version string (e.g. ``'0.0.16'``) becomes ``(0, 0, 16)`` so
    that versions can be compared with ordinary tuple comparison.

    A missing version is treated as ``'0.0.0'``.  A version that is not a
    string, or that has a non-integer component (e.g. ``'0.0.16-beta'``),
    also yields ``(0, 0, 0)`` instead of raising, so that one malformed ping
    cannot abort a whole Spark job; such pings simply sort as "oldest".

    Raises:
        KeyError: if the ping has no ``'payload'`` entry.
    """
    raw = ping['payload'].get('version', '0.0.0')
    try:
        return tuple(int(part) for part in raw.split("."))
    except (AttributeError, ValueError):
        # AttributeError: version is not a string (e.g. None).
        # ValueError: a component is not an integer (e.g. "16-beta", "").
        return (0, 0, 0)

# Fetch the raw pings for each channel of interest and merge them into one RDD
# (release first, then the remaining channels in order).
pings = get_pings(sc, channel="release", **kwargs)
for other_channel in ("aurora", "beta", "nightly"):
    pings = pings.union(get_pings(sc, channel=other_channel, **kwargs))

# Keep only pings whose docType is PINGNAME and whose payload version is at
# least 0.0.16.  The version is only parsed for pings that pass the docType test.
pings = pings.filter(
    lambda p: p["meta"]["docType"] == PINGNAME and pingVersion(p) >= (0, 0, 16)
)

In [42]:
# Sanity-check the filtered RDD: how many pings matched, and what one looks like.
# A cell only echoes its last expression, so the count has to be printed
# explicitly -- otherwise it is computed and then silently thrown away.
print(pings.count())
pings.first()


{u'application': {u'architecture': u'x86-64',
  u'buildId': u'20151029151421',
  u'channel': u'release',
  u'name': u'Firefox',
  u'platformVersion': u'42.0',
  u'vendor': u'Mozilla',
  u'version': u'42.0',
  u'xpcomAbi': u'x86_64-gcc3'},
 u'creationDate': u'2016-03-01T15:08:38.622Z',
 u'environment': {u'addons': {u'activeAddons': {u'@x-addon-shield-trials-1': {u'appDisabled': False,
     u'blocklisted': False,
     u'description': u'Screen Draw Performance Enhancements.',
     u'foreignInstall': True,
     u'hasBinaryComponents': False,
     u'installDay': 16861,
     u'name': u'x-addon-shield-trials-1',
     u'scope': 1,
     u'signedState': 0,
     u'type': u'extension',
     u'updateDay': 16861,
     u'userDisabled': False,
     u'version': u'0.0.16'}},
   u'activeExperiment': {},
   u'activeGMPlugins': {u'gmp-gmpopenh264': {u'applyBackgroundUpdates': 1,
     u'userDisabled': False}},
   u'activePlugins': [{u'blocklisted': False,
     u'clicktoplay': True,
     u'description': u'5.

In [32]:
## here is the final report.
def daysSinceLaunch(jsnow, jslaunch):
    """Return the number of whole days elapsed between two timestamps.

    One day is counted as 86400 * 1000 units, so both arguments are
    presumably epoch milliseconds (JavaScript style) -- TODO confirm
    against the callers.

    Returns -1 as a "problem" marker when ``jsnow`` is earlier than
    ``jslaunch``; clock skew between the two sources can cause this.
    """
    units_per_day = 86400 * 1000

    if jslaunch > jsnow:
        return -1  # launch is "in the future": flag as a problem

    whole_days, _remainder = divmod(jsnow - jslaunch, units_per_day)
    return int(whole_days)
    
def getFields(ping):
    """Flatten one ping into a ``(who, name, branch, days)`` tuple.

    * ``who`` and ``name`` are copied from the payload.
    * ``branch`` is ``payload['branch']``, falling back to
      ``payload['variation']`` and finally to ``'unk'``.
    * ``days`` is ``daysSinceLaunch`` applied to the scaled
      ``ping['meta']['Timestamp']`` and the payload's ``firstrun``.

    Raises:
        KeyError: if ``payload``, ``meta``/``Timestamp``, ``who``, ``name``
            or ``firstrun`` is missing from the ping.
    """
    payload = ping['payload']

    # Timestamp is scaled down by 10**6 before being compared with firstrun --
    # presumably nanoseconds -> milliseconds; TODO confirm the units.
    # Floor division keeps the value integral on both Python 2 and Python 3.
    now_scaled = ping['meta']['Timestamp'] // (10**6)

    # FIXME (original author's note): this day count is buggy / wrong.
    days = daysSinceLaunch(now_scaled, int(payload['firstrun']))

    return (
        payload['who'],
        payload['name'],
        payload.get('branch', payload.get('variation', 'unk')),
        days,
    )

def reducedFields(ping_tuple):
    """Return ``ping_tuple`` without its first element (the ``who`` field)."""
    without_who = ping_tuple[1:]
    return without_who
              
# One row per distinct (who, name, branch, days) combination seen in the pings.
# `data` is collected several times in this notebook, so cache it: without
# this every collect() re-runs the whole fetch / filter / map pipeline.
data = pings.map(getFields).distinct().cache()

# NOTE(review): the rows include the raw `who` value, which in the captured
# output is a local filesystem path -- clear outputs before sharing.
pp(data.collect())

# Alternative view: occurrences per (name, branch, days), with `who` dropped.
#data.map(reducedFields).countByValue()

# TODO: make a much nicer report here: for each experiment, for each branch...
# N ever seen, % alive on day 3, total hours, etc.


[(u'/Users/mgrimes/Downloads', u'gregg experiment 1', u'a', 0),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'a', 0),
 (u'C:\\Users\\rjweiss\\Downloads', u'gregg experiment 1', u'b', -1),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'agressive', 0),
 (u'C:\\Users\\rjweiss\\Downloads', u'gregg experiment 1', u'b', 0),
 (u'/Users/mgrimes/Downloads', u'gregg experiment 1', u'a', 1),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'agressive', 2314),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'b', 0),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'medium', 0),
 (u'/var/folders/0z/4g3t_26s3gv835xswsslbn400000gq/T/f8683aca-209c-4e6b-8e08-5d2081219781',
  u'gregg experiment 1',
  u'a',
  0)]


In [33]:
from pprint import pprint as pp

# Materialise the distinct ping tuples as a DataFrame, one column per field.
df = pd.DataFrame(
    data.collect(),
    columns=['who', 'experiment', 'branch', 'days'],
)

# For every (experiment, branch, days) combination, count the distinct `who` values.
df.pivot_table(
    columns=['experiment', 'branch', 'days'],
    aggfunc=lambda who_values: len(set(who_values)),
)

     experiment          branch     days 
who  gregg experiment 1  a           0       3
                                     1       1
                         agressive   0       1
                                     2314    1
                         b          -1       1
                                     0       2
                         medium      0       1
dtype: int64

In [34]:
# Echo the raw collected rows once more for reference; this is the same RDD
# that was collected to build `df`, shown here via the cell's default repr.
data.collect()

[(u'/Users/mgrimes/Downloads', u'gregg experiment 1', u'a', 0),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'a', 0),
 (u'C:\\Users\\rjweiss\\Downloads', u'gregg experiment 1', u'b', -1),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'agressive', 0),
 (u'C:\\Users\\rjweiss\\Downloads', u'gregg experiment 1', u'b', 0),
 (u'/Users/mgrimes/Downloads', u'gregg experiment 1', u'a', 1),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'agressive', 2314),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'b', 0),
 (u'/Users/glind/Downloads', u'gregg experiment 1', u'medium', 0),
 (u'/var/folders/0z/4g3t_26s3gv835xswsslbn400000gq/T/f8683aca-209c-4e6b-8e08-5d2081219781',
  u'gregg experiment 1',
  u'a',
  0)]