In [3]:
%%capture --no-display

!pip install pandas scipy matplotlib

In [29]:
import glob
from collections import namedtuple

import pandas as pd
import numpy as np
import scipy.stats as ss

import matplotlib.pyplot as plt

# Display floats in rendered frames with 3 digits of precision.
pd.set_option('display.precision', 3)

# Render matplotlib figures inside the notebook output.
%matplotlib inline

# Name of the experiment under analysis: used as the key into each record's
# `experiments` mapping to find which treatment arm the record belongs to.
EXP = 'YET_ANOTHER_RECOMMENDER'

In [30]:
# Every simulation run writes its events to <run dir>/data.json (JSON lines).
data_pattern = '../../../sim_results/*/data.json'

# glob returns paths in arbitrary order; sorting makes the concatenated frame
# reproducible from run to run.
data_paths = sorted(glob.glob(data_pattern))
if not data_paths:
    # Fail with a message that names the pattern instead of pd.concat's
    # generic "No objects to concatenate".
    raise FileNotFoundError(f'No simulation results match {data_pattern!r}')

# ignore_index gives the combined frame one unique row index instead of
# repeating each file's 0..n-1 labels.
data = pd.concat(
    [pd.read_json(data_path, lines=True) for data_path in data_paths],
    ignore_index=True,
)

# Treatment arm of each event, taken from the record's `experiments` mapping.
data['treatment'] = data['experiments'].map(lambda experiments: experiments[EXP])

# Sanity check: number of non-null values per column in each arm.
data.groupby('treatment').count()

Unnamed: 0_level_0,message,timestamp,user,track,time,latency,recommendation,experiments
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
C,16061,16061,16061,16061,16061,16061,14052,16061
T1,18938,18938,18938,18938,18938,18938,16947,18938


In [31]:
Session = namedtuple('Session', ['timestamp', 'tracks', 'time', 'latency'])


def sessionize(user_data):
    """Fold one user's events into per-session totals.

    Rows are processed in ascending `timestamp` order. A session starts at the
    first row seen after the previous session closed and closes on a row whose
    `message` equals 'last'. For every closed session the result holds a dict
    with the `Session` fields:

    - timestamp: `timestamp` of the session's first row
    - tracks: number of rows in the session
    - time: sum of the rows' `time`
    - latency: sum of the rows' `latency`, each multiplied by 1000
      (presumably seconds -> milliseconds; confirm against the producer)

    Rows after the final 'last' row belong to a session that never closes and
    are not reported.
    """
    completed = []
    in_session = False
    started_at, n_tracks, total_time, total_latency = None, 0, 0, 0

    ordered_events = user_data.sort_values('timestamp')
    for _, event in ordered_events.iterrows():
        if not in_session:
            # First row of a new session: reset the running totals.
            started_at, n_tracks, total_time, total_latency = event['timestamp'], 0, 0, 0
            in_session = True

        n_tracks = n_tracks + 1
        total_time = total_time + event['time']
        total_latency = total_latency + event['latency'] * 1000

        if event['message'] == 'last':
            closed = Session(started_at, n_tracks, total_time, total_latency)
            completed.append(closed._asdict())
            in_session = False

    return completed

In [32]:
# One row per completed session, indexed by (user, treatment).
sessions = (
    data
        # Select exactly the columns sessionize reads. Applying to the whole
        # group (grouping columns included) is deprecated in pandas and emits
        # a warning on every run.
        .groupby(['user', 'treatment'])[['message', 'timestamp', 'time', 'latency']]
        .apply(sessionize)   # -> list of session dicts per (user, treatment)
        .explode()           # -> one session dict per row
        .apply(pd.Series)    # -> dict keys become columns
)

  data


In [33]:
# Per (user, treatment): total up the sessions, then derive the ratio metrics.
# `timestamp` is only counted, which gives the number of sessions.
user_level_data = (
    sessions
    .reset_index()
    .groupby(['user', 'treatment'])
    .agg({'timestamp': 'count', 'tracks': 'sum', 'time': 'sum', 'latency': 'sum'})
    .assign(
        sessions=lambda totals: totals['timestamp'],
        mean_request_latency=lambda totals: totals['latency'] / totals['tracks'],
        mean_tracks_per_session=lambda totals: totals['tracks'] / totals['sessions'],
        mean_time_per_session=lambda totals: totals['time'] / totals['sessions'],
    )
)

# Metrics compared between treatment arms.
metrics = [
    'time',
    'sessions',
    'mean_request_latency',
    'mean_tracks_per_session',
    'mean_time_per_session',
]

# Flat frame: `user` and `treatment` become ordinary columns next to the metrics.
user_level_metrics = user_level_data.loc[:, metrics].reset_index()

# Per arm and metric: number of users, mean and sample variance across users.
treatment_level_metrics = user_level_metrics.groupby('treatment')[metrics].agg(
    ['count', 'mean', 'var']
)

In [34]:
def dof(n_0, n_1, s2_0, s2_1):
    """Welch-Satterthwaite degrees of freedom for two independent samples.

    n_0, n_1   -- sample sizes
    s2_0, s2_1 -- sample variances

    Returns (s2_0/n_0 + s2_1/n_1)^2 divided by
    (s2_0/n_0)^2 / (n_0 - 1) + (s2_1/n_1)^2 / (n_1 - 1).
    """
    variance_of_difference = s2_0 / n_0 + s2_1 / n_1
    squared_total = variance_of_difference * variance_of_difference

    share_0 = s2_0 * s2_0 / n_0 / n_0 / (n_0 - 1)
    share_1 = s2_1 * s2_1 / n_1 / n_1 / (n_1 - 1)

    return squared_total / (share_0 + share_1)


def ci(n_0, n_1, s2_0, s2_1, alpha=0.05):
    """Half-width of the two-sided (1 - alpha) interval for a difference of means.

    n_0, n_1   -- sample sizes
    s2_0, s2_1 -- sample variances
    alpha      -- significance level (default 0.05)

    The width is the Student-t quantile at 1 - alpha/2, taken with the degrees
    of freedom returned by `dof`, times the standard error
    sqrt(s2_0/n_0 + s2_1/n_1).
    """
    degrees_of_freedom = dof(n_0, n_1, s2_0, s2_1)
    critical_value = ss.t.ppf(1 - alpha/2, degrees_of_freedom)
    standard_error = np.sqrt(s2_0 / n_0 + s2_1 / n_1)
    return critical_value * standard_error

# Label of the control arm in the `treatment` index; every other arm is
# compared against it.
CONTROL_GROUP = 'C'

# One record per (treatment arm, metric); rebuilt from scratch on every run.
effects = []

# Direct label lookup: raises a KeyError naming the label if the control arm
# is missing from the aggregated metrics.
control = treatment_level_metrics.loc[CONTROL_GROUP]

for treatment, row in treatment_level_metrics.iterrows():
    if treatment == CONTROL_GROUP:
        continue

    for metric in metrics:
        control_mean = control[metric]['mean']
        treatment_mean = row[metric]['mean']

        # Absolute difference of means and the half-width of its confidence
        # interval (default alpha of `ci`).
        effect = treatment_mean - control_mean
        conf_int = ci(
            control[metric]['count'],
            row[metric]['count'],
            control[metric]['var'],
            row[metric]['var'],
        )

        effects.append({
            'treatment': treatment,
            'metric': metric,
            'control_mean': control_mean,
            'treatment_mean': treatment_mean,
            # Effect and interval bounds as a percentage of the control mean.
            'effect': effect / control_mean * 100,
            'lower': (effect - conf_int) / control_mean * 100,
            'upper': (effect + conf_int) / control_mean * 100,
            # Significant when both interval bounds have the same sign,
            # i.e. the interval does not contain zero.
            'significant': (effect + conf_int) * (effect - conf_int) > 0
        })

In [35]:
def color(value):
    """CSS text color for a numeric cell: red below zero, green otherwise."""
    if value < 0:
        return 'color:red;'
    return 'color:green;'

def background(value):
    """CSS for a flag cell: white text on green when truthy, on red otherwise."""
    if value:
        return 'color:white;background-color:green'
    return 'color:white;background-color:red'

# Styled results table: relative effects colored by sign, significance flag
# highlighted by value.
(
    pd.DataFrame(effects)[[
        'treatment', 
        'metric',
        'effect', 
        'upper', 
        'lower', 
        'control_mean', 
        'treatment_mean',
        'significant'
    ]]
        .sort_values(['metric', 'treatment'], ascending=False)
        .style
        # Styler.map is the element-wise replacement for the deprecated
        # Styler.applymap (requires pandas >= 2.1).
        .map(color, subset=['effect', 'upper', 'lower'])
        .map(background, subset=['significant'])
)

  pd.DataFrame(effects)[[


Unnamed: 0,treatment,metric,effect,upper,lower,control_mean,treatment_mean,significant
0,T1,time,53.500149,59.07756,47.922739,3.603565,5.531478,True
1,T1,sessions,0.859693,3.534714,-1.815329,1.205882,1.216249,False
3,T1,mean_tracks_per_session,18.961303,20.867596,17.05501,7.998499,9.515119,True
4,T1,mean_time_per_session,52.007365,56.482504,47.532227,2.991663,4.547548,True
2,T1,mean_request_latency,740.147558,747.359378,732.935738,1.252349,10.52158,True
