In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [21]:
def get_prob_and_loss( data , sample_size = None):

    vals = {}
    prob_test_better_control = []
    expected_loss_control = []
    expected_loss_test = []


    for day in range( len ( data ) ):

        for v in ["control", "test"]:

            #Generate Beta distribution params
            vals[f"u_{v}"] , vals[f"var_{v}"] = stats.beta.stats( a = 1 + data.loc[day, f'acc_{v}_clicks'], \
                    b = 1 + (data.loc[day, f'acc_{v}_visits'] - data.loc[day, f'acc_{v}_clicks']), moments='mv')

            
            #Generate Normal distribution
            vals[f"x_{v}"] = np.random.normal( loc = vals[f"u_{v}"], scale = 1.25*np.sqrt( vals[f"var_{v}"] ) ,size = sample_size)

            #Generate Beta PDF
            vals[f"f{v}"] = stats.beta.pdf ( vals[f"x_{v}"], 
                            a = 1 + data.loc[day, f'acc_{v}_clicks'], 
                            b = 1 + (data.loc[day, f'acc_{v}_visits'] - data.loc[day, f'acc_{v}_clicks']) )
            
            #Generate Normal PDF
            vals[f"g{v}"] = stats.norm.pdf( vals[f"x_{v}"], loc = vals[f"u_{v}"], scale = 1.25*np.sqrt( vals[f"var_{v}"] ) )


        # Beta / Normal
        y = ( vals['fcontrol'] * vals['ftest'] ) / ( vals['gcontrol'] * vals['gtest'] )
        yb = y[ vals['x_test'] >= vals['x_control'] ]
            
        #Calculate probabilities
        p = ( 1/ sample_size ) * np.sum(yb)


        loss_control = (1 / sample_size ) * np.sum ( ( ( vals['x_test'] - vals['x_control'] )*y) [vals['x_test'] >= vals['x_control']] )
        loss_test = (1 / sample_size ) * np.sum ( ( ( vals['x_control'] - vals['x_test'] )*y) [vals['x_control'] >= vals['x_test']] )

        prob_test_better_control.append(p)
        expected_loss_control.append( loss_control )
        expected_loss_test.append( loss_test )
        #expected_loss_control.append( expected_loss_control )
        #expected_loss_test.append( expected_loss_test )


    return prob_test_better_control, expected_loss_control, expected_loss_test

In [24]:
df = pd.read_csv("data_experiment.csv")
df1 = df.copy()
df1['group'] = df1['group'].replace( {"treatment":"test"})
for i in ['visits','clicks']:
    df1[i] = df1[i].astype(int)
df1 = df1.reset_index().rename( columns={'index':'day'} )
df1 = df1.pivot_table( index='day', columns='group', values=df1.select_dtypes(exclude='object').columns ,aggfunc='sum')
df1 = df1.swaplevel( axis=1 )
df1 = df1.reindex(sorted(df1.columns), axis=1).fillna(0)
df1.columns = ["_".join(i) for i in df1.columns]
for i in df1.columns:
    df1[f"acc_{i}"] = df1[i].cumsum()

df1.head()

Unnamed: 0_level_0,control_clicks,control_no_clicks,control_visits,test_clicks,test_no_clicks,test_visits,acc_control_clicks,acc_control_no_clicks,acc_control_visits,acc_test_clicks,acc_test_no_clicks,acc_test_visits
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,2.0,1.0,0.0,1.0
3,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0
4,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,3.0,1.0,1.0,2.0


In [25]:

proba_b_better_a, expected_loss_A, expected_loss_B = get_prob_and_loss(df1, sample_size=1000)

x1 = np.arange( len(proba_b_better_a) )

proba_b_better_a

[0.6851243280291711,
 0.5207135952360528,
 0.6874093634830224,
 0.5092780621210327,
 0.6089400759224297,
 0.5003743569665656,
 0.6738277283632863,
 0.7394855340107186,
 0.798558997411374,
 0.8397476643139422,
 0.890877317926472,
 0.8819957522191821,
 0.925705077673624,
 0.9172684088461792,
 0.9852436233699164,
 0.9569217190077299,
 0.960556525918281,
 0.9900829225088479,
 0.9677710942703738,
 1.0115569344992152,
 0.9791961076141255,
 0.9796476060142859,
 0.9937533487989503,
 0.9461359430441982,
 0.9701222074758657,
 0.9479305312766702,
 0.9785691702698452,
 0.9888437351725654,
 0.9627240847506386,
 0.9690282440695837,
 0.9374025692221348,
 0.9758010910063054,
 0.9684629197939212,
 0.9565711081996537,
 0.9779085233828555,
 0.9716035004530684,
 1.009151399464284,
 0.9948845295084999,
 0.9794294302659593,
 1.00176768732491,
 0.9452538083932324,
 0.9350585385887669,
 0.9719822666646382,
 0.9742492361041486,
 0.9201828818426075,
 0.9237175095047312,
 0.9152979458765574,
 0.9471672375319901,