In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [21]:
def get_prob_and_loss( data , sample_size = None):

    vals = {}
    prob_test_better_control = []
    expected_loss_control = []
    expected_loss_test = []


    for day in range( len ( data ) ):

        for v in ["control", "test"]:

            #Generate Beta distribution params
            vals[f"u_{v}"] , vals[f"var_{v}"] = stats.beta.stats( a = 1 + data.loc[day, f'acc_{v}_clicks'], \
                    b = 1 + (data.loc[day, f'acc_{v}_visits'] - data.loc[day, f'acc_{v}_clicks']), moments='mv')

            
            #Generate Normal distribution
            vals[f"x_{v}"] = np.random.normal( loc = vals[f"u_{v}"], scale = 1.25*np.sqrt( vals[f"var_{v}"] ) ,size = sample_size)

            #Generate Beta PDF
            vals[f"f{v}"] = stats.beta.pdf ( vals[f"x_{v}"], 
                            a = 1 + data.loc[day, f'acc_{v}_clicks'], 
                            b = 1 + (data.loc[day, f'acc_{v}_visits'] - data.loc[day, f'acc_{v}_clicks']) )
            
            #Generate Normal PDF
            vals[f"g{v}"] = stats.norm.pdf( vals[f"x_{v}"], loc = vals[f"u_{v}"], scale = 1.25*np.sqrt( vals[f"var_{v}"] ) )


        # Beta / Normal
        y = ( vals['fcontrol'] * vals['ftest'] ) / ( vals['gcontrol'] * vals['gtest'] )
        yb = y[ vals['x_test'] >= vals['x_control'] ]
            
        #Calculate probabilities
        p = ( 1/ sample_size ) * np.sum(yb)


        loss_control = (1 / sample_size ) * np.sum ( ( ( vals['x_test'] - vals['x_control'] )*y) [vals['x_test'] >= vals['x_control']] )
        loss_test = (1 / sample_size ) * np.sum ( ( ( vals['x_control'] - vals['x_test'] )*y) [vals['x_control'] >= vals['x_test']] )

        prob_test_better_control.append(p)
        expected_loss_control.append( loss_control )
        expected_loss_test.append( loss_test )
        #expected_loss_control.append( expected_loss_control )
        #expected_loss_test.append( expected_loss_test )


    return prob_test_better_control, expected_loss_control, expected_loss_test

In [16]:
df.head()

Unnamed: 0,clicks,visits,group,no_clicks
0,0,1,control,1.0
1,1,1,control,0.0
2,1,1,treatment,0.0
3,0,1,treatment,1.0
4,0,1,control,1.0


In [22]:
df = pd.read_csv("data_experiment.csv")
df1 = df.copy()
df1['group'] = df1['group'].replace( {"treatment":"test"})
for i in ['visits','clicks']:
    df1[i] = df1[i].astype(int)
df1 = df1.reset_index().rename( columns={'index':'day'} )
df1 = df1.pivot_table( index='day', columns='group', values=df1.select_dtypes(exclude='object').columns ,aggfunc='sum')

df1 = df1.swaplevel( axis=1 )
df1 = df1.reindex(sorted(df1.columns), axis=1).fillna(0)
df1.columns = ["_".join(i) for i in df1.columns]
for i in df1.columns:
    df1[f"acc_{i}"] = df1[i].cumsum()
# df1 = df1.rename(columns={
#     'control_click':"clicks_A",
#     'treatment_click':"clicks_B",
#     "control_visit":"visits_A",
#     "treatment_visit":"visits_B",
#     "acc_control_click":"acc_clicks_A",
#     "acc_treatment_click":"acc_clicks_B",
#     "acc_control_visit":"acc_visits_A",
#     "acc_treatment_visit":"acc_visits_B"
# })

df1.head()

Unnamed: 0_level_0,control_clicks,control_no_clicks,control_visits,test_clicks,test_no_clicks,test_visits,acc_control_clicks,acc_control_no_clicks,acc_control_visits,acc_test_clicks,acc_test_no_clicks,acc_test_visits
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,2.0,1.0,0.0,1.0
3,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0
4,0.0,1.0,1.0,0.0,0.0,0.0,1.0,2.0,3.0,1.0,1.0,2.0


In [23]:

proba_b_better_a, expected_loss_A, expected_loss_B = get_prob_and_loss(df1, sample_size=1000)

x1 = np.arange( len(proba_b_better_a) )

proba_b_better_a

[0.6577197342189072,
 0.496904853392234,
 0.6975987143724665,
 0.4936069164656365,
 0.6455432270452705,
 0.49950044336862837,
 0.6266211066215128,
 0.6924949072121588,
 0.6997164249993553,
 0.6184635982700332,
 0.8146313199179137,
 0.8606596618829587,
 0.7930464312939528,
 0.8438665132502253,
 0.9199905567942739,
 0.929660823816844,
 0.9047846227168279,
 0.9427990918503164,
 0.9500086245468186,
 0.9696260576782334,
 0.9678757016840387,
 0.9698169562558899,
 1.0038501924329322,
 0.9874771830608791,
 0.9824895118105502,
 1.0105301049752455,
 0.9809225700257721,
 0.9993583349059182,
 1.0169481827629927,
 0.9865441800570405,
 0.9704160296217121,
 0.9787625174850408,
 0.9819621318209999,
 0.9846835146135773,
 1.0043678245456324,
 0.9844320631633672,
 0.9933201881285918,
 0.9965034075988675,
 1.0406673701355056,
 1.0038877945426319,
 0.9894751790530488,
 1.0113611452989808,
 0.9872585533293228,
 1.0291870780417762,
 1.0112465725657256,
 1.0123393201489672,
 1.0092565502382949,
 0.98423442947