# A/B Test Review

In [309]:
import pandas as pd

## Load log file

In [310]:
df = pd.read_json('../website/logs/app.log', lines=True)

In [311]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

In [312]:
df.head(2)

Unnamed: 0,level,message,timestamp
0,info,"{'event': 'impression', 'experiments': [], 'ex...",2024-04-08 08:52:05.429000+00:00
1,info,"{'event': 'impression', 'experiments': [], 'ex...",2024-04-08 08:52:07.387000+00:00


In [313]:
message_df = pd.json_normalize(df['message'])
message_df.head(2)

Unnamed: 0,event,experiments,page,url,username,extra.productId,extra.paymentId
0,impression,[],,/,,,
1,impression,[],products,/products,def841c4-aa9d-4b50-8668-f0d6686f4488,,


In [314]:
experiments_df = pd.json_normalize(df['message'], record_path='experiments')
experiments_df.head(2)

Unnamed: 0,group,name
0,test,SkipConfirmationScreen
1,control,DefaultSelectedProduct


In [315]:
df = pd.concat([df, message_df, experiments_df], axis=1).drop([
    'message', 
    'experiments', 
    'url',
    'level',
], axis=1)

In [316]:
df = df.sort_values(['username', 'timestamp'])

In [317]:
df.head(3)

Unnamed: 0,timestamp,event,page,username,extra.productId,extra.paymentId,group,name
36,2024-04-08 09:49:51.701000+00:00,impression,products,0f652ae2-707f-46a8-952b-ee20ea42c80c,,,control,DefaultSelectedProduct
37,2024-04-08 09:49:54.452000+00:00,impression,payment,0f652ae2-707f-46a8-952b-ee20ea42c80c,6.0,,test,CancelToPreviousScreen
38,2024-04-08 09:49:54.458000+00:00,impression,payment,0f652ae2-707f-46a8-952b-ee20ea42c80c,,,test,SkipConfirmationScreen


## Are test and control groups balanced?

In [318]:
df.groupby(['name', 'group'])['username'].nunique().reset_index(name='unique_count')

Unnamed: 0,name,group,unique_count
0,CancelToPreviousScreen,control,6
1,CancelToPreviousScreen,test,4
2,DefaultSelectedProduct,control,9
3,DefaultSelectedProduct,test,5
4,SkipConfirmationScreen,control,6
5,SkipConfirmationScreen,test,10


## Time spent

In [319]:
df['time_diff'] = df.groupby(['username', 'name', 'group'])['timestamp'].diff().dt.total_seconds()
df.groupby(['name', 'group'])['time_diff'].mean()

name                    group  
CancelToPreviousScreen  control    2.651667
                        test       4.091000
DefaultSelectedProduct  control    2.787889
                        test       2.891333
SkipConfirmationScreen  control    3.642000
                        test       2.462500
Name: time_diff, dtype: float64