# OnlySolution's A/B Testing Business Case
An analysis of the A/B test run on the rating bar redesign.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Prepare the data

In [2]:
# Load the raw export and clean it in a single chain, so that re-running this
# cell always starts from the file on disk rather than from an already-mutated
# frame.
df = (
    pd.read_csv('data/rating-bar-dataset.csv')
    .dropna(how='all')           # discard rows in which every field is NaN
    .drop(columns='CustomerID')  # drop the customer identifier column
    # Replace the remaining NaN values with 0 in every column.
    # NOTE(review): this also writes 0 into non-numeric columns (e.g. the date
    # column) wherever they have gaps - confirm that this is intended.
    .fillna(0)
)

# Column names for the twelve monthly payment/refund pairs: M1Pay, M1Ref, ...,
# M12Pay, M12Ref (payment first, refund second for each month).
cols = [name for month in range(1, 13) for name in (f'M{month}Pay', f'M{month}Ref')]

# Rename all columns positionally; pandas raises ValueError if the number of
# names does not match the number of columns in the file.
df.columns = ['Device', 'Medium', 'Date', 'Category', 'TrialPay', 'TrialRef', *cols, 'Rated', 'Experiment']

# Convert date strings to datetime objects
df['Date'] = pd.to_datetime(df['Date'])

# Replace all 'ppc' with 'PPC' in the 'Medium' column. The result is assigned
# back explicitly: calling replace(..., inplace=True) on the selection
# df['Medium'] is a chained in-place update, which emits a warning in recent
# pandas versions and leaves df unchanged once Copy-on-Write is enabled.
df['Medium'] = df['Medium'].replace('ppc', 'PPC')

## Add necessary columns

In [3]:
# Name the payment and refund columns explicitly (trial period followed by
# months 1-12) instead of relying on their alternating position in the frame.
pay_cols = ['TrialPay'] + [f'M{month}Pay' for month in range(1, 13)]
ref_cols = ['TrialRef'] + [f'M{month}Ref' for month in range(1, 13)]

# Per-customer totals of everything paid and everything refunded
df['Payments'] = df[pay_cols].sum(axis=1)
df['Refunds'] = df[ref_cols].sum(axis=1)

# LTV per customer: total payments minus total refunds
df['LTV'] = df['Payments'].sub(df['Refunds'])

## Check if A/B customer groups are similar

In [4]:
# Partition customers by the value of the 'Experiment' column:
# 'Control', 'Variation', or 'Not an Experiment'.
control = df[df['Experiment'].eq('Control')]
experiment = df[df['Experiment'].eq('Variation')]
non_experiment = df[df['Experiment'].eq('Not an Experiment')]

In [5]:
# Mean LTV of each group, in the order: control, experiment, non-experiment
tuple(group['LTV'].mean() for group in (control, experiment, non_experiment))

(9.996883116883131, 10.05438144329898, 10.433963702913088)

As we can see, the mean LTV is almost equal for groups A and B, and both are quite close to the mean LTV of customers who were not involved in the experiment. This suggests that the two groups are comparable, so the experiment setup is accurate.

## Calculate confidence interval for the experiment

In [6]:
# 'Rated' values of the customers in each experiment arm.
# Note: this rebinds `control` and `experiment` from the full group frames
# to single-column Series.
control = df.loc[df['Experiment'].eq('Control'), 'Rated']
experiment = df.loc[df['Experiment'].eq('Variation'), 'Rated']

# Conversion: number of rows whose 'Rated' value is 1, divided by the arm size
control_conversion = control.value_counts()[1] / len(control)
experiment_conversion = experiment.value_counts()[1] / len(experiment)

In [7]:
# Lift: relative change of the experiment conversion rate versus control
conversion_ratio = experiment_conversion / control_conversion
lift = conversion_ratio - 1

![Expected Lift for Experiment](img/lift.jpg)

In [8]:
def _proportion_std_error(rate, sample_size):
    """Return sqrt(rate * (1 - rate) / sample_size) for a conversion rate."""
    return (rate * (1 - rate) / sample_size) ** 0.5


# Standard error of the conversion rate in each experiment arm
control_std_error = _proportion_std_error(control_conversion, control.shape[0])
experiment_std_error = _proportion_std_error(experiment_conversion, experiment.shape[0])

print('Control standard error:', round(control_std_error, 7))
print('Experiment standard error:', round(experiment_std_error, 7))

Control standard error: 0.025463
Experiment standard error: 0.0251179


![Standard Error for Control](img/control_standard_error.jpg)
![Standard Error for Experiment](img/experiment_standard_error.jpg)

In [9]:
# Variance of the lift: the squared ratio of the two conversion rates times
# the sum of each arm's squared standard error relative to its squared rate.
conversion_ratio = experiment_conversion / control_conversion
control_rel_variance = control_std_error ** 2 / control_conversion ** 2
experiment_rel_variance = experiment_std_error ** 2 / experiment_conversion ** 2

experiment_lift_variance = conversion_ratio ** 2 * (control_rel_variance + experiment_rel_variance)

print('Experiment lift variance', experiment_lift_variance)

Experiment lift variance 0.005252547978403822


![Lift Variance for Experiment](img/experiment_lift_variance.jpg)

In [10]:
# Confidence interval for the lift:
#     lift ± critical value × sqrt(lift variance)
# 1.645 is the standard-normal critical value for a two-sided 90% interval.
t_value = 1.645

# Half-width of the interval, shared by both bounds
lift_margin = t_value * experiment_lift_variance ** 0.5

experiment_lower_bound = lift - lift_margin
experiment_upper_bound = lift + lift_margin

# ':.2%' multiplies by 100 and prints exactly two decimals, which avoids
# float artefacts such as 22.060000000000002 from round(x, 4) * 100.
print(f'Lower bound: {experiment_lower_bound:.2%}')
# This line previously mislabelled the upper bound as 'Lower bound'.
print(f'Upper bound: {experiment_upper_bound:.2%}')

Lower bound: -1.78%
Lower bound: 22.06%


![Bounds for Experiment](img/experiment_lift_bounds.jpg)

As we can see, the confidence interval crosses 0. Therefore, we cannot conclude that the difference in conversion rate is statistically significant. The variation is not a clear winner.