In [9]:
# Importing the libraries
import numpy as np
import pandas as pd

In [10]:
#Fixing the global NumPy random seed for reproducibility (change the value to generate a different dataset)
RANDOM_SEED = 34
np.random.seed(RANDOM_SEED)

In [11]:
#Number of synthetic users to generate
record_size = 5000

#Randomly assigning every user to the control (A) or treatment (B) group
group_labels = ['A', 'B']
groups = np.random.choice(group_labels, size=record_size)

In [12]:
#Drawing one random conversion rate per group, uniformly between 0.05 and 0.20
c_rates = np.random.uniform(low=0.05, high=0.20, size=2)

#Unpacking the rates: the first belongs to group A, the second to group B
c_rate_a, c_rate_b = c_rates

In [13]:
#Simulating a 0/1 conversion outcome for every user under each group's rate
#(group A's outcomes are drawn first, then group B's), then keeping, per user,
#the outcome that matches the group the user was assigned to
outcome_under_a = np.random.binomial(1, c_rate_a, record_size) == 1
outcome_under_b = np.random.binomial(1, c_rate_b, record_size) == 1
is_converted = np.where(groups == 'A', outcome_under_a, outcome_under_b)

#Labelling the outcomes as 'Yes' / 'No'
conversions = np.where(is_converted, 'Yes', 'No')

In [14]:
#Random page views per session on the retail website page (integers from 1 to 14)
page_views = np.random.randint(low=1, high=15, size=record_size)

#Random time spent on the retail website by a user (integers from 40 to 449)
time_spent = np.random.randint(low=40, high=450, size=record_size)

#Unique random user ids, sampled without replacement from 10000 to 19998
candidate_ids = np.arange(10000, 19999)
user_ids = np.random.choice(candidate_ids, size=record_size, replace=False)

In [15]:
#Extra categorical values used to make the data look more like a real retail website export
device_options = ['Desktop', 'Mobile']
location_options = ['England', 'Scotland', 'Wales', 'Northern Ireland']

#Assembling the synthetic dataframe from the simulated arrays, then appending the
#Device and Location columns (Device is sampled before Location)
retail_df = pd.DataFrame({
    'User ID' : user_ids,
    'Group' : groups,
    'Page Views' : page_views,
    'Time Spent' : time_spent,
    'Conversion' : conversions
}).assign(
    Device=np.random.choice(device_options, size=record_size),
    Location=np.random.choice(location_options, size=record_size),
)

#Previewing the first rows followed by the (rows, columns) shape
display(retail_df.head(), retail_df.shape)

Unnamed: 0,User ID,Group,Page Views,Time Spent,Conversion,Device,Location
0,14292,B,3,424,No,Mobile,Northern Ireland
1,11682,A,9,342,No,Mobile,Scotland
2,19825,A,2,396,No,Desktop,Northern Ireland
3,16080,B,4,318,No,Desktop,Wales
4,18851,A,1,338,Yes,Desktop,Scotland


(5000, 7)

In [16]:
#Exporting the dataframe to a csv file, leaving out the index column
csv_path = 'ab_testing.csv'
retail_df.to_csv(csv_path, index=False)