In [1]:
import pandas as pd
import numpy as np
def load_data(file_path, preview_rows=5):
    """Read a CSV file into a DataFrame and print a short preview of it.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.
    preview_rows : int or None, optional
        Number of leading rows to print after a successful load. Defaults
        to 5. Pass 0 or None to skip the preview.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, or None when the file does not exist. Callers
        must check for None before using the result.
    """
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError as e:
        # Best-effort contract: report the problem and hand back None
        # explicitly instead of falling off the end of the function.
        print(f"Error: The file {file_path} was not found due to {e}.")
        return None

    if preview_rows:
        print(df.head(preview_rows))
    return df



In [2]:
!pip install plotly
!pip install scipy
!pip install statsmodels



In [3]:
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.stats.proportion import proportions_ztest
from scipy import stats


# Load the A/B-test dataset and summarise its numeric columns.
DATA_PATH = './website_ab_test.csv'
df = load_data(DATA_PATH)

# load_data prints a message and returns None when the file is missing.
# Stop here with a clear error rather than failing later with an opaque
# AttributeError on None.
if df is None:
    raise FileNotFoundError(f"Could not load {DATA_PATH}; the cells below need `df`.")

df.describe()


         Theme  Click Through Rate  Conversion Rate  Bounce Rate  \
0  Light Theme            0.054920         0.282367     0.405085   
1  Light Theme            0.113932         0.032973     0.732759   
2   Dark Theme            0.323352         0.178763     0.296543   
3  Light Theme            0.485836         0.325225     0.245001   
4  Light Theme            0.034783         0.196766     0.765100   

   Scroll_Depth  Age   Location  Session_Duration Purchases Added_to_Cart  
0     72.489458   25    Chennai              1535        No           Yes  
1     61.858568   19       Pune               303        No           Yes  
2     45.737376   47    Chennai               563       Yes           Yes  
3     76.305298   58       Pune               385       Yes            No  
4     48.927407   25  New Delhi              1437        No            No  


Unnamed: 0,Click Through Rate,Conversion Rate,Bounce Rate,Scroll_Depth,Age,Session_Duration
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.256048,0.253312,0.505758,50.319494,41.528,924.999
std,0.139265,0.139092,0.172195,16.895269,14.114334,508.231723
min,0.010767,0.010881,0.20072,20.011738,18.0,38.0
25%,0.140794,0.131564,0.353609,35.655167,29.0,466.5
50%,0.253715,0.252823,0.514049,51.130712,42.0,931.0
75%,0.370674,0.37304,0.648557,64.666258,54.0,1375.25
max,0.499989,0.498916,0.799658,79.997108,65.0,1797.0


In [4]:
# Show each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Theme               1000 non-null   object 
 1   Click Through Rate  1000 non-null   float64
 2   Conversion Rate     1000 non-null   float64
 3   Bounce Rate         1000 non-null   float64
 4   Scroll_Depth        1000 non-null   float64
 5   Age                 1000 non-null   int64  
 6   Location            1000 non-null   object 
 7   Session_Duration    1000 non-null   int64  
 8   Purchases           1000 non-null   object 
 9   Added_to_Cart       1000 non-null   object 
dtypes: float64(4), int64(2), object(4)
memory usage: 78.3+ KB


In [5]:
# Count missing values per column (`isna` is the same check as `isnull`).
df.isna().sum()

Theme                 0
Click Through Rate    0
Conversion Rate       0
Bounce Rate           0
Scroll_Depth          0
Age                   0
Location              0
Session_Duration      0
Purchases             0
Added_to_Cart         0
dtype: int64

**Dataset columns**

- **Theme:** The website theme shown to the user (Dark Theme or Light Theme).
- **Conversion Rate:** The percentage of users who signed up on the platform after visiting for the first time.
- **Bounce Rate:** The percentage of users who leave the website without further interaction after visiting a single page.
- **Scroll_Depth:** The depth to which users scroll through the website pages.
- **Age:** The age of the user.
- **Location:** The location of the user.
- **Session_Duration:** The duration of the user’s session on the website.
- **Purchases:** Whether the user purchased the book (Yes/No).
- **Added_to_Cart:** Whether the user added books to the cart (Yes/No).

#### Scatter plot
#### Histogram
#### Line plot

# Scatter plot for 'click through rate' and 'conversion rate'

In [6]:
# Plot Click Through Rate against Conversion Rate, coloured by theme,
# with an OLS trendline overlaid.
fig = px.scatter(
    df,
    x='Click Through Rate',
    y='Conversion Rate',
    color='Theme',
    trendline='ols',
    title='CTR vs Conversion Rate',
)
fig.show()

  return np.dot(exog, params)
  return np.dot(exog, params)
  return np.dot(exog, params)


In [9]:
# Split the dataset into one frame per theme.
light_theme_data = df.loc[df['Theme'] == 'Light Theme']
dark_theme_data = df.loc[df['Theme'] == 'Dark Theme']

# Histograms of Click Through Rate, one trace per theme, with the bars of
# the two themes drawn side by side (barmode='group').
fig = go.Figure(data=[
    go.Histogram(x=light_theme_data['Click Through Rate'], name='Light Theme', opacity=0.6),
    go.Histogram(x=dark_theme_data['Click Through Rate'], name='Dark Theme', opacity=0.6),
])
fig.update_layout(
    barmode='group',
    bargap=0.1,
    title_text='Click Through Rate by Theme',
    xaxis_title_text='Click Through Rate',
    yaxis_title_text='Frequency',
)
fig.show()

In [11]:
# Overlaid, semi-transparent histograms of Click Through Rate, one per theme.
fig_ctr_hist = px.histogram(
    df,
    x='Click Through Rate',
    color='Theme',
    nbins=30,
    opacity=0.7,
    barmode='overlay',
    title='Histogram of Click Through Rate by Theme',
)
fig_ctr_hist.show()

In [12]:
# Histograms of Conversion Rate, one trace per theme, with the bars of the
# two themes drawn side by side (barmode='group').
fig = go.Figure(data=[
    go.Histogram(
        x=light_theme_data['Conversion Rate'],
        name='Light Theme',
        opacity=0.6,
        nbinsx=20,
    ),
    go.Histogram(
        x=dark_theme_data['Conversion Rate'],
        name='Dark Theme',
        opacity=0.6,
        nbinsx=20,
    ),
])
fig.update_layout(
    barmode='group',
    bargap=0.1,
    title_text='Conversion Rate by Theme',
    xaxis_title_text='Conversion Rate',
    yaxis_title_text='Frequency',
)
fig.show()

In [13]:
# Box plots of Bounce Rate, one box per theme.
fig = go.Figure(data=[
    go.Box(y=light_theme_data['Bounce Rate'], name='Light Theme'),
    go.Box(y=dark_theme_data['Bounce Rate'], name='Dark Theme'),
])
fig.update_layout(
    yaxis_title_text='Bounce Rate',
    title_text='Bounce Rate by Theme',
)
fig.show()

In [14]:
# Box plots of Scroll_Depth, one box per theme.
fig = go.Figure(data=[
    go.Box(y=light_theme_data['Scroll_Depth'], name='Light Theme'),
    go.Box(y=dark_theme_data['Scroll_Depth'], name='Dark Theme'),
])
fig.update_layout(
    yaxis_title_text='Scroll Depth',
    title_text='Scroll Depth by Theme',
)
fig.show()

In [15]:
# A/B test on Purchases: compare the share of 'Yes' rows between the themes.

# Group sizes
light_theme_total = len(light_theme_data)
dark_theme_total = len(dark_theme_data)

# Number of rows with a purchase in each group
light_theme_conversions = len(light_theme_data.loc[light_theme_data['Purchases'] == 'Yes'])
dark_theme_conversions = len(dark_theme_data.loc[dark_theme_data['Purchases'] == 'Yes'])

# Observed purchase proportion per theme
light_theme_conversion_rate = light_theme_conversions / light_theme_total
dark_theme_conversion_rate = dark_theme_conversions / dark_theme_total

# Inputs for the test, light theme first in both lists
sample_sizes = [light_theme_total, dark_theme_total]
conversion_counts = [light_theme_conversions, dark_theme_conversions]

# Two-sample z-test on the two proportions
zstat, pval = proportions_ztest(count=conversion_counts, nobs=sample_sizes)

print("Light Theme Conversion Rate:", light_theme_conversion_rate)
print("Dark Theme Conversion Rate:", dark_theme_conversion_rate)
print("A/B Testing - z-statistic:", zstat, " p-value:", pval)

Light Theme Conversion Rate: 0.5308641975308642
Dark Theme Conversion Rate: 0.5038910505836576
A/B Testing - z-statistic: 0.8531246206222649  p-value: 0.39359019934127804


In [16]:
# A/B test on Session_Duration: compare the mean session length per theme.
light_theme_session_duration = light_theme_data['Session_Duration']
dark_theme_session_duration = dark_theme_data['Session_Duration']

# Mean session duration for each theme
light_theme_avg_duration = light_theme_session_duration.mean()
dark_theme_avg_duration = dark_theme_session_duration.mean()

# Independent two-sample t-test on the two duration series.
# NOTE(review): called with scipy's default settings; confirm whether the
# equal-variance assumption is appropriate or Welch's test (equal_var=False)
# should be used instead.
tstat, pval = stats.ttest_ind(
    a=light_theme_session_duration,
    b=dark_theme_session_duration,
)

print("Light Theme Average Session Duration:", light_theme_avg_duration)
print("Dark Theme Average Session Duration:", dark_theme_avg_duration)
print("A/B Testing for Session Duration - t-statistic:", tstat, " p-value:", pval)

Light Theme Average Session Duration: 930.8333333333334
Dark Theme Average Session Duration: 919.4824902723735
A/B Testing for Session Duration - t-statistic: 0.3528382474155483  p-value: 0.7242842138292167


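A/B testing is a powerful and widely used technique to compare and evaluate marketing strategies, designs, layouts, or themes. The primary purpose of A/B testing is to make data-driven decisions that lead to improved user experiences, enhanced performance metrics, and ultimately better business outcomes.

In this analysis, neither test found a statistically significant difference between the Light and Dark themes at the 5% level: the purchase-rate z-test gave p ≈ 0.39 and the session-duration t-test gave p ≈ 0.72, so the data do not show that either theme outperforms the other on these metrics.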