Hypothesis Testing using Python
Hypothesis Testing is a statistical method used to make inferences or decisions about a population based on sample data. It starts with a null hypothesis (H0), which represents a default stance or no effect, and an alternative hypothesis (H1 or Ha), which represents what we aim to prove or expect to find. The process involves using sample data to determine whether to reject the null hypothesis in favor of the alternative hypothesis, based on the likelihood of observing the sample data under the null hypothesis

In [3]:
import pandas as pd
from scipy.stats import ttest_ind

df = pd.read_csv("website_ab_test.csv")

print(df.head())

         Theme  Click Through Rate  Conversion Rate  Bounce Rate  \
0  Light Theme            0.054920         0.282367     0.405085   
1  Light Theme            0.113932         0.032973     0.732759   
2   Dark Theme            0.323352         0.178763     0.296543   
3  Light Theme            0.485836         0.325225     0.245001   
4  Light Theme            0.034783         0.196766     0.765100   

   Scroll_Depth  Age   Location  Session_Duration Purchases Added_to_Cart  
0     72.489458   25    Chennai              1535        No           Yes  
1     61.858568   19       Pune               303        No           Yes  
2     45.737376   47    Chennai               563       Yes           Yes  
3     76.305298   58       Pune               385       Yes            No  
4     48.927407   25  New Delhi              1437        No            No  


In [4]:
df.head

<bound method NDFrame.head of            Theme  Click Through Rate  Conversion Rate  Bounce Rate  \
0    Light Theme            0.054920         0.282367     0.405085   
1    Light Theme            0.113932         0.032973     0.732759   
2     Dark Theme            0.323352         0.178763     0.296543   
3    Light Theme            0.485836         0.325225     0.245001   
4    Light Theme            0.034783         0.196766     0.765100   
..           ...                 ...              ...          ...   
995   Dark Theme            0.282792         0.401605     0.200720   
996   Dark Theme            0.299917         0.026372     0.762641   
997  Light Theme            0.370254         0.019838     0.607136   
998  Light Theme            0.095815         0.137953     0.458898   
999   Dark Theme            0.342588         0.061315     0.452410   

     Scroll_Depth  Age   Location  Session_Duration Purchases Added_to_Cart  
0       72.489458   25    Chennai              1535

In [5]:
df.shape

(1000, 10)

In [6]:
df.isnull().sum()

Theme                 0
Click Through Rate    0
Conversion Rate       0
Bounce Rate           0
Scroll_Depth          0
Age                   0
Location              0
Session_Duration      0
Purchases             0
Added_to_Cart         0
dtype: int64

In [7]:
df.describe()

Unnamed: 0,Click Through Rate,Conversion Rate,Bounce Rate,Scroll_Depth,Age,Session_Duration
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.256048,0.253312,0.505758,50.319494,41.528,924.999
std,0.139265,0.139092,0.172195,16.895269,14.114334,508.231723
min,0.010767,0.010881,0.20072,20.011738,18.0,38.0
25%,0.140794,0.131564,0.353609,35.655167,29.0,466.5
50%,0.253715,0.252823,0.514049,51.130712,42.0,931.0
75%,0.370674,0.37304,0.648557,64.666258,54.0,1375.25
max,0.499989,0.498916,0.799658,79.997108,65.0,1797.0


In [8]:
# extracting conversion rates for both themes
conversion_rates_light = df[df['Theme'] == 'Light Theme']['Conversion Rate']
conversion_rates_dark = df[df['Theme'] == 'Dark Theme']['Conversion Rate']

# performing a two-sample t-test
t_stat, p_value = ttest_ind(conversion_rates_light, conversion_rates_dark, equal_var=False)

t_stat, p_value

(np.float64(0.4748494462782632), np.float64(0.6349982678451778))

In [9]:
# extracting click through rates for both themes
ctr_light = df[df['Theme'] == 'Light Theme']['Click Through Rate']
ctr_dark = df[df['Theme'] == 'Dark Theme']['Click Through Rate']

# performing a two-sample t-test
t_stat_ctr, p_value_ctr = ttest_ind(ctr_light, ctr_dark, equal_var=False)

t_stat_ctr, p_value_ctr

(np.float64(-1.9781708664172253), np.float64(0.04818435371010704))

In [10]:
# extracting bounce rates for both themes
bounce_rates_light = df[df['Theme'] == 'Light Theme']['Bounce Rate']
bounce_rates_dark = df[df['Theme'] == 'Dark Theme']['Bounce Rate']

# performing a two-sample t-test for bounce rate
t_stat_bounce, p_value_bounce = ttest_ind(bounce_rates_light, bounce_rates_dark, equal_var=False)

t_stat_bounce, p_value_bounce 

(np.float64(-1.2018883310494073), np.float64(0.229692077505148))

In [11]:
# extracting scroll depths for both themes
scroll_depth_light = df[df['Theme'] == 'Light Theme']['Scroll_Depth']
scroll_depth_dark = df[df['Theme'] == 'Dark Theme']['Scroll_Depth']

# performing a two-sample t-test for scroll depth
t_stat_scroll, p_value_scroll = ttest_ind(scroll_depth_light, scroll_depth_dark, equal_var=False)

t_stat_scroll, p_value_scroll

(np.float64(0.7562277864140986), np.float64(0.4496919249484911))

In [12]:
# creating a table for comparison
comparison_table = pd.DataFrame({
    'Metric': ['Click Through Rate', 'Conversion Rate', 'Bounce Rate', 'Scroll Depth'],
    'T-Statistic': [t_stat_ctr, t_stat, t_stat_bounce, t_stat_scroll],
    'P-Value': [p_value_ctr, p_value, p_value_bounce, p_value_scroll]
})

comparison_table

Unnamed: 0,Metric,T-Statistic,P-Value
0,Click Through Rate,-1.978171,0.048184
1,Conversion Rate,0.474849,0.634998
2,Bounce Rate,-1.201888,0.229692
3,Scroll Depth,0.756228,0.449692
