In [None]:
import pandas as pd
from scipy.stats import ttest_ind

In [None]:
# Mount Google Drive inside the Colab runtime so that files stored on Drive
# are reachable through the local path /content/drive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Sanity check: list the dataset file to confirm it is visible on the mounted Drive before loading it.
!ls /content/drive/MyDrive/DS/Datasets/website_ab_test.csv

/content/drive/MyDrive/DS/Datasets/website_ab_test.csv


In [None]:
# Load the website A/B test export from the mounted Drive folder into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/DS/Datasets/website_ab_test.csv",
)

In [None]:
# Preview the first five rows to check the column names and value formats.
print(df.head(n=5))

         Theme  Click Through Rate  Conversion Rate  Bounce Rate  \
0  Light Theme            0.054920         0.282367     0.405085   
1  Light Theme            0.113932         0.032973     0.732759   
2   Dark Theme            0.323352         0.178763     0.296543   
3  Light Theme            0.485836         0.325225     0.245001   
4  Light Theme            0.034783         0.196766     0.765100   

   Scroll_Depth  Age   Location  Session_Duration Purchases Added_to_Cart  
0     72.489458   25    Chennai              1535        No           Yes  
1     61.858568   19       Pune               303        No           Yes  
2     45.737376   47    Chennai               563       Yes           Yes  
3     76.305298   58       Pune               385       Yes            No  
4     48.927407   25  New Delhi              1437        No            No  


In [None]:
# Build a quick overview of the dataset: its size, the number of missing
# values per column, and descriptive statistics for the numeric columns.
summary = dict()
summary['Number of Records'] = len(df)
summary['Number of Columns'] = len(df.columns)
summary['Missing Value'] = df.isna().sum()
summary['Numberical Columns Summary'] = df.describe()

summary

{'Number of Records': 1000,
 'Number of Columns': 10,
 'Missing Value': Theme                 0
 Click Through Rate    0
 Conversion Rate       0
 Bounce Rate           0
 Scroll_Depth          0
 Age                   0
 Location              0
 Session_Duration      0
 Purchases             0
 Added_to_Cart         0
 dtype: int64,
 'Numberical Columns Summary':        Click Through Rate  Conversion Rate  Bounce Rate  Scroll_Depth  \
 count         1000.000000      1000.000000  1000.000000   1000.000000   
 mean             0.256048         0.253312     0.505758     50.319494   
 std              0.139265         0.139092     0.172195     16.895269   
 min              0.010767         0.010881     0.200720     20.011738   
 25%              0.140794         0.131564     0.353609     35.655167   
 50%              0.253715         0.252823     0.514049     51.130712   
 75%              0.370674         0.373040     0.648557     64.666258   
 max              0.499989         0.49891

In [None]:
# Keep only the columns with a numeric dtype; these are the metrics that can be averaged.
select_numerical_data = df.select_dtypes(include='number')

# Average every numeric metric within each theme.
theme_perfomance = (
    df.groupby('Theme')[select_numerical_data.columns]
    .mean()
)

# Put the theme with the highest mean conversion rate first.
theme_performance_sorted = theme_perfomance.sort_values(
    'Conversion Rate',
    ascending=False,
)

print(theme_performance_sorted)

             Click Through Rate  Conversion Rate  Bounce Rate  Scroll_Depth  \
Theme                                                                         
Light Theme            0.247109         0.255459     0.499035     50.735232   
Dark Theme             0.264501         0.251282     0.512115     49.926404   

                   Age  Session_Duration  
Theme                                     
Light Theme  41.734568        930.833333  
Dark Theme   41.332685        919.482490  


In [None]:
# Conversion-rate samples, one Series per theme.
conversion_rates_light = df.loc[df['Theme'] == 'Light Theme', 'Conversion Rate']
conversion_rates_dark = df.loc[df['Theme'] == 'Dark Theme', 'Conversion Rate']

# Two-sample t-test (light vs. dark) without assuming equal variances
# (equal_var=False selects Welch's t-test in SciPy).
t_stat, p_value = ttest_ind(
    conversion_rates_light,
    conversion_rates_dark,
    equal_var=False,
)

(t_stat, p_value)

(0.4748494462782632, 0.6349982678451778)

In [None]:
# Click-through-rate samples, one Series per theme.
ctr_light = df.loc[df['Theme'] == 'Light Theme', 'Click Through Rate']
ctr_dark = df.loc[df['Theme'] == 'Dark Theme', 'Click Through Rate']

# Two-sample t-test (light vs. dark) without assuming equal variances
# (equal_var=False selects Welch's t-test in SciPy).
t_stat_ctr, p_value_ctr = ttest_ind(
    ctr_light,
    ctr_dark,
    equal_var=False,
)

(t_stat_ctr, p_value_ctr)

(-1.9781708664172253, 0.04818435371010704)

In [None]:
# Bounce-rate samples, one Series per theme.
bounce_rates_light = df.loc[df['Theme'] == 'Light Theme', 'Bounce Rate']
bounce_rates_dark = df.loc[df['Theme'] == 'Dark Theme', 'Bounce Rate']

# Two-sample t-test (light vs. dark) without assuming equal variances
# (equal_var=False selects Welch's t-test in SciPy).
t_stat_bounce, p_value_bounce = ttest_ind(bounce_rates_light, bounce_rates_dark, equal_var=False)

# Scroll-depth samples, one Series per theme.
scroll_depth_light = df.loc[df['Theme'] == 'Light Theme', 'Scroll_Depth']
scroll_depth_dark = df.loc[df['Theme'] == 'Dark Theme', 'Scroll_Depth']

t_stat_scroll, p_value_scroll = ttest_ind(scroll_depth_light, scroll_depth_dark, equal_var=False)

# Collect the test results for all four metrics into one comparison table.
# The bare `t_stat_*, p_value_*` expressions that used to sit mid-cell were
# removed: only a cell's final expression is displayed, so they showed nothing,
# and the table below already reports those values.
# NOTE: t_stat_ctr / p_value_ctr and t_stat / p_value are produced by the
# earlier click-through-rate and conversion-rate cells; run those first.
comparision_table = pd.DataFrame({
    'Metric': ['Click Through Rate', 'Conversion Rate', 'Bounce Rate', 'Scroll_Depth'],
    'T-Statistic': [t_stat_ctr, t_stat, t_stat_bounce, t_stat_scroll],
    'P-Value': [p_value_ctr, p_value, p_value_bounce, p_value_scroll],
})

comparision_table

Unnamed: 0,Metric,T-Statistic,P-Value
0,Click Through Rate,-1.978171,0.048184
1,Conversion Rate,0.474849,0.634998
2,Bounce Rate,-1.201888,0.229692
3,Scroll_Depth,0.756228,0.449692


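Conclusion (significance level α = 0.05): for Conversion Rate (p ≈ 0.635), Bounce Rate (p ≈ 0.230) and Scroll_Depth (p ≈ 0.450) the p-value is greater than 0.05, so we fail to reject the null hypothesis, i.e. there is no statistically significant difference between the two themes on these metrics.
Only one metric, Click Through Rate (p ≈ 0.048), has a p-value below 0.05, so for it we reject the null hypothesis: the difference in click-through rate between the themes is statistically significant, with the Dark Theme having the higher mean.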

Note:
- Null hypothesis (H0): there is no difference between the light and dark themes for the metric being tested.
- Alternative hypothesis (Ha): there is a difference between the light and dark themes for the metric being tested.
