In [52]:
import pandas as pd
import numpy as np
from scipy.stats import chi2

Goal: See if click-through rate is significant. Choose Alpha = 0.05

Load file

In [19]:
AdvertisementData = pd.read_csv("Advertisement.txt", sep = ',')
AdvertisementData.head(5)

Unnamed: 0,advertisement_id,action
0,B,1
1,B,1
2,A,0
3,B,0
4,A,1


Find the contingency table

In [25]:
Crosstab = pd.crosstab(AdvertisementData['advertisement_id'], AdvertisementData['action'])
print(Crosstab)

action              0    1
advertisement_id          
A                 696  304
B                 628  372


In [26]:
Crosstab.values

array([[696, 304],
       [628, 372]], dtype=int64)

We use a chi-square test to compute 

Let $Sum_{NC}$ be the sum of all observed no-click values and similarly for $Sum_C$ for the click values over all advertisements. 
<br>
Let $N$ be the total number of advertisements shown for all observations.
<br>
Let $N_A$ be the sum of all observations for advertisement A 
<br>
Let $N_B$ be the sum of all observations for advertisement B
<br>
Let $C_A$ represent the number of clicks for advertisement A with $C_B$ for advertisement B
<br>
Let $NC_A$ represent the number of non-clicks for advertisement A with $NC_B$ for advertisement B
<br>

The expected number of non-clicks for advertisement A and B is:

$E_A = \frac{Sum_{NC}}{N} * NC_A$
<br>
$E_B = \frac{Sum_{NC}}{N} * NC_B$
<br>
<br>
and clicks:
<br>
<br>
$e_A = \frac{Sum_C}{N} * C_A$
<br>
$e_B = \frac{Sum_C}{N} * C_B$
<br>
<br>
The chi-square statistic is then:
<br>
$\chi^2 = \sum \frac{(E - O)^2}{E}$
<br>
<br>
where $O$ is the observed number of observations for clicks and no-clicks and $E$ is the expectation

Create a function to get the p-value

In [50]:
def GetTestStatistic(ContingentTable):
    A, B, C, D = ContingentTable.flatten()

    Numerator = (A*C-B*D)**2 * (A+B+C+D)
    Denominator = (A+B)*(C+D)*(A+C)*(B*D)
    
    
    return Numerator/Denominator

In [54]:
Chi_2 = GetTestStatistic(Crosstab.values)
p_value = chi2.cdf(Chi_2, 1)

if p_value < Alpha:
    print("Signficant")
else:
    print("Insignificant")

Insignificant
