In [5]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind, chi2_contingency, norm

### **Data Processing**

In [3]:
import pandas as pd

file_path = "../raw-data/raw-data.csv"
df = pd.read_csv(file_path)

df = df.dropna()
df = df.drop(columns=df.columns[0])
data = df.to_numpy()

In [7]:
shaking_adjusted = data[(data == 1) | (data == 3)]
flipping_adjusted = data[(data == 2) | (data == 3)]

### **t-test**

In [37]:
t_stat_adjusted, p_value = ttest_ind(shaking_adjusted, flipping_adjusted, nan_policy='omit')
t_stat_adjusted, p_value

(-13.165483252721433, 3.76154292197411e-26)

p-value is less than 0.05 -> reject H0 

### **chi-square test**

In [35]:
shaking_count_adjusted = np.count_nonzero(shaking_adjusted)
flipping_count_adjusted = np.count_nonzero(flipping_adjusted)
other_count_adjusted = np.count_nonzero(data == 0)
contingency_table_adjusted = np.array([[shaking_count_adjusted, flipping_count_adjusted], [other_count_adjusted, other_count_adjusted]])

contingency_table_adjusted

array([[103,  37],
       [105, 105]])

In [36]:
chi2_stat_adjusted, p_value_chi2_adjusted, _, _ = chi2_contingency(contingency_table_adjusted)
chi2_stat_adjusted, p_value_chi2_adjusted

(18.391609674070057, 1.798481716866946e-05)

p-value is less than 0.05 -> reject H0


### **z-test for proportions**

Null Hypothesis (H0): The proportion of accidental activations due to shaking is less than or equal to the proportion of accidental activations due to flipping.

Alternative Hypothesis (H1): The proportion of accidental activations due to shaking is greater than the proportion of accidental activations due to flipping.

**Rationale of using z-test**

- sample size is 240, which < 30
- shaking doesn't afffect flipping
- data should be normally distributed. However, for large sample sizes (over 30) this doesn’t always matter.
- Your data should be randomly selected from a population, where each item has an equal chance of being selected.
- Sample sizes should be equal (both 240)


In [15]:
n_shaking = np.count_nonzero((data == 1) | (data == 3))
n_flipping = np.count_nonzero((data == 2) | (data == 3))

p_shaking = n_shaking / data.size
p_flipping = n_flipping / data.size

p_pooled = (n_shaking + n_flipping) / (2 * data.size)

z_stat = (p_shaking - p_flipping) / np.sqrt(2 * p_pooled * (1 - p_pooled) / data.size)

p_value = 1 - norm.cdf(np.abs(z_stat))
p_value

1.705102725679808e-11