In [1]:
import numpy as np
import matplotlib.pyplot as plt
from iminuit import Minuit
import seaborn as sns
import sys
import scipy.stats as sc
import pandas as pd
import ffl

sys.path.append('../External_Functions')
from ExternalFunctions import UnbinnedLH, Chi2Regression, BinnedLH
from ExternalFunctions import nice_string_output, add_text_to_ax  # Useful functions to print fit results on figure

sns.set()
sns.set_context("paper", font_scale=1.4)
sns.set_style("ticks")

colors = sns.color_palette('deep', 10, desat = 1)
sns.set_palette(colors)
plt.rcParams['axes.grid'] = True
plt.rcParams['axes.grid.axis'] = 'y'
plt.rcParams['axes.grid.which'] = 'major'
plt.rcParams['grid.linestyle'] = '--'
plt.rcParams['grid.linewidth'] = 2
plt.rcParams['figure.figsize'] = (16, 9)




# 4.2 - A medical experiment is testing if a drug has a specific side effect. Out of 24 persons taking the drug, 10 had the side effect. For 24 other persons getting a placebo, only 4 had the side effect. Would you claim that the drug has this side effect?

This problem can be written as a contingency table and we can then use Fisher's Exact Test on it

$$ p = \frac{(A+B)!(C+D)!(A+C)!(B+D)!}{A!B!C!D!} $$

where $A$ is the entry $1,1$ in the table, $B$ is $1,2$ and so forth. When doing the test, we have to keep track of the null hypothesis:

$H_0$: There is no correlation between experiencing a sideeffect and taking the drug.


With the calculated p_value of 0.043 we must then reject this hypothesis if we set the usual signifiance level to 95%. It would seem like there is some relation between taking the drugs, and experiencing a side effect.




In [3]:
A = 10
B = 14
C = 4
D = 20

from math import factorial

p_val = factorial(A+B) * factorial(C+D) * factorial(A+C) * factorial(B+D) / factorial(A) / factorial(B) / factorial(C) / factorial(D)  / factorial(A+B+C+D)

print(p_val)

0.04320840795901439
