# Contrastes de independencia y homogeneidad



$$
\left\{
\begin{array}{ll}
H_0: & \text{la etnia del votante es independiente de su postura política}\\\
H_1: & \text{la etnia del votante no es independiente de su postura política}
\end{array}
\right.
$$

In [3]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [4]:
# Seed NumPy's global random generator so the np.random.choice draws below are reproducible.
np.random.seed(2020)

In [9]:
# Simulate the sample: ethnicity and ideology are drawn separately, each from its
# own categorical distribution (label -> probability), with the same sample size.
n_voters = 2000
etni_probs = {"asiatic": 0.05, "afroamerican": 0.10, "latino": 0.25, "european": 0.55, "otro": 0.05}
ideo_probs = {'democrat': 0.4, 'republican': 0.35, 'independent': 0.25}
voter_etni = np.random.choice(a = list(etni_probs), p = list(etni_probs.values()), size = n_voters)
voter_ideo = np.random.choice(a = list(ideo_probs), p = list(ideo_probs.values()), size = n_voters)

In [10]:
# One row per simulated voter: ethnicity label and ideology label side by side.
voters = pd.DataFrame(dict(etnie = voter_etni, ideology = voter_ideo))

In [11]:
# Display the simulated voters table (columns 'etnie' and 'ideology').
voters

Unnamed: 0,etnie,ideology
0,latino,democrat
1,afroamerican,democrat
2,afroamerican,democrat
3,european,democrat
4,european,republican
...,...,...
1995,asiatic,republican
1996,latino,independent
1997,european,independent
1998,european,democrat


In [17]:
# Contingency table of ethnicity (rows) by ideology (columns);
# margins = True appends the 'All' row and column holding the totals.
voters_tab = pd.crosstab(index = voters['etnie'], columns = voters['ideology'], margins = True)
voters_tab

ideology,democrat,independent,republican,All
etnie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
afroamerican,73,38,77,188
asiatic,37,22,41,100
european,449,269,384,1102
latino,216,110,199,525
otro,36,23,26,85
All,811,462,727,2000


In [27]:
# Observed frequencies o_ij: the contingency table without its 'All' margins.
# The margins are dropped by label rather than by hard-coded position, so this
# keeps working if the number of ethnicities or ideologies changes.
o_ij = voters_tab.drop(index = 'All', columns = 'All')
o_ij

ideology,democrat,independent,republican
etnie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
afroamerican,73,38,77
asiatic,37,22,41
european,449,269,384
latino,216,110,199
otro,36,23,26


$$\chi_0^2 = \sum_{i=1}^I\sum_{j=1}^J\frac{(o_{ij} - e_{ij})^2}{e_{ij}}$$

In [28]:
# Expected frequencies under H0 (independence):
#   e_ij = (total of row i) * (total of column j) / (grand total).
# The margins are looked up by label (using the labels of o_ij) instead of by
# hard-coded positional slices, and without chained indexing.
row_totals = voters_tab.loc[o_ij.index, 'All']
col_totals = voters_tab.loc['All', o_ij.columns]
n_total = voters_tab.loc['All', 'All']
e_ij = pd.DataFrame(np.outer(row_totals, col_totals) / n_total,
                    index = o_ij.index, columns = o_ij.columns)
e_ij

ideology,democrat,independent,republican
etnie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
afroamerican,76.234,43.428,68.338
asiatic,40.55,23.1,36.35
european,446.861,254.562,400.577
latino,212.8875,121.275,190.8375
otro,34.4675,19.635,30.8975


In [30]:
# Chi-square statistic: add up (observed - expected)^2 / expected over every cell.
# The total is taken per column first and then across columns.
cell_terms = (o_ij - e_ij).pow(2).div(e_ij)
chi0 = cell_terms.sum().sum()
chi0

7.250684658251664

In [32]:
# Significance level of the test.
alpha = 0.05
# Degrees of freedom of the independence test, (rows - 1) * (columns - 1),
# derived from the observed table instead of being hard-coded.
n_rows, n_cols = o_ij.shape
dof = (n_rows - 1) * (n_cols - 1)
# Critical value: the (1 - alpha) quantile of the chi-square distribution.
critval = stats.chi2.ppf(q = 1 - alpha, df = dof)
critval

15.50731305586545

In [33]:
# Decision by critical region: True means the observed statistic chi0 lies below
# the critical value, i.e. H0 is not rejected at level alpha.
critval > chi0

True

In [36]:
# p-value = P(X >= chi0) for X ~ chi-square with (rows - 1) * (columns - 1) degrees
# of freedom. The survival function sf is used instead of 1 - cdf because it avoids
# the precision loss of the subtraction; the degrees of freedom come from the
# shape of the observed table rather than being hard-coded.
p_val = stats.chi2.sf(x = chi0, df = (o_ij.shape[0] - 1) * (o_ij.shape[1] - 1))
p_val

0.509843132177107

In [37]:
# Decision by p-value: True means p_val exceeds alpha, so H0 is not rejected.
p_val > alpha

True

In [38]:
# Same test done by SciPy in one call; the result holds the test statistic, the
# p-value, the degrees of freedom and the array of expected frequencies.
stats.chi2_contingency(observed = o_ij)

(7.2506846582516635,
 0.5098431321771072,
 8,
 array([[ 76.234 ,  43.428 ,  68.338 ],
        [ 40.55  ,  23.1   ,  36.35  ],
        [446.861 , 254.562 , 400.577 ],
        [212.8875, 121.275 , 190.8375],
        [ 34.4675,  19.635 ,  30.8975]]))

In [40]:
# Unpack SciPy's result: test statistic, p-value, degrees of freedom and expected frequencies.
# NOTE: despite its name, `crosstab` holds the expected frequencies under H0,
# not the observed contingency table.
Chi0, pval, df, crosstab = stats.chi2_contingency(observed = o_ij)

In [41]:
# Summarize the result of stats.chi2_contingency. The statistic shown is Chi0 (from
# that call) rather than the hand-computed chi0, so all three figures come from
# the same source.
'Tenemos un Chi0 = {}, un p_valor = {} con {} grados de libertad'.format(Chi0, pval, df)

'Tenemos un Chi0 = 7.250684658251664, un p_valor = 0.5098431321771072 con 8 grados de libertad'

In [42]:
# Expected frequencies returned by chi2_contingency (same values as the hand-computed e_ij).
crosstab

array([[ 76.234 ,  43.428 ,  68.338 ],
       [ 40.55  ,  23.1   ,  36.35  ],
       [446.861 , 254.562 , 400.577 ],
       [212.8875, 121.275 , 190.8375],
       [ 34.4675,  19.635 ,  30.8975]])