In [1]:
import wooldridge as woo
import numpy as np
import pandas as pd


In [2]:
# Labels for the marriage rating, ordered from worst (code 0) to best (code 4).
mlab = ['very unhappy', 'unhappy', 'average', 'happy', 'very happy']

# Load the 'affairs' data set and derive the labelled columns in one chain.
# The keyword arguments of `assign` are evaluated in order, so `marriage`
# already sees the shifted `ratemarr` values.
affairs = woo.dataWoo('affairs').assign(
    # Shift the rating down by one so it can be used as zero-based codes.
    ratemarr=lambda df: df['ratemarr'] - 1,
    # `kids` supplies the codes: 0 -> 'no', 1 -> 'yes'.
    haskids=lambda df: pd.Categorical.from_codes(df['kids'],
                                                 categories=['no', 'yes']),
    marriage=lambda df: pd.Categorical.from_codes(df['ratemarr'],
                                                  categories=mlab),
)

In [3]:
# Preview the first five rows, including the derived `haskids` and `marriage` columns.
affairs.head(n=5)

Unnamed: 0,id,male,age,yrsmarr,kids,relig,educ,occup,ratemarr,naffairs,...,vryhap,hapavg,avgmarr,unhap,vryrel,smerel,slghtrel,notrel,haskids,marriage
0,4,1,37.0,10.0,0,3,18,7,3,0,...,0,1,0,0,0,0,1,0,no,happy
1,5,0,27.0,4.0,0,4,14,6,3,0,...,0,1,0,0,0,1,0,0,no,happy
2,6,1,27.0,1.5,0,3,18,4,3,3,...,0,1,0,0,0,0,1,0,no,happy
3,11,0,32.0,15.0,1,1,12,1,3,0,...,0,1,0,0,0,0,0,0,yes,happy
4,12,0,27.0,4.0,1,3,17,1,4,3,...,1,0,0,0,0,0,1,0,yes,very happy


## Frequency table in NumPy

In [4]:
# With return_counts=True, np.unique returns a (unique values, counts) pair.
ft_np = np.unique(affairs['marriage'], return_counts=True)
unique_elem_np, counts_np = ft_np

print(f'unique_elem_np: \n{unique_elem_np}\n')
print(f'counts_np: \n{counts_np}\n')

unique_elem_np: 
['average' 'happy' 'unhappy' 'very happy' 'very unhappy']

counts_np: 
[ 93 194  66 232  16]



## Frequency table in pandas

In [5]:
# Count how often each marriage rating occurs (most frequent first).
ft_pd = affairs.loc[:, 'marriage'].value_counts()
print(f'ft_pd: \n{ft_pd}\n')

ft_pd: 
very happy      232
happy           194
average          93
unhappy          66
very unhappy     16
Name: marriage, dtype: int64



## Frequency table with groupby

In [6]:
# Frequency of each marriage rating within the 'no' / 'yes' kids groups;
# sort_index() puts the result in (haskids, marriage) index order.
(
    affairs['marriage']
    .groupby(affairs['haskids'])
    .value_counts()
    .sort_index()
)

haskids              
no       very unhappy      3
         unhappy           8
         average          24
         happy            40
         very happy       96
yes      very unhappy     13
         unhappy          58
         average          69
         happy           154
         very happy      136
Name: marriage, dtype: int64

## Contingency table in pandas

In [7]:
# Raw cell counts: marriage rating (rows) by having kids (columns).
pd.crosstab(index=affairs['marriage'], columns=affairs['haskids'])

haskids,no,yes
marriage,Unnamed: 1_level_1,Unnamed: 2_level_1
very unhappy,3,13
unhappy,8,58
average,24,69
happy,40,154
very happy,96,136


In [8]:
# Same contingency table with row/column totals ("All") appended.
# `margins` is documented as a boolean flag; the previous `margins=3` only
# worked because 3 is truthy, so pass True explicitly.
pd.crosstab(affairs['marriage'], affairs['haskids'], margins=True)

haskids,no,yes,All
marriage,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
very unhappy,3,13,16
unhappy,8,58,66
average,24,69,93
happy,40,154,194
very happy,96,136,232
All,171,430,601


In [9]:
# Overall proportions: every cell is divided by the grand total.
pd.crosstab(index=affairs['marriage'],
            columns=affairs['haskids'],
            normalize='all')

haskids,no,yes
marriage,Unnamed: 1_level_1,Unnamed: 2_level_1
very unhappy,0.004992,0.021631
unhappy,0.013311,0.096506
average,0.039933,0.114809
happy,0.066556,0.25624
very happy,0.159734,0.22629


In [10]:
# Row proportions: within each marriage rating, the shares sum to 1.
pd.crosstab(index=affairs['marriage'],
            columns=affairs['haskids'],
            normalize='index')

haskids,no,yes
marriage,Unnamed: 1_level_1,Unnamed: 2_level_1
very unhappy,0.1875,0.8125
unhappy,0.121212,0.878788
average,0.258065,0.741935
happy,0.206186,0.793814
very happy,0.413793,0.586207


In [11]:
# Column proportions: within each haskids group, the shares sum to 1.
pd.crosstab(index=affairs['marriage'],
            columns=affairs['haskids'],
            normalize='columns')

haskids,no,yes
marriage,Unnamed: 1_level_1,Unnamed: 2_level_1
very unhappy,0.017544,0.030233
unhappy,0.046784,0.134884
average,0.140351,0.160465
happy,0.233918,0.35814
very happy,0.561404,0.316279
