# Specific Multiple Correspondence Analysis with Instrumental Variables (speMCAIV) - `poison` dataset

In [1]:
# Silence warnings so they do not clutter the rendered notebook.
from warnings import simplefilter, filterwarnings
simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): called without a category, this ignores every Warning subclass,
# which makes the FutureWarning-only filter above redundant.
filterwarnings("ignore")
# great_tables (GT, html) is used below to render DataFrames as formatted tables.
from great_tables import GT, html
def print_dt(data,title=None,subtitle=None,rowname=None,digits=6):
    """Build a great_tables table from a tabular object, with an optional header.

    Parameters
    ----------
    data : object exposing ``round``, ``rename_axis`` and ``reset_index``
        (a pandas DataFrame in this notebook).
    title : table title forwarded to ``tab_header`` (plain text or ``html(...)``).
    subtitle : table subtitle forwarded to ``tab_header``.
    rowname : name given to the index before it is turned into a regular column.
    digits : number of decimals passed to ``data.round``.

    Returns
    -------
    The ``GT`` object returned by ``tab_header``; as the last expression of a
    cell it is rendered by the notebook.
    """
    # Round first, then expose the index as an ordinary column named `rowname`
    # so that it shows up in the rendered table.
    rounded = data.round(digits)
    with_index_column = rounded.rename_axis(rowname).reset_index()
    table = GT(data=with_index_column)
    return table.tab_header(title=title, subtitle=subtitle)

## `poison` dataset

In [2]:
# Load the poison dataset and display it as a table. The first two columns are
# grouped under "Instrumental variables", the remaining ones under "Dependent variables".
from scientisttools.datasets import poison
(
    GT(poison.rename_axis("Individuals").reset_index())
    .tab_header(title=html("<b>Poison Dataset</b>"))
    .tab_spanner(label=html("<b>Individuals</b>"),columns="Individuals")
    .tab_spanner(label=html("<b>Dependent<br>variables</b>"),columns=poison.columns.tolist()[2:])
    .tab_spanner(label=html("<b>Instrumental<br>variables</b>"),columns=poison.columns.tolist()[:2])
)

Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset,Poison Dataset
Individuals,Instrumental variables,Instrumental variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables
Individuals,Age,Time,Sick,Sex,Nausea,Vomiting,Abdominals,Fever,Diarrhae,Potato,Fish,Mayo,Courgette,Cheese,Icecream
1,9,22,Sick_y,F,Nausea_y,Vomit_n,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
2,5,0,Sick_n,F,Nausea_n,Vomit_n,Abdo_n,Fever_n,Diarrhea_n,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_n,Icecream_y
3,6,16,Sick_y,F,Nausea_n,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
4,9,0,Sick_n,F,Nausea_n,Vomit_n,Abdo_n,Fever_n,Diarrhea_n,Potato_y,Fish_y,Mayo_n,Courg_y,Cheese_y,Icecream_y
5,7,14,Sick_y,M,Nausea_n,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
6,72,9,Sick_y,M,Nausea_n,Vomit_n,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_n,Mayo_y,Courg_y,Cheese_y,Icecream_y
7,5,16,Sick_y,F,Nausea_n,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
8,10,8,Sick_y,F,Nausea_y,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
9,5,20,Sick_y,M,Nausea_y,Vomit_n,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
10,11,12,Sick_y,M,Nausea_n,Vomit_y,Abdo_n,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y


## Instantiation and training

In [3]:
from scientisttools import MCAIV
# Instantiate the estimator (it is fitted in the next cell).
# NOTE(review): iv=(0,1) presumably selects the first two columns (Age, Time) as
# instrumental variables, and excl=(0,2) presumably lists the categories excluded by
# the "specific" variant - confirm against the scientisttools MCAIV documentation.
res_mcaiv = MCAIV(iv=(0,1),excl=(0,2))

## `fit` function

In [4]:
# Fit the model on the poison dataset.
res_mcaiv.fit(poison)

## `fit_transform` function

In [5]:
# Call fit_transform on the poison dataset and display the returned individuals coordinates.
# NOTE(review): the estimator was already fitted in the previous cell, so this call
# presumably refits it - confirm.
print_dt(res_mcaiv.fit_transform(poison),rowname="Individuals",title=html("<b>Individuals coordinates</b>"))

Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2
1,-0.717646,0.038801
2,0.633755,0.053442
3,-0.347296,0.051635
4,0.630013,0.034918
5,-0.225717,0.046651
6,0.019755,-0.255251
7,-0.34636,0.056266
8,0.139019,0.031699
9,-0.591389,0.056972
10,-0.106945,0.027774


## Ratio

In [6]:
# Display the `ratio_` attribute of the fitted model.
# NOTE(review): its definition is not visible here - see the scientisttools documentation.
res_mcaiv.ratio_

0.25943854980216763

## Eigenvalues

In [7]:
# Eigenvalues of the fitted MCAIV model (eigenvalue, difference, proportion, cumulative proportion).
from scientisttools import get_eig
print_dt(get_eig(res_mcaiv),rowname="Dimensions",title=html("<b>Eigen values</b>"))

Eigen values,Eigen values,Eigen values,Eigen values,Eigen values
Dimensions,Eigenvalue,Difference,Proportion,Cumulative
Dim.1,0.223934,0.212015,94.946367,94.946367
Dim.2,0.011919,,5.053633,100.0


## Individuals information

In [8]:
# Extract the individuals results from the fitted model and list the available fields
# (coord, cos2, contrib, infos).
from scientisttools import get_mcaiv_ind
ind = get_mcaiv_ind(res_mcaiv)
ind._fields

('coord', 'cos2', 'contrib', 'infos')

### Individuals coordinates

In [9]:
# Individuals factor coordinates on each dimension.
print_dt(ind.coord,rowname="Individuals",title=html("<b>Individuals coordinates</b>"))

Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2
1,-0.717646,0.038801
2,0.633755,0.053442
3,-0.347296,0.051635
4,0.630013,0.034918
5,-0.225717,0.046651
6,0.019755,-0.255251
7,-0.34636,0.056266
8,0.139019,0.031699
9,-0.591389,0.056972
10,-0.106945,0.027774


### Individuals contributions

In [10]:
# Individuals contributions to each dimension.
print_dt(ind.contrib,rowname="Individuals",title=html("<b>Individuals contributions</b>"))

Individuals contributions,Individuals contributions,Individuals contributions
Individuals,Dim.1,Dim.2
1,4.181555,0.229657
2,3.261072,0.435669
3,0.979302,0.406708
4,3.222672,0.185987
5,0.413662,0.331982
6,0.003169,9.938633
7,0.974033,0.482934
8,0.156916,0.153277
9,2.839645,0.49513
10,0.092862,0.117669


### Individuals squared cosine (cos2)

In [11]:
# Individuals squared cosine (cos2) on each dimension.
print_dt(ind.cos2,rowname="Individuals",title=html("<b>Individuals squared cosinus (Cos2)</b>"))

Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2)
Individuals,Dim.1,Dim.2
1,0.997085,0.002915
2,0.992939,0.007061
3,0.978373,0.021627
4,0.996938,0.003062
5,0.959034,0.040966
6,0.005954,0.994046
7,0.974289,0.025711
8,0.950578,0.049422
9,0.990805,0.009195
10,0.936817,0.063183


### Additional information

In [12]:
# Individuals additional information (weight, squared distance to origin, inertia, % inertia).
print_dt(ind.infos,rowname="Individuals",title=html("<b>Individuals additionals informations <br> (weight, sq. dist., inertia)</b>"))

"Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)"
Individuals,Weight,Sq. Dist.,Inertia,% Inertia
1,0.018182,0.516521,0.009391,3.981841
2,0.018182,0.404502,0.007355,3.118286
3,0.018182,0.123281,0.002241,0.950365
4,0.018182,0.398136,0.007239,3.069209
5,0.018182,0.053125,0.000966,0.409535
6,0.018182,0.065543,0.001192,0.505271
7,0.018182,0.123131,0.002239,0.949214
8,0.018182,0.020331,0.00037,0.156733
9,0.018182,0.352987,0.006418,2.721162
10,0.018182,0.012209,0.000222,0.094116


## Variables information

In [13]:
# Extract the variables results from the fitted model and list the available fields
# (coord, cos2, contrib, infos).
from scientisttools import get_mcaiv_var
var = get_mcaiv_var(res_mcaiv)
var._fields

('coord', 'cos2', 'contrib', 'infos')

### Variables coordinates

In [14]:
# Variables coordinates on each dimension.
# NOTE(review): in the saved output the row labels are Dim.1 ... Dim.10 rather than
# variable/category names - confirm this is the intended index of `var.coord`.
print_dt(var.coord,rowname="Variables",title=html("<b>Variables coordinates</b>"))

Variables coordinates,Variables coordinates,Variables coordinates
Variables,Dim.1,Dim.2
Dim.1,0.469294,7.8e-05
Dim.2,0.014008,0.009026
Dim.3,0.01741,0.077802
Dim.4,0.012442,-0.055534
Dim.5,-0.005798,-0.002146
Dim.6,0.023774,-0.034243
Dim.7,0.040459,-0.008075
Dim.8,0.011976,0.000994
Dim.9,0.010235,0.037636
Dim.10,-0.015691,0.00119


### Variables contributions

In [15]:
# Variables contributions to each dimension.
print_dt(var.contrib,rowname="Variables",title=html("<b>Variables contributions</b>"))

Variables contributions,Variables contributions,Variables contributions
Variables,Dim.1,Dim.2
Dim.1,98.349034,5.1e-05
Dim.2,0.087623,0.683547
Dim.3,0.13536,50.785319
Dim.4,0.069132,25.874089
Dim.5,0.015012,0.038639
Dim.6,0.252402,9.837873
Dim.7,0.730988,0.547075
Dim.8,0.064052,0.008281
Dim.9,0.046779,11.883862
Dim.10,0.109953,0.011883


### Variables squared cosine (cos2)

In [16]:
# Variables squared cosine (cos2) on each dimension.
print_dt(var.cos2,rowname="Variables",title=html("<b>Variables squared cosinus (cos2)</b>"))

Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2)
Variables,Dim.1,Dim.2
Dim.1,1.0,0.0
Dim.2,0.706606,0.293394
Dim.3,0.047688,0.952312
Dim.4,0.047799,0.952201
Dim.5,0.879507,0.120493
Dim.6,0.325246,0.674754
Dim.7,0.961691,0.038309
Dim.8,0.993165,0.006835
Dim.9,0.068863,0.931137
Dim.10,0.99428,0.00572


### Additional information

In [17]:
# Variables additional information (weight, squared distance to origin, inertia, % inertia).
print_dt(var.infos,rowname="Variables",title=html("<b>Variables additionals informations <br> (weight, sq. dist., inertia)</b>"))

"Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)"
Variables,Weight,Sq. Dist.,Inertia,% Inertia
Dim.1,1.0,0.220237,0.220237,93.378837
Dim.2,1.0,0.000278,0.000278,0.117739
Dim.3,1.0,0.006356,0.006356,2.695023
Dim.4,1.0,0.003239,0.003239,1.37322
Dim.5,1.0,3.8e-05,3.8e-05,0.016206
Dim.6,1.0,0.001738,0.001738,0.736817
Dim.7,1.0,0.001702,0.001702,0.721694
Dim.8,1.0,0.000144,0.000144,0.061234
Dim.9,1.0,0.001521,0.001521,0.644982
Dim.10,1.0,0.000248,0.000248,0.104996


## Supplementary quantitative variables

In [18]:
# Supplementary quantitative variables results stored on the fitted model;
# list the available fields (coord, cos2).
quanti_sup = res_mcaiv.quanti_sup_
quanti_sup._fields

('coord', 'cos2')

### Supplementary quantitative variables coordinates

In [19]:
# Supplementary quantitative variables coordinates on each dimension.
print_dt(quanti_sup.coord,rowname="Variables",title=html("<b>Supplementary quantitative variables <br> coordinates</b>"))

Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates
Variables,Dim.1,Dim.2
Age,-0.012489,-0.999922
Time,-0.998916,0.046558


### Supplementary quantitative variables squared cosine (cos2)

In [20]:
# Supplementary quantitative variables squared cosine (cos2) on each dimension.
print_dt(quanti_sup.cos2,rowname="Variables",title=html("<b>Supplementary quantitative variables <br> squared cosinus (cos2)</b>"))

Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2)
Variables,Dim.1,Dim.2
Age,0.000156,0.999844
Time,0.997832,0.002168


## Supplementary qualitative variables/categories

In [21]:
# Supplementary qualitative variables results stored on the fitted model;
# list the available fields (barycentre, coord, cos2, vtest, dist2, eta2).
quali_sup = res_mcaiv.quali_sup_
quali_sup._fields

('barycentre', 'coord', 'cos2', 'vtest', 'dist2', 'eta2')

### Supplementary variables/categories coordinates

In [22]:
# Supplementary categories coordinates on each dimension.
print_dt(quali_sup.coord,rowname="Categories",title=html("<b>Supplementary variables/categories <br> coordinates</b>"))

Supplementary variables/categories coordinates,Supplementary variables/categories coordinates,Supplementary variables/categories coordinates
Categories,Dim.1,Dim.2
Sick_n,0.622418,-0.002676
Sick_y,-0.27845,0.001197
F,0.013362,-0.005175
M,-0.013857,0.005366
Nausea_n,0.075932,-0.005405
Nausea_y,-0.272091,0.019368
Vomit_n,0.155043,-0.008507
Vomit_y,-0.232564,0.01276
Abdo_n,0.581898,-0.000984
Abdo_y,-0.283085,0.000479


### Supplementary variables/categories squared cosine (cos2)

In [23]:
# Supplementary categories squared cosine (cos2) on each dimension.
print_dt(quali_sup.cos2,rowname="Categories",title=html("<b>Supplementary variables/categories <br> squared cosinus (cos2)</b>"))

Supplementary variables/categories squared cosinus (cos2),Supplementary variables/categories squared cosinus (cos2),Supplementary variables/categories squared cosinus (cos2)
Categories,Dim.1,Dim.2
Sick_n,0.999982,1.8e-05
Sick_y,0.999982,1.8e-05
F,0.869585,0.130415
M,0.869585,0.130415
Nausea_n,0.994959,0.005041
Nausea_y,0.994959,0.005041
Vomit_n,0.996999,0.003001
Vomit_y,0.996999,0.003001
Abdo_n,0.999997,3e-06
Abdo_y,0.999997,3e-06


### Supplementary variables/categories squared distance to origin (dist2)

In [24]:
# Supplementary categories squared distance to origin (dist2); a single "Sq. Dist." column.
print_dt(quali_sup.dist2,rowname="Categories",title=html("<b>Supplementary variables/categories <br> squared distance <br> to origin (dist2)</b>"))

Supplementary variables/categories squared distance to origin (dist2),Supplementary variables/categories squared distance to origin (dist2)
Categories,Sq. Dist.
Sick_n,0.387411
Sick_y,0.077536
F,0.000205
M,0.000221
Nausea_n,0.005795
Nausea_y,0.074409
Vomit_n,0.024111
Vomit_y,0.054249
Abdo_n,0.338606
Abdo_y,0.080138


### Supplementary variables/categories value-test (vtest)

In [25]:
# Supplementary categories value-test (vtest) on each dimension.
print_dt(quali_sup.vtest,rowname="Categories",title=html("<b>Supplementary variables/categories <br> value-test (vtest)</b>"))

Supplementary variables/categories value-test (vtest),Supplementary variables/categories value-test (vtest),Supplementary variables/categories value-test (vtest)
Categories,Dim.1,Dim.2
Sick_n,6.464749,-0.120461
Sick_y,-6.464749,0.120461
F,0.211309,-0.3547
M,-0.211309,0.3547
Nausea_n,2.232067,-0.688686
Nausea_y,-2.232067,0.688686
Vomit_n,2.94872,-0.701264
Vomit_y,-2.94872,0.701264
Abdo_n,6.302589,-0.046199
Abdo_y,-6.302589,0.046199


### Squared correlation ratio

In [26]:
# Supplementary qualitative variables squared correlation ratio (eta2).
# The rows of eta2 are variables (Sick, Sex, Nausea, ...), not categories,
# so the index column is labelled "Variables".
print_dt(quali_sup.eta2,rowname="Variables",title=html("<b>Supplementary qualitative variables <br> squared correlation ratio (eta2)</b>"))

Supplementary qualitative variables squared correlation ratio (eta2),Supplementary qualitative variables squared correlation ratio (eta2),Supplementary qualitative variables squared correlation ratio (eta2)
Categories,Dim.1,Dim.2
Sick,0.773944,0.000269
Sex,0.000827,0.00233
Nausea,0.092262,0.008783
Vomiting,0.161018,0.009107
Abdominals,0.735604,4e-05
Fever,0.640248,0.00033
Diarrhae,0.587185,0.000707
Potato,0.015608,0.010561
Fish,3.2e-05,0.101227
Mayo,0.261076,3.8e-05


# Descriptive statistics

In [27]:
# Descriptive statistics for the quantitative variables (count, mean, std, min, quartiles, max).
# NOTE(review): rowname="" leaves the index column without a name; the saved output
# shows it as "Unnamed: 0_level_1" - consider a meaningful label.
print_dt(res_mcaiv.summary_quanti_,rowname="",title=html("<b>Descriptive statistics of quantitative variables</b>"))

Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables
Unnamed: 0_level_1,variable,count,mean,std,min,25%,50%,75%,max
0,Age,55,16.927273,23.779741,4.0,6.0,8.0,10.0,88.0
1,Time,55,10.163636,7.800199,0.0,0.0,12.0,16.5,22.0


In [28]:
# Descriptive statistics for the qualitative variables (count and proportion per category).
# NOTE(review): rowname="" leaves the index column without a name; the saved output
# shows it as "Unnamed: 0_level_1" - consider a meaningful label.
print_dt(res_mcaiv.summary_quali_,rowname="",title=html("<b>Descriptive statistics of <br> qualitative variables</b>"))

Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables
Unnamed: 0_level_1,variable,categorie,count,proportion
0,Sick,Sick_y,38,0.690909
1,Sick,Sick_n,17,0.309091
2,Sex,F,28,0.509091
3,Sex,M,27,0.490909
4,Nausea,Nausea_n,43,0.781818
5,Nausea,Nausea_y,12,0.218182
6,Vomiting,Vomit_n,33,0.6
7,Vomiting,Vomit_y,22,0.4
8,Abdominals,Abdo_y,37,0.672727
9,Abdominals,Abdo_n,18,0.327273


# Extractions

In [29]:
# Generic extraction helpers and text summary.
# NOTE(review): get_eig was already imported in an earlier cell, and `ind` / `var` are
# rebound here, replacing the objects created earlier with get_mcaiv_ind / get_mcaiv_var.
from scientisttools import get_eig, get_mcaiv, summaryMCAIV
eig = get_eig(res_mcaiv)
ind, var = get_mcaiv(res_mcaiv,"ind"), get_mcaiv(res_mcaiv,"var")
# Print a plain-text summary of the fitted model (eigenvalues, individuals, ...).
summaryMCAIV(res_mcaiv)

         Multiple Correspondence Analysis with Instrumental Variables - Results               

Eigenvalues
                       Dim.1    Dim.2
Variance               0.224    0.012
Difference             0.212      NaN
% of var.             94.946    5.054
Cumulative % of var.  94.946  100.000

Individuals (the 10 first)

    Weight  Sq. Dist.  Inertia  % Inertia  Dim.1    ctr   cos2  Dim.2    ctr  \
1    0.018      0.517    0.009      3.982 -0.718  4.182  0.997  0.039  0.230   
2    0.018      0.405    0.007      3.118  0.634  3.261  0.993  0.053  0.436   
3    0.018      0.123    0.002      0.950 -0.347  0.979  0.978  0.052  0.407   
4    0.018      0.398    0.007      3.069  0.630  3.223  0.997  0.035  0.186   
5    0.018      0.053    0.001      0.410 -0.226  0.414  0.959  0.047  0.332   
6    0.018      0.066    0.001      0.505  0.020  0.003  0.006 -0.255  9.939   
7    0.018      0.123    0.002      0.949 -0.346  0.974  0.974  0.056  0.483   
8    0.018      0.020    0.000   