# Multiple Factor Analysis (MFA) with categorical variables

In [1]:
import pyreadr
import pandas as pd
# Read the R data file from disk.
result = pyreadr.read_r('./data/poison.rda') # also works for Rds, rda

# `result` is a dictionary: each key is the name of an object stored in the
# file and each value is the corresponding Python object.
print(result.keys()) # list the objects that were loaded

odict_keys(['poison'])


In [2]:
# Load the data: take the `poison` table out of the objects read from the file
# and preview its first six rows.
poison = result["poison"]
poison.head(6)

Unnamed: 0_level_0,Age,Time,Sick,Sex,Nausea,Vomiting,Abdominals,Fever,Diarrhae,Potato,Fish,Mayo,Courgette,Cheese,Icecream
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,9,22,Sick_y,F,Nausea_y,Vomit_n,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
2,5,0,Sick_n,F,Nausea_n,Vomit_n,Abdo_n,Fever_n,Diarrhea_n,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_n,Icecream_y
3,6,16,Sick_y,F,Nausea_n,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
4,9,0,Sick_n,F,Nausea_n,Vomit_n,Abdo_n,Fever_n,Diarrhea_n,Potato_y,Fish_y,Mayo_n,Courg_y,Cheese_y,Icecream_y
5,7,14,Sick_y,M,Nausea_n,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
6,72,9,Sick_y,M,Nausea_n,Vomit_n,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_n,Mayo_y,Courg_y,Cheese_y,Icecream_y


In [3]:
# Print the column names, non-null counts and dtypes of the table.
poison.info()

<class 'pandas.core.frame.DataFrame'>
Index: 55 entries, 1 to 55
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   Age         55 non-null     int32   
 1   Time        55 non-null     int32   
 2   Sick        55 non-null     category
 3   Sex         55 non-null     category
 4   Nausea      55 non-null     category
 5   Vomiting    55 non-null     category
 6   Abdominals  55 non-null     category
 7   Fever       55 non-null     category
 8   Diarrhae    55 non-null     category
 9   Potato      55 non-null     category
 10  Fish        55 non-null     category
 11  Mayo        55 non-null     category
 12  Courgette   55 non-null     category
 13  Cheese      55 non-null     category
 14  Icecream    55 non-null     category
dtypes: category(13), int32(2)
memory usage: 3.1+ KB


In [4]:
# Rebuild the table with two-level column labels (group, variable) so that each
# variable is tagged with the group it belongs to. The values are taken from
# `poison.values` and the row index is carried over from `poison`.
poison2 = pd.DataFrame(
    poison.values,
    index=poison.index,
    columns=pd.MultiIndex.from_tuples(
        [
            (group_name, variable)
            for group_name, variables in {
                "desc": ["Age", "Time"],
                "desc2": ["Sick", "Sex"],
                "symptom": ["Nausea", "Vomiting", "Abdominals", "Fever", "Diarrhae"],
                "eat": ["Potato", "Fish", "Mayo", "Courgette", "Cheese", "Icecream"],
            }.items()
            for variable in variables
        ]
    ),
)

In [5]:
# First-level column labels other than "desc" and "desc2", as a plain list;
# this list is what gets passed to MFA as `group`.
group = poison2.columns.levels[0].drop(["desc","desc2"]).tolist()
group

['eat', 'symptom']

In [6]:
# The remaining first-level column labels (those not in `group`), as a plain
# list; this list is what gets passed to MFA as `group_sup`.
group_sup = poison2.columns.levels[0].drop(group).tolist()
group_sup

['desc', 'desc2']

In [7]:
# Cast the columns of the "desc" group (Age, Time) to float.
poison2["desc"] = poison2["desc"].astype("float")

In [8]:
from scientisttools.decomposition import MFA

# Instantiate the MFA with 5 components; `group` and `group_sup` are the lists
# of first-level column labels of `poison2`, and the rows are labelled with the
# index of `poison2`.
res_mfa = MFA(n_components=5,
              group=group,
              group_sup=group_sup,
              row_labels=poison2.index,
              parallelize=True)
# Fit the model on the grouped table
res_mfa.fit(poison2)

## Individuals information

In [9]:
from scientisttools.extractfactor import get_mfa_ind
# Extract the individuals' results from the fitted MFA and list the tables
# available in the returned dictionary.
ind = get_mfa_ind(res_mfa)
ind.keys()

dict_keys(['coord', 'contrib', 'cos2', 'coord_partiel', 'within_inertia', 'within_partial_inertia'])

### Coordinates

In [10]:
# First six rows of the individuals' `coord` table.
ind["coord"].head(6)

Unnamed: 0_level_0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.898029,0.199683,-0.105271,0.06376,-0.246878
2,-1.655044,0.410953,0.166068,0.493356,1.441421
3,0.867304,-0.09907,0.271261,0.424187,-0.22509
4,-1.783917,0.568569,0.041795,0.098292,-0.620324
5,0.867304,-0.09907,0.271261,0.424187,-0.22509
6,1.12291,1.074409,4.269195,-3.896475,0.949817


### Cos2

In [11]:
# First six rows of the individuals' `cos2` table.
ind["cos2"].head(6)

Unnamed: 0_level_0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.35651,0.017627,0.004899,0.001797,0.026944
2,0.396311,0.024434,0.00399,0.035216,0.300607
3,0.506517,0.006609,0.049548,0.121162,0.034117
4,0.600826,0.061033,0.00033,0.001824,0.07265
5,0.506517,0.006609,0.049548,0.121162,0.034117
6,0.034035,0.031158,0.491954,0.409805,0.024351


### Contributions

In [12]:
# First six rows of the individuals' `contrib` table.
ind["contrib"].head(6)

Unnamed: 0_level_0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1.007508,0.08591,0.028403,0.011434,0.194815
2,3.422052,0.363872,0.070684,0.684602,6.641104
3,0.939745,0.021147,0.188593,0.506094,0.161947
4,3.97573,0.696515,0.004477,0.027174,1.229971
5,0.939745,0.021147,0.188593,0.506094,0.161947
6,1.575279,2.487155,46.713593,42.703234,2.883622


### Partial coordinates

In [13]:
# First six rows of the individuals' `coord_partiel` table
# (one block of Dim.* columns per group).
ind["coord_partiel"].head(6)

Unnamed: 0_level_0,eat,eat,eat,eat,eat,symptom,symptom,symptom,symptom,symptom
Unnamed: 0_level_1,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
rownames,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1,0.609012,0.505904,0.315333,0.66829,-0.473209,1.187047,-0.106538,-0.525875,-0.540771,-0.020547
2,-1.236266,-0.048449,0.267375,0.829477,2.906803,-2.073822,0.870356,0.064761,0.157236,-0.02396
3,0.609012,0.505904,0.315333,0.66829,-0.473209,1.125595,-0.704043,0.227189,0.180083,0.023028
4,-1.494012,0.266783,0.01883,0.039348,-1.216687,-2.073822,0.870356,0.064761,0.157236,-0.02396
5,0.609012,0.505904,0.315333,0.66829,-0.473209,1.125595,-0.704043,0.227189,0.180083,0.023028
6,1.652092,2.227131,8.451173,-7.480516,1.784165,0.593728,-0.078312,0.087217,-0.312435,0.115469


### Within inertia

In [None]:
# Call head() so the first six rows are displayed; the bare attribute `.head`
# would only show the bound method's repr instead of any data.
ind["within_inertia"].head(6)

## Categories

In [14]:
from scientisttools.extractfactor import get_mfa_var
# Extract the "quali_var" results from the fitted MFA and list the tables
# available in the returned dictionary.
quali_var = get_mfa_var(res_mfa,element="quali_var")
quali_var.keys()

dict_keys(['coord', 'contrib', 'cos2', 'vtest', 'coord_partiel', 'sup'])

### Coordinates

In [15]:
# First six rows of the categories' `coord` table.
quali_var["coord"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Potato_Potato_n,1.268842,1.596391,-2.366085,-1.346946,0.950257
Potato_Potato_y,-0.073202,-0.092099,0.136505,0.077708,-0.054823
Fish_Fish_n,1.12291,1.074409,4.269195,-3.896475,0.949817
Fish_Fish_y,-0.020795,-0.019896,-0.079059,0.072157,-0.017589
Mayo_Mayo_n,-1.886646,-0.124385,-0.129656,-0.250615,-0.26069
Mayo_Mayo_y,0.419255,0.027641,0.028812,0.055692,0.057931


### Cos2

In [16]:
# First six rows of the categories' `cos2` table.
quali_var["cos2"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Potato_Potato_n,0.12585,0.199212,0.437621,0.14182,0.070586
Potato_Potato_y,0.12585,0.199212,0.437621,0.14182,0.070586
Fish_Fish_n,0.034035,0.031158,0.491954,0.409805,0.024351
Fish_Fish_y,0.034035,0.031158,0.491954,0.409805,0.024351
Mayo_Mayo_n,0.805713,0.003502,0.003805,0.014217,0.015383
Mayo_Mayo_y,0.805713,0.003502,0.003805,0.014217,0.015383


### Contributions

In [17]:
# First six rows of the categories' `contrib` table.
quali_var["contrib"].head(6) # TODO: to be verified

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Potato_Potato_n,6.033978,16.472639,43.046041,15.308699,8.658881
Potato_Potato_y,0.348114,0.950345,2.483425,0.883194,0.499551
Fish_Fish_n,1.575279,2.487155,46.713593,42.703234,2.883622
Fish_Fish_y,0.029172,0.046058,0.865067,0.790801,0.0534
Mayo_Mayo_n,44.46806,0.333348,0.430859,1.766576,2.172231
Mayo_Mayo_y,9.881791,0.074077,0.095746,0.392572,0.482718


### VTest

In [18]:
# First six rows of the categories' `vtest` table.
quali_var["vtest"].head(6) # TODO: to be verified

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Potato_Potato_n,1.856429,3.067313,-4.958418,-2.956962,2.22386
Potato_Potato_y,-1.856429,-3.067313,4.958418,2.956962,-2.22386
Fish_Fish_n,0.930808,1.169588,5.068775,-4.846316,1.259362
Fish_Fish_y,-0.930808,-1.169588,-5.068775,4.846316,-1.259362
Mayo_Mayo_n,-5.417464,-0.469052,-0.533261,-1.079787,-1.197361
Mayo_Mayo_y,5.417464,0.469052,0.533261,1.079787,1.197361


## Groups

In [19]:
# Extract the "group" results from the fitted MFA and list the available tables.
# NOTE(review): this rebinds `group`, which until this cell held the list of
# group names passed to MFA(group=...). Re-running the MFA cell after this one
# would pass this dictionary instead of that list.
group = get_mfa_var(res_mfa,element="group")
group.keys()

dict_keys(['coord', 'correlation', 'contrib', 'cos2', 'dist', 'Lg', 'RV', 'sup'])

### Coordinates

In [20]:
# Up to six rows of the groups' `coord` table.
group["coord"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
eat,0.682687,0.733667,0.667509,0.601972,0.565175
symptom,0.77267,0.1102,0.041883,0.044457,0.003651


### Contributions

In [21]:
# Up to six rows of the groups' `contrib` table.
group["contrib"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
eat,46.908558,86.94107,94.095988,93.122721,99.358189
symptom,53.091442,13.05893,5.904012,6.877279,0.641811


### Correlation

In [23]:
# Up to six rows of the groups' `correlation` table.
group["correlation"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
eat,0.852713,0.937751,0.98934,0.986839,0.998928
symptom,0.87944,0.375828,0.39214,0.410891,0.138064


### Lg

In [25]:
# The groups' `Lg` table (group-by-group matrix).
group["Lg"] # TODO: to be verified

Unnamed: 0,eat,symptom
eat,1.887959,0.180359
symptom,0.180359,0.998649


### RV

In [27]:
# The groups' `RV` table (group-by-group matrix).
group["RV"] # TODO: to be verified

Unnamed: 0,eat,symptom
eat,1.0,0.131351
symptom,0.131351,1.0


### Supplementary groups

In [28]:
# Results for the supplementary groups, taken from the "sup" entry of the group
# results, followed by the list of available tables.
# NOTE(review): this rebinds `group_sup`, which until this cell held the list
# of supplementary group names passed to MFA(group_sup=...). Re-running the MFA
# cell after this one would pass this dictionary instead of that list.
group_sup = group["sup"]
group_sup.keys()

dict_keys(['coord'])

#### Coordinates

In [29]:
# The supplementary groups' `coord` table.
group_sup["coord"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
desc,0.631028,0.010531,0.090319,0.019718,0.003374
desc2,0.739229,0.054142,0.004757,0.007716,0.003679


## Partial axes

In [30]:
from scientisttools.extractfactor import get_mfa_partial_axes
# Extract the partial-axes results from the fitted MFA and list the tables
# available in the returned dictionary.
partial_axes = get_mfa_partial_axes(res_mfa)
partial_axes.keys()

dict_keys(['coord', 'cor', 'contrib', 'cor_between', 'sup'])

### Coordinates

In [31]:
# Up to six rows of the partial axes' `coord` table
# (one block of Dim.* columns per group).
partial_axes["coord"].head(6)

Unnamed: 0_level_0,eat,eat,eat,eat,eat,eat,symptom,symptom,symptom,symptom,symptom
Unnamed: 0_level_1,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5,Dim.6,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Dim.1,0.746829,0.033724,-0.419456,0.000524,-0.004061,-0.108357,-0.8785,-0.03925,-0.03124,0.077429,-0.007699
Dim.2,0.602613,-0.136701,0.711745,-0.144195,0.028005,0.073707,0.280454,-0.245198,-0.259145,0.158463,-0.03154
Dim.3,0.076624,0.944205,0.157702,0.239616,0.001464,0.035066,0.032483,0.387441,-0.045685,-0.134917,-0.090248
Dim.4,0.089727,-0.268609,0.055406,0.938943,-0.101108,0.056936,0.042906,0.24324,0.35087,-0.084929,-0.036758
Dim.5,-0.006002,-0.027662,-0.018411,0.103461,0.993044,-0.000315,-0.001832,0.042317,-0.107016,0.070775,0.098938


### Cor

In [32]:
# Up to six rows of the partial axes' `cor` table.
# NOTE(review): in the saved output this table is identical to the `coord`
# table of the partial axes — confirm this is expected.
partial_axes["cor"].head(6)

Unnamed: 0_level_0,eat,eat,eat,eat,eat,eat,symptom,symptom,symptom,symptom,symptom
Unnamed: 0_level_1,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5,Dim.6,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Dim.1,0.746829,0.033724,-0.419456,0.000524,-0.004061,-0.108357,-0.8785,-0.03925,-0.03124,0.077429,-0.007699
Dim.2,0.602613,-0.136701,0.711745,-0.144195,0.028005,0.073707,0.280454,-0.245198,-0.259145,0.158463,-0.03154
Dim.3,0.076624,0.944205,0.157702,0.239616,0.001464,0.035066,0.032483,0.387441,-0.045685,-0.134917,-0.090248
Dim.4,0.089727,-0.268609,0.055406,0.938943,-0.101108,0.056936,0.042906,0.24324,0.35087,-0.084929,-0.036758
Dim.5,-0.006002,-0.027662,-0.018411,0.103461,0.993044,-0.000315,-0.001832,0.042317,-0.107016,0.070775,0.098938


### Corr between

In [34]:
# First six rows of the partial axes' `cor_between` table
# (rows and columns are labelled "Dim.k_<group>").
partial_axes["cor_between"].head(6)

Unnamed: 0,Dim.1_eat,Dim.2_eat,Dim.3_eat,Dim.4_eat,Dim.5_eat,Dim.6_eat,Dim.1_symptom,Dim.2_symptom,Dim.3_symptom,Dim.4_symptom,Dim.5_symptom
Dim.1_eat,1.0,6.94962e-17,-1.181572e-17,5.596144e-16,-4.2078290000000004e-17,2.2469380000000002e-17,-0.384433,-0.09865,-0.109767,0.155337,-0.026522
Dim.2_eat,6.94962e-17,1.0,2.7697440000000004e-17,-3.537315e-16,1.010304e-16,-5.086595e-16,-0.032454,0.235374,-0.05489,-0.125795,-0.072365
Dim.3_eat,-1.181572e-17,2.7697440000000004e-17,1.0,6.99206e-18,4.0814200000000006e-17,1.428217e-16,0.374632,-0.084798,-0.161681,0.013237,-0.049572
Dim.4_eat,5.596144e-16,-3.537315e-16,6.99206e-18,1.0,1.50861e-16,-1.554751e-16,-0.006162,0.258911,0.262398,-0.147083,-0.048164
Dim.5_eat,-4.2078290000000004e-17,1.010304e-16,4.0814200000000006e-17,1.50861e-16,1.0,1.959204e-16,0.005221,-0.000727,-0.102842,0.079697,0.09375
Dim.6_eat,2.2469380000000002e-17,-5.086595e-16,1.428217e-16,-1.554751e-16,1.959204e-16,1.0,0.123103,0.056624,0.032048,0.004325,0.065551


In [None]:
from scientisttools.ggplot import fviz_mfa_axes
# Build the partial-axes plot for the fitted MFA and print it.
# NOTE(review): presumably color="group" colours the axes by group and
# group_sup=True also includes the supplementary groups — confirm against the
# scientisttools documentation.
p = fviz_mfa_axes(res_mfa,color="group",group_sup=True)
print(p)

## Inertia ratio

In [35]:
# Inertia ratio stored on the fitted model (one value per dimension).
res_mfa.inertia_ratio_

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Inertia ratio,0.749547,0.510051,0.539226,0.544376,0.504294


## Supplementary qualitative variables

In [36]:
# Results for the supplementary qualitative variables, taken from the "sup"
# entry of the categories' results, followed by the list of available tables.
quali_var_sup = quali_var["sup"]
quali_var_sup.keys()

dict_keys(['stats', 'coord', 'cos2', 'dist', 'vtest', 'coord_partiel'])

### Coordinates

In [37]:
# Up to six rows of the supplementary categories' `coord` table.
quali_var_sup["coord"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Sick_Sick_n,-1.568766,0.265845,0.011169,0.08216,-0.01863
Sick_Sick_y,0.701817,-0.118931,-0.004997,-0.036756,0.008334
Sex_F,-0.062928,-0.121518,-0.057346,0.045039,-0.04385
Sex_M,0.065258,0.126018,0.05947,-0.046707,0.045474


### Cos2

In [38]:
# Up to six rows of the supplementary categories' `cos2` table.
quali_var_sup["cos2"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Sick_Sick_n,0.924694,0.026554,4.7e-05,0.002536,0.00013
Sick_Sick_y,0.924694,0.026554,4.7e-05,0.002536,0.00013
Sex_F,0.118258,0.440988,0.098211,0.060579,0.057424
Sex_M,0.118258,0.440988,0.098211,0.060579,0.057424


### VTest

In [39]:
# Up to six rows of the supplementary categories' `vtest` table.
quali_var_sup["vtest"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Sick_Sick_n,-6.391502,1.422396,0.065179,0.502264,-0.121408
Sick_Sick_y,6.391502,-1.422396,-0.065179,-0.502264,0.121408
Sex_F,-0.390347,-0.989911,-0.509515,0.4192,-0.435088
Sex_M,0.390347,0.989911,0.509515,-0.4192,0.435088


### Partial coordinates

In [40]:
# Up to six rows of the supplementary categories' `coord_partiel` table
# (one block of Dim.* columns per group).
quali_var_sup["coord_partiel"].head(6)

Unnamed: 0_level_0,eat,eat,eat,eat,eat,symptom,symptom,symptom,symptom,symptom
Unnamed: 0_level_1,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Sick_Sick_n,-1.063711,-0.338666,-0.042423,0.007085,-0.013299,-2.073822,0.870356,0.064761,0.157236,-0.02396
Sick_Sick_y,0.47587,0.151509,0.018979,-0.00317,0.00595,0.927763,-0.38937,-0.028972,-0.070342,0.010719
Sex_F,-0.105161,-0.251758,-0.118857,0.087297,-0.086243,-0.020694,0.008723,0.004164,0.00278,-0.001457
Sex_M,0.109056,0.261082,0.123259,-0.090531,0.089438,0.021461,-0.009046,-0.004319,-0.002883,0.001511


## Supplementary continuous columns

In [45]:
# Results for the supplementary quantitative variables: the "sup" entry of the
# "quanti_var" results, followed by the list of available tables.
quanti_var_sup =get_mfa_var(res_mfa,element="quanti_var")["sup"]
quanti_var_sup.keys()

dict_keys(['coord', 'cos2', 'cor'])

### Coordinates

In [46]:
# Up to six rows of the supplementary quantitative variables' `coord` table.
quanti_var_sup["coord"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Age,0.042032,0.031129,0.303664,-0.125451,0.057596
Time,0.806701,-0.099605,-0.034424,0.068209,-0.013089


### Cor

In [47]:
# Up to six rows of the supplementary quantitative variables' `cor` table.
# NOTE(review): in the saved output this table is identical to the `coord`
# table of the same variables — confirm this is expected.
quanti_var_sup["cor"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Age,0.042032,0.031129,0.303664,-0.125451,0.057596
Time,0.806701,-0.099605,-0.034424,0.068209,-0.013089


### Cos2

In [49]:
# Up to six rows of the supplementary quantitative variables' `cos2` table.
quanti_var_sup["cos2"].head(6)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Age,0.001767,0.000969,0.092212,0.015738,0.003317
Time,0.650766,0.009921,0.001185,0.004652,0.000171


In [50]:
# Summary table of the qualitative variables stored on the fitted model:
# one row per category with its group, variable, category label (`modalite`)
# and count (`effectif`).
res_mfa.summary_quali_

Unnamed: 0,group,variable,modalite,effectif
0,eat,Potato,Potato_y,52
1,eat,Potato,Potato_n,3
0,eat,Fish,Fish_y,54
1,eat,Fish,Fish_n,1
0,eat,Mayo,Mayo_y,45
1,eat,Mayo,Mayo_n,10
0,eat,Courgette,Courg_y,50
1,eat,Courgette,Courg_n,5
0,eat,Cheese,Cheese_y,48
1,eat,Cheese,Cheese_n,7
