# Multiple Factor Analysis (MFA) with categorical variables

In [1]:
import warnings
# Silence warnings for the whole session: first the FutureWarning category
# on its own, then every category through the blanket filter.
warnings.simplefilter("ignore", FutureWarning)
warnings.filterwarnings(action="ignore")

In [2]:
import pyreadr
import pandas as pd
# Read the R data file from disk (pyreadr also handles .Rds files).
result = pyreadr.read_r('./data/poison.rda') # also works for Rds, rda

# `result` is a dictionary whose keys are the names of the stored R objects
# and whose values are the corresponding Python objects.
print(result.keys()) # list the objects that were loaded

odict_keys(['poison'])


In [3]:
# Load the data: pull the "poison" object out of the pyreadr result
# and preview its first six rows.
poison = result["poison"]
poison.head(6)

Unnamed: 0_level_0,Age,Time,Sick,Sex,Nausea,Vomiting,Abdominals,Fever,Diarrhae,Potato,Fish,Mayo,Courgette,Cheese,Icecream
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,9,22,Sick_y,F,Nausea_y,Vomit_n,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
2,5,0,Sick_n,F,Nausea_n,Vomit_n,Abdo_n,Fever_n,Diarrhea_n,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_n,Icecream_y
3,6,16,Sick_y,F,Nausea_n,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
4,9,0,Sick_n,F,Nausea_n,Vomit_n,Abdo_n,Fever_n,Diarrhea_n,Potato_y,Fish_y,Mayo_n,Courg_y,Cheese_y,Icecream_y
5,7,14,Sick_y,M,Nausea_n,Vomit_y,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_y,Mayo_y,Courg_y,Cheese_y,Icecream_y
6,72,9,Sick_y,M,Nausea_n,Vomit_n,Abdo_y,Fever_y,Diarrhea_y,Potato_y,Fish_n,Mayo_y,Courg_y,Cheese_y,Icecream_y


In [4]:
# Summarise the frame: index, column names, non-null counts and dtypes.
poison.info()

<class 'pandas.core.frame.DataFrame'>
Index: 55 entries, 1 to 55
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   Age         55 non-null     int32   
 1   Time        55 non-null     int32   
 2   Sick        55 non-null     category
 3   Sex         55 non-null     category
 4   Nausea      55 non-null     category
 5   Vomiting    55 non-null     category
 6   Abdominals  55 non-null     category
 7   Fever       55 non-null     category
 8   Diarrhae    55 non-null     category
 9   Potato      55 non-null     category
 10  Fish        55 non-null     category
 11  Mayo        55 non-null     category
 12  Courgette   55 non-null     category
 13  Cheese      55 non-null     category
 14  Icecream    55 non-null     category
dtypes: category(13), int32(2)
memory usage: 3.1+ KB


In [5]:
# Rebuild the data set with two-level column labels (group, variable) so that
# each variable is attached to a group. Labels are applied by position, so the
# variable order below must match the column order of `poison`.
# NOTE(review): `poison` mixes integer and categorical columns, so
# `poison.values` is presumably an object array and the original dtypes are
# not carried over — confirm.
poison2 = pd.DataFrame(
    data=poison.values,
    index=poison.index,
    columns=pd.MultiIndex.from_tuples(
        [
            (group_name, variable)
            for group_name, variables in [
                ("desc", ["Age", "Time"]),
                ("desc2", ["Sick", "Sex"]),
                ("symptom", ["Nausea", "Vomiting", "Abdominals", "Fever", "Diarrhae"]),
                ("eat", ["Potato", "Fish", "Mayo", "Courgette", "Cheese", "Icecream"]),
            ]
            for variable in variables
        ]
    ),
)

In [6]:
# Names of the first-level column groups, minus the two descriptive groups.
group = (
    poison2.columns
    .levels[0]
    .drop(labels=["desc", "desc2"])
    .tolist()
)
group

['eat', 'symptom']

In [7]:
# Remaining first-level groups once the ones listed in `group` are removed.
# NOTE(review): `group` is rebound to a different object further down the
# notebook, so this cell only works when run before that cell.
group_sup = (
    poison2.columns
    .levels[0]
    .drop(labels=group)
    .tolist()
)
group_sup

['desc', 'desc2']

In [8]:
# Cast every column of the "desc" group (Age, Time) to float.
# NOTE(review): poison2 was built from `poison.values`, so these columns are
# presumably object dtype at this point — confirm. This assignment modifies
# poison2 in place.
poison2["desc"] = poison2["desc"].astype("float")

In [9]:
from scientisttools.decomposition import MFA

# Instantiate the MFA estimator; `group` and `group_sup` are the group-name
# lists built in the earlier cells (presumably active vs. supplementary
# groups — confirm against the scientisttools documentation).
res_mfa = MFA(n_components=None,
              group=group,
              group_sup=group_sup,
              row_labels=poison2.index,
              parallelize=True)
# Fit the model on the frame with two-level (group, variable) columns.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: Length of s must be M or N."; every later cell reads `res_mfa`,
# so none of them can run until this fit succeeds.
res_mfa.fit(poison2)

ValueError: Length of s must be M or N.

## Eigenvalues

In [None]:
from scientisttools.extractfactor import get_eig
# Extract the eigenvalue information from the fitted model and display it.
eig = get_eig(res_mfa)
eig

## Separate analyses

In [None]:
# List the keys of the separate analyses stored on the fitted model.
res_mfa.separate_analyses_.keys()

## Individuals information

In [None]:
from scientisttools.extractfactor import get_mfa_ind
# Collect the individuals' results from the fitted model and list the
# available entries.
ind = get_mfa_ind(res_mfa)
ind.keys()

### Coordinates

In [None]:
# Preview the first six rows of the individuals' "coord" entry.
ind["coord"].head(6)

### Cos2

In [None]:
# Preview the first six rows of the individuals' "cos2" entry.
ind["cos2"].head(6)

### Contributions

In [None]:
# Preview the first six rows of the individuals' "contrib" entry.
ind["contrib"].head(6)

### Partial coordinates

In [None]:
# Preview the first six rows of the individuals' "coord_partiel" entry.
ind["coord_partiel"].head(6)

### Within inertia

In [None]:
# Preview the first six rows of the individuals' "within_inertia" entry.
ind["within_inertia"].head(6)

### Within partial inertia

In [None]:
# Preview the first six rows of the "within_partial_inertia" entry,
# rounded to six decimals.
ind["within_partial_inertia"].head(6).round(6)

In [None]:
# Display the `group_sup_` attribute stored on the fitted model.
res_mfa.group_sup_

## Categories

In [None]:
from scientisttools.extractfactor import get_mfa_var
# Collect the results for the "quali_var" element of the fitted model and
# list the available entries.
quali_var = get_mfa_var(res_mfa,element="quali_var")
quali_var.keys()

### Coordinates

In [None]:
# Display the "coord" entry of the category results.
quali_var["coord"]

### Cos2

In [None]:
# Display the "cos2" entry of the category results.
quali_var["cos2"]

### Contributions

In [None]:
# Display the "contrib" entry of the category results.
# TODO: to be verified.
quali_var["contrib"]

### VTest

In [None]:
# Display the "vtest" entry of the category results.
quali_var["vtest"]

### Partial coordinates

In [None]:
# Display the "coord_partiel" entry of the category results.
quali_var["coord_partiel"]

### Within inertia

In [None]:
# Display the "within_inertia" entry of the category results.
quali_var["within_inertia"] # TODO: to be verified

### Within partial inertia

In [None]:
# Display the "within_partial_inertia" entry of the category results.
quali_var["within_partial_inertia"] # TODO: to be verified

## Groups

In [None]:
# Collect the results for the "group" element of the fitted model and list
# the available entries.
# NOTE(review): this rebinds `group`, which earlier cells use as the list of
# group names passed to MFA(...) and to the `group_sup` computation; those
# cells will not behave the same if re-run after this one. Consider a
# distinct name.
group = get_mfa_var(res_mfa,element="group")
group.keys()

### Coordinates

In [None]:
# Preview the first six rows of the groups' "coord" entry.
group["coord"].head(6)

### Contributions

In [None]:
# Preview the first six rows of the groups' "contrib" entry.
group["contrib"].head(6)

### Correlation

In [None]:
# Preview the first six rows of the groups' "correlation" entry.
group["correlation"].head(6)

### Lg

In [None]:
# Display the "Lg" entry of the group results.
group["Lg"]

### RV

In [None]:
# Display the "RV" entry of the group results.
group["RV"]

### Dist

In [None]:
# Display the "dist" entry of the group results.
group["dist"]

### Cos2

In [None]:
# Display the "cos2" entry of the group results.
group["cos2"]

### Supplementary groups

In [None]:
# Pull the "sup" entry out of the group results and list what it contains.
# NOTE(review): this rebinds `group_sup`, which an earlier cell defines as the
# list of group names passed to MFA(...); re-running the fit cell after this
# one would pass this object instead. Consider a distinct name.
group_sup = group["sup"]
group_sup.keys()

#### Coordinates

In [None]:
# Display the "coord" entry of the supplementary-group results.
group_sup["coord"]

#### Dist2

In [None]:
# Display the "dist" entry of the supplementary-group results.
group_sup["dist"]

#### Cos2

In [None]:
# Display the "cos2" entry of the supplementary-group results.
group_sup["cos2"]

## Partial axes

In [None]:
from scientisttools.extractfactor import get_mfa_partial_axes
# Collect the partial-axes results from the fitted model and list the
# available entries.
partial_axes = get_mfa_partial_axes(res_mfa)
partial_axes.keys()

### Coordinates

In [None]:
# Display the "coord" entry of the partial-axes results.
partial_axes["coord"]

### Cor

In [None]:
# Display the "cor" entry of the partial-axes results.
partial_axes["cor"]

### Corr between

In [None]:
# Display the "cor_between" entry of the partial-axes results, rounded to
# five decimals.
partial_axes["cor_between"].round(5)

## Inertia ratio

In [None]:
# Display the `inertia_ratio_` attribute stored on the fitted model.
res_mfa.inertia_ratio_

## Inertia ratio

In [None]:
# Display the `inertia_ratio_` attribute stored on the fitted model.
# NOTE(review): this cell repeats the preceding "Inertia ratio" cell verbatim.
res_mfa.inertia_ratio_

## Supplementary qualitative variables

In [None]:
# Pull the "sup" entry out of the category results and list what it contains.
quali_var_sup = quali_var["sup"]
quali_var_sup.keys()

### Coordinates

In [None]:
# Preview the first six rows of the supplementary categories' "coord" entry.
quali_var_sup["coord"].head(6)

### Cos2

In [None]:
# Preview the first six rows of the supplementary categories' "cos2" entry.
quali_var_sup["cos2"].head(6)

### VTest

In [None]:
# Preview the first six rows of the supplementary categories' "vtest" entry.
quali_var_sup["vtest"].head(6)

### Partial coordinates

In [None]:
# Preview the first six rows of the supplementary categories'
# "coord_partiel" entry.
quali_var_sup["coord_partiel"].head(6)

## Supplementary continuous columns

In [None]:
# Fetch the results for the "quanti_var" element, keep only its "sup" entry,
# and list what that entry contains.
quanti_var_sup =get_mfa_var(res_mfa,element="quanti_var")["sup"]
quanti_var_sup.keys()

### Coordinates

In [None]:
# Preview the first six rows of the supplementary quantitative variables'
# "coord" entry.
quanti_var_sup["coord"].head(6)

### Cor

In [None]:
# Preview the first six rows of the supplementary quantitative variables'
# "cor" entry.
quanti_var_sup["cor"].head(6)

### Cos2

In [None]:
# Preview the first six rows of the supplementary quantitative variables'
# "cos2" entry.
quanti_var_sup["cos2"].head(6)

## Summary

In [None]:
# Display the `summary_quanti_` attribute stored on the fitted model.
res_mfa.summary_quanti_

In [None]:
# Display the `summary_quali_` attribute stored on the fitted model.
res_mfa.summary_quali_

In [None]:
# Pass the model's stored `active_data_` back through transform() and
# display the result.
X = res_mfa.active_data_
res_mfa.transform(X)