# Dual Multiple Factor Analysis (DMFA) - `decathlon` dataset

In [1]:
# Silence warnings for the rest of the session
from warnings import simplefilter, filterwarnings
# Ignore FutureWarning messages.
simplefilter(action='ignore', category=FutureWarning)
# Blanket filter: no category is given, so every warning is ignored
# (this also covers the FutureWarning filter installed just above).
filterwarnings("ignore")
#print DataFrame/Series
from great_tables import GT, html
def print_dt(data,title=None,subtitle=None,rowname=None,digits=4):
    """Build a great_tables table from a pandas object.

    Parameters
    ----------
    data : pandas object exposing ``round``, ``rename_axis`` and ``reset_index``
        The values to display.
    title, subtitle : optional
        Forwarded unchanged to ``tab_header``.
    rowname : optional
        Name given to the index axis before the index is turned into a column.
    digits : int, default 4
        Number of decimals passed to ``round``.

    Returns
    -------
    The object returned by ``GT(...).tab_header(...)``.
    """
    # Round first, then expose the (renamed) index as a regular column so it is shown.
    rounded = data.round(digits)
    table_data = rounded.rename_axis(rowname).reset_index()
    table = GT(data=table_data)
    return table.tab_header(title=title, subtitle=subtitle)

## `decathlon` dataset

In [None]:
# Load the decathlon dataset and display its first 41 rows,
# grouping the columns under "Active" (first 10) and "Supp." (next 3) spanners.
from scientisttools.datasets import decathlon
(
    GT(decathlon.head(41).rename_axis("Individuals").reset_index())
    .tab_header(title=html("<b>Decathlon Dataset</b>"))
    .tab_spanner(
        label=html("<b>Individuals</b>"),
        columns="Individuals",
    )
    .tab_spanner(
        label=html("<b>Active <br>variables</b>"),
        columns=list(decathlon.columns[:10]),
    )
    .tab_spanner(
        label=html("<b>Supp. <br>variables</b>"),
        columns=list(decathlon.columns[10:13]),
    )
)

Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset
Individuals,Active variables,Active variables,Active variables,Active variables,Active variables,Active variables,Active variables,Active variables,Active variables,Active variables,Supp. variables,Supp. variables,Supp. variables
Individuals,X100m,Long.jump,Shot.put,High.jump,X400m,X110m.hurdle,Discus,Pole.vault,Javeline,X1500m,Rank,Points,Competition
SEBRLE,11.04,7.58,14.83,2.07,49.81,14.69,43.75,5.02,63.19,291.7,1.0,8217.0,Decastar
CLAY,10.76,7.4,14.26,1.86,49.37,14.05,50.72,4.92,60.15,301.5,2.0,8122.0,Decastar
KARPOV,11.02,7.3,14.77,2.04,48.37,14.09,48.95,4.92,50.31,300.2,3.0,8099.0,Decastar
BERNARD,11.02,7.23,14.25,1.92,48.93,14.99,40.87,5.32,62.77,280.1,4.0,8067.0,Decastar
YURKOV,11.34,7.09,15.19,2.1,50.42,15.31,46.26,4.72,63.44,276.4,5.0,8036.0,Decastar
WARNERS,11.11,7.6,14.31,1.98,48.68,14.23,41.1,4.92,51.77,278.1,6.0,8030.0,Decastar
ZSIVOCZKY,11.13,7.3,13.48,2.01,48.62,14.17,45.67,4.42,55.37,268.0,7.0,8004.0,Decastar
McMULLEN,10.83,7.31,13.76,2.13,49.91,14.38,44.41,4.42,56.37,285.1,8.0,7995.0,Decastar
MARTINEAU,11.64,6.81,14.57,1.95,50.14,14.93,47.6,4.92,52.33,262.1,9.0,7802.0,Decastar
HERNU,11.37,7.56,14.41,1.86,51.1,15.06,44.99,4.82,57.19,285.1,10.0,7733.0,Decastar


## Instantiation & training

In [3]:
# Dual Multiple Factor Analysis (DMFA)
from scientisttools import DMFA
# In the decathlon column order, position 12 is "Competition" and positions 10 and 11 are "Rank" and "Points".
# NOTE(review): presumably `group` selects the grouping variable and `sup_var` the supplementary
# variables by 0-based column position - confirm against the DMFA documentation.
res_dmfa = DMFA(group=12,sup_var=(10,11))

### `fit` function

In [4]:
# Fit the model on the first 41 rows of the decathlon data
res_dmfa.fit(decathlon.iloc[:41,:])

### `fit_transform` function

In [None]:
# Fit on the first 41 rows and display what fit_transform returns
print_dt(
    res_dmfa.fit_transform(decathlon.head(41)),
    rowname="Individuals",
    title=html("<b>Individuals coordinates</b>"),
)

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
SEBRLE,1.5979,1.0157,0.5831,1.4787,-0.6618
CLAY,1.9963,0.5423,1.8195,-0.2628,-1.3009
KARPOV,2.2324,0.393,1.2802,-1.8911,1.0502
BERNARD,0.1777,-0.7265,1.0475,2.5063,0.3419
YURKOV,-0.1176,2.4916,-1.5844,1.1907,0.2727
WARNERS,1.2003,-1.7437,0.4656,-0.4188,0.6635
ZSIVOCZKY,0.8074,-1.4055,-1.8819,-1.3211,-0.3659
McMULLEN,1.161,0.0687,-0.9797,-1.418,-1.4818
MARTINEAU,-1.7102,0.7636,-1.0681,-0.3704,2.5734
HERNU,-1.0093,0.7424,0.7111,0.3894,-0.6908


### `transform` function

In [None]:
# Apply transform to the X attribute stored in res_dmfa.call_ and display the result
print_dt(
    res_dmfa.transform(res_dmfa.call_.X),
    rowname="Individuals",
    title=html("<b>Individuals coordinates</b>"),
)

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
SEBRLE,1.5979,1.0157,0.5831,1.4787,-0.6618
CLAY,1.9963,0.5423,1.8195,-0.2628,-1.3009
KARPOV,2.2324,0.393,1.2802,-1.8911,1.0502
BERNARD,0.1777,-0.7265,1.0475,2.5063,0.3419
YURKOV,-0.1176,2.4916,-1.5844,1.1907,0.2727
WARNERS,1.2003,-1.7437,0.4656,-0.4188,0.6635
ZSIVOCZKY,0.8074,-1.4055,-1.8819,-1.3211,-0.3659
McMULLEN,1.161,0.0687,-0.9797,-1.418,-1.4818
MARTINEAU,-1.7102,0.7636,-1.0681,-0.3704,2.5734
HERNU,-1.0093,0.7424,0.7111,0.3894,-0.6908


## Eigenvalues

In [7]:
# Eigenvalues table extracted from the fitted model
from scientisttools import get_eig
print_dt(
    get_eig(res_dmfa),
    rowname="Dimensions",
    title=html("<b>Eigen values</b>"),
)

Eigen values,Eigen values,Eigen values,Eigen values,Eigen values
Dimensions,Eigenvalue,Difference,Proportion,Cumulative
Dim.1,3.2681,1.4732,32.6811,32.6811
Dim.2,1.7949,0.3804,17.949,50.6302
Dim.3,1.4145,0.3247,14.1454,64.7756
Dim.4,1.0898,0.4245,10.8984,75.674
Dim.5,0.6654,0.0539,6.6537,82.3277
Dim.6,0.6114,0.1997,6.1143,88.4421
Dim.7,0.4118,0.0635,4.1177,92.5598
Dim.8,0.3482,0.126,3.4825,96.0423
Dim.9,0.2222,0.0486,2.222,98.2643
Dim.10,0.1736,,1.7357,100.0


## Individuals information

In [9]:
# Individuals information stored on the fitted model
ind = res_dmfa.ind_
# List the fields available on `ind` (shown as the cell output)
ind._fields

('coord', 'cos2', 'contrib', 'infos')

### Individuals coordinates

In [10]:
# Display the `coord` field of the individuals results
print_dt(
    ind.coord,
    rowname="Individuals",
    title=html("<b>Individuals coordinates</b>"),
)

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
SEBRLE,1.5979,1.0157,0.5831,1.4787,-0.6618
CLAY,1.9963,0.5423,1.8195,-0.2628,-1.3009
KARPOV,2.2324,0.393,1.2802,-1.8911,1.0502
BERNARD,0.1777,-0.7265,1.0475,2.5063,0.3419
YURKOV,-0.1176,2.4916,-1.5844,1.1907,0.2727
WARNERS,1.2003,-1.7437,0.4656,-0.4188,0.6635
ZSIVOCZKY,0.8074,-1.4055,-1.8819,-1.3211,-0.3659
McMULLEN,1.161,0.0687,-0.9797,-1.418,-1.4818
MARTINEAU,-1.7102,0.7636,-1.0681,-0.3704,2.5734
HERNU,-1.0093,0.7424,0.7111,0.3894,-0.6908


## Supplementary quantitative variables information

In [11]:
# Supplementary quantitative variables information stored on the fitted model
quanti_sup = res_dmfa.quanti_sup_
# List the fields available on `quanti_sup` (shown as the cell output)
quanti_sup._fields

('coord', 'cos2')

### Supplementary quantitative variables coordinates

In [12]:
# Display the `coord` field of the supplementary quantitative variables
print_dt(
    quanti_sup.coord,
    rowname="Variables",
    title=html("<b>Supplementary quantitative variables coordinates</b>"),
)

Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Rank,-0.8912,-0.0359,0.0124,-0.2615,-0.1369
Points,0.9478,0.0252,-0.0899,0.2438,0.0871


### Supplementary quantitative variables cos2

In [13]:
# Display the `cos2` field of the supplementary quantitative variables
print_dt(
    quanti_sup.cos2,
    rowname="Variables",
    title=html("<b>Supplementary quantitative variables cos2</b>"),
)

Supplementary quantitative variables cos2,Supplementary quantitative variables cos2,Supplementary quantitative variables cos2,Supplementary quantitative variables cos2,Supplementary quantitative variables cos2,Supplementary quantitative variables cos2
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Rank,0.7942,0.0013,0.0002,0.0684,0.0187
Points,0.8983,0.0006,0.0081,0.0595,0.0076
