# PCAoiv - `decathlon` dataset

In [None]:
# Silence warnings so they do not clutter the rendered cell outputs.
from warnings import simplefilter, filterwarnings
# Ignore FutureWarning specifically.
simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): no category is given here, so this ignores every warning
# category and makes the FutureWarning filter above redundant. Consider
# narrowing it so that genuine problems are not hidden.
filterwarnings("ignore")
from great_tables import GT, html
def print_dt(data,title=None,subtitle=None,rowname=None,digits=4):
    """Build a great_tables ``GT`` table from a pandas object.

    Parameters
    ----------
    data : object exposing ``round``, ``rename_axis`` and ``reset_index``
        (the notebook passes pandas DataFrames).
    title, subtitle : forwarded unchanged to ``GT.tab_header``.
    rowname : name given to the index before it is turned into a column.
    digits : number of decimals passed to ``data.round``.

    Returns
    -------
    The ``GT`` object with its header set; nothing is printed here.
    """
    # NOTE(review): the saved outputs in this notebook show six decimals while
    # the default here is four, and the defining cell has no execution count.
    # The outputs look stale; re-run the notebook to refresh them.
    rounded = data.round(digits)
    # Name the index, then move it into the frame as a regular column.
    flattened = rounded.rename_axis(rowname).reset_index()
    table = GT(data=flattened)
    return table.tab_header(title=title, subtitle=subtitle)

## `decathlon` dataset

In [2]:
# Load the decathlon data, keep its first 41 rows and drop the "Rank" column.
from scientisttools.datasets import load_decathlon
decathlon = load_decathlon().iloc[:41].drop(columns="Rank")
# Display the data with one spanner over the first ten columns (dependent
# variables) and one over the remaining columns (instrumental variables).
(
    GT(decathlon.rename_axis("Individuals").reset_index())
    .tab_header(title=html("<b>Decathlon Dataset</b>"))
    .tab_spanner(label=html("<b>Individuals</b>"), columns="Individuals")
    .tab_spanner(
        label=html("<b>Dependent<br>variables</b>"),
        columns=list(decathlon.columns[:10]),
    )
    .tab_spanner(
        label=html("<b>Explanatory (instrumental)<br>variables</b>"),
        columns=list(decathlon.columns[10:]),
    )
)

Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset
Individuals,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Explanatory (instrumental) variables,Explanatory (instrumental) variables
Individuals,X100m,Long.jump,Shot.put,High.jump,X400m,X110m.hurdle,Discus,Pole.vault,Javeline,X1500m,Points,Competition
SEBRLE,11.04,7.58,14.83,2.07,49.81,14.69,43.75,5.02,63.19,291.7,8217.0,Decastar
CLAY,10.76,7.4,14.26,1.86,49.37,14.05,50.72,4.92,60.15,301.5,8122.0,Decastar
KARPOV,11.02,7.3,14.77,2.04,48.37,14.09,48.95,4.92,50.31,300.2,8099.0,Decastar
BERNARD,11.02,7.23,14.25,1.92,48.93,14.99,40.87,5.32,62.77,280.1,8067.0,Decastar
YURKOV,11.34,7.09,15.19,2.1,50.42,15.31,46.26,4.72,63.44,276.4,8036.0,Decastar
WARNERS,11.11,7.6,14.31,1.98,48.68,14.23,41.1,4.92,51.77,278.1,8030.0,Decastar
ZSIVOCZKY,11.13,7.3,13.48,2.01,48.62,14.17,45.67,4.42,55.37,268.0,8004.0,Decastar
McMULLEN,10.83,7.31,13.76,2.13,49.91,14.38,44.41,4.42,56.37,285.1,7995.0,Decastar
MARTINEAU,11.64,6.81,14.57,1.95,50.14,14.93,47.6,4.92,52.33,262.1,7802.0,Decastar
HERNU,11.37,7.56,14.41,1.86,51.1,15.06,44.99,4.82,57.19,285.1,7733.0,Decastar


## Instantiation and training

In [3]:
from scientisttools import PCAOIV
# Instantiate the model. iv=(10,11) designates the instrumental variables:
# positions 10 and 11 of `decathlon` are "Points" and "Competition", the columns
# shown under "Explanatory (instrumental) variables" in the dataset table.
# NOTE(review): assumes `iv` takes zero-based column positions; confirm
# against the scientisttools PCAOIV documentation.
res_pcaoiv = PCAOIV(iv=(10,11))

## `fit` function

In [4]:
# Fit the PCAOIV model on the decathlon data.
res_pcaoiv.fit(decathlon)

## `fit_transform` function

In [5]:
# Call fit_transform on the decathlon data and render the returned table.
print_dt(
    res_pcaoiv.fit_transform(decathlon),
    rowname="Individuals",
    title=html("<b>Individuals coordinates</b>"),
)

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
SEBRLE,0.830429,0.357013,1.159097,0.086348,1.210317
CLAY,0.682324,2.155342,-0.353584,-2.3679,-0.508816
KARPOV,0.620661,2.014828,-1.650821,0.496692,-0.733025
BERNARD,-0.861129,0.195334,2.439503,-0.150877,-0.11278
YURKOV,2.123507,-1.805207,0.848381,0.599318,0.235727
WARNERS,-1.61044,0.6278,-0.504561,0.670781,0.001557
ZSIVOCZKY,-1.050959,-1.196789,-1.881038,-0.392869,-0.122734
McMULLEN,0.293994,-0.276618,-1.776139,0.159172,1.759533
MARTINEAU,0.552322,-1.275708,0.010888,1.126412,-2.410472
HERNU,0.513951,0.425042,0.638362,-0.801191,-0.063248


## Ratio

In [6]:
# Display the `ratio_` attribute of the fitted model.
# NOTE(review): in the saved outputs this value (~0.6666) matches the sum of the
# eigenvalues (~6.6655) divided by the 10 dependent variables. It is presumably
# the share of inertia left once the instrumental variables are removed;
# confirm against the scientisttools documentation.
res_pcaoiv.ratio_

0.6665537686721228

## Eigenvalues

In [7]:
# PCAOIV eigenvalues, rendered as a table with one row per dimension.
from scientisttools import get_eig
print_dt(
    get_eig(res_pcaoiv),
    rowname="Dimensions",
    title=html("<b>Eigen values</b>"),
)

Eigen values,Eigen values,Eigen values,Eigen values,Eigen values
Dimensions,Eigenvalue,Difference,Proportion,Cumulative
Dim.1,1.736915,0.339596,26.058139,26.058139
Dim.2,1.397319,0.209555,20.963333,47.021473
Dim.3,1.187764,0.545118,17.819474,64.840946
Dim.4,0.642646,0.040401,9.641316,74.482263
Dim.5,0.602245,0.185652,9.035199,83.517461
Dim.6,0.416593,0.084614,6.249952,89.767413
Dim.7,0.331979,0.125692,4.980527,94.747939
Dim.8,0.206287,0.062684,3.094827,97.842767
Dim.9,0.143603,0.143416,2.154415,99.997182
Dim.10,0.000188,,0.002818,100.0


## Individuals information

In [8]:
# Extract the individuals' results from the fitted model.
from scientisttools import get_pcaoiv_ind
ind = get_pcaoiv_ind(res_pcaoiv)
# List the available components; the saved output shows coord, cos2, contrib
# and infos. (`_fields` suggests a namedtuple-like object; TODO confirm.)
ind._fields

('coord', 'cos2', 'contrib', 'infos')

### Individuals coordinates

In [9]:
# Render the individuals' factor coordinates (ind.coord).
print_dt(
    ind.coord,
    rowname="Individuals",
    title=html("<b>Individuals coordinates</b>"),
)

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
SEBRLE,0.830429,0.357013,1.159097,0.086348,1.210317
CLAY,0.682324,2.155342,-0.353584,-2.3679,-0.508816
KARPOV,0.620661,2.014828,-1.650821,0.496692,-0.733025
BERNARD,-0.861129,0.195334,2.439503,-0.150877,-0.11278
YURKOV,2.123507,-1.805207,0.848381,0.599318,0.235727
WARNERS,-1.61044,0.6278,-0.504561,0.670781,0.001557
ZSIVOCZKY,-1.050959,-1.196789,-1.881038,-0.392869,-0.122734
McMULLEN,0.293994,-0.276618,-1.776139,0.159172,1.759533
MARTINEAU,0.552322,-1.275708,0.010888,1.126412,-2.410472
HERNU,0.513951,0.425042,0.638362,-0.801191,-0.063248


### Individuals contributions

In [10]:
# Render the individuals' contributions (ind.contrib).
print_dt(
    ind.contrib,
    rowname="Individuals",
    title=html("<b>Individuals contributions</b>"),
)

Individuals contributions,Individuals contributions,Individuals contributions,Individuals contributions,Individuals contributions,Individuals contributions
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
SEBRLE,0.968372,0.222479,2.758837,0.028297,5.932551
CLAY,0.653761,8.108732,0.256727,21.279985,1.048491
KARPOV,0.540936,7.085927,5.59611,0.936308,2.176108
BERNARD,1.041295,0.0666,12.220491,0.086395,0.051512
YURKOV,6.33206,5.688199,1.477978,1.363201,0.22504
WARNERS,3.641889,0.68796,0.522773,1.707681,1e-05
ZSIVOCZKY,1.55099,2.500089,7.265762,0.585787,0.061006
McMULLEN,0.121371,0.133562,6.477989,0.096156,12.538281
MARTINEAU,0.428373,2.840685,0.000243,4.815475,23.531378
HERNU,0.370921,0.315343,0.836796,2.43622,0.016201


### Individuals squared cosine (cos2)

In [11]:
# Render the individuals' squared cosines (ind.cos2).
print_dt(
    ind.cos2,
    rowname="Individuals",
    title=html("<b>Individuals squared cosinus (Cos2)</b>"),
)

Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2)
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
SEBRLE,0.181669,0.033577,0.353928,0.001964,0.385899
CLAY,0.040074,0.399863,0.010761,0.48262,0.022284
KARPOV,0.044819,0.47231,0.317067,0.028703,0.062516
BERNARD,0.09913,0.005101,0.79556,0.003043,0.0017
YURKOV,0.496401,0.35874,0.079233,0.03954,0.006117
WARNERS,0.553392,0.084098,0.054321,0.096008,1e-06
ZSIVOCZKY,0.167086,0.216672,0.535258,0.023349,0.002279
McMULLEN,0.011846,0.010487,0.432352,0.003472,0.424305
MARTINEAU,0.030491,0.162662,1.2e-05,0.126817,0.580748
HERNU,0.053656,0.036698,0.082777,0.130392,0.000813


### Additional information

In [12]:
# Render the individuals' additional information (ind.infos):
# weight, squared distance to origin and inertia.
print_dt(
    ind.infos,
    rowname="Individuals",
    title=html("<b>Individuals additionals informations <br> (weight, sq. dist., inertia)</b>"),
)

"Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)"
Individuals,Weight,Sq. Dist.,Inertia,% Inertia
SEBRLE,0.02439,3.795988,0.092585,1.389011
CLAY,0.02439,11.617735,0.283359,4.251111
KARPOV,0.02439,8.595055,0.209635,3.145065
BERNARD,0.02439,7.480485,0.182451,2.737226
YURKOV,0.02439,9.083952,0.22156,3.32396
WARNERS,0.02439,4.686579,0.114307,1.714893
ZSIVOCZKY,0.02439,6.610461,0.161231,2.418871
McMULLEN,0.02439,7.296529,0.177964,2.669914
MARTINEAU,0.02439,10.004982,0.244024,3.660979
HERNU,0.02439,4.922914,0.120071,1.801371


## Variables information

In [13]:
# Extract the variables' results from the fitted model.
from scientisttools import get_pcaoiv_var
var = get_pcaoiv_var(res_pcaoiv)
# List the available components; the saved output shows coord, cos2, contrib
# and infos. (`_fields` suggests a namedtuple-like object; TODO confirm.)
var._fields

('coord', 'cos2', 'contrib', 'infos')

### Variables coordinates

In [14]:
# Render the variables' coordinates (var.coord).
print_dt(
    var.coord,
    rowname="Variables",
    title=html("<b>Variables coordinates</b>"),
)

Variables coordinates,Variables coordinates,Variables coordinates,Variables coordinates,Variables coordinates,Variables coordinates
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
X100m,0.174475,-0.350726,0.147892,0.163895,-0.228431
Long.jump,-0.323743,0.240279,-0.059441,-0.113978,0.301368
Shot.put,0.605784,0.022741,0.046304,0.114184,-0.220298
High.jump,0.362388,-0.223476,-0.272777,0.496152,0.363914
X400m,0.553626,0.053681,0.218381,0.03764,0.209104
X110m.hurdle,0.208364,-0.243006,0.438205,0.142705,0.075171
Discus,0.623554,0.114309,-0.347947,-0.206677,-0.316686
Pole.vault,-0.169516,0.547935,0.682963,0.263244,-0.189709
Javeline,0.304579,-0.437165,0.508222,-0.443487,0.208111
X1500m,0.493076,0.774449,-0.017258,-0.114513,0.222063


### Variables contributions

In [15]:
# Render the variables' contributions (var.contrib).
print_dt(
    var.contrib,
    rowname="Variables",
    title=html("<b>Variables contributions</b>"),
)

Variables contributions,Variables contributions,Variables contributions,Variables contributions,Variables contributions,Variables contributions
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
X100m,1.752616,8.803203,1.841449,4.179858,8.664385
Long.jump,6.034248,4.131761,0.297467,2.021497,15.080647
Shot.put,21.127939,0.037011,0.180509,2.028782,8.058402
High.jump,7.560834,3.574088,6.264508,38.305206,21.989988
X400m,17.646305,0.206225,4.015127,0.22046,7.260235
X110m.hurdle,2.49958,4.2261,16.166838,3.168873,0.938279
Discus,22.385663,0.935113,10.192869,6.646774,16.652676
Pole.vault,1.654405,21.486381,39.27028,10.783118,5.975889
Javeline,5.34097,13.677117,21.745877,30.604908,7.191487
X1500m,13.997439,42.923002,0.025077,2.040524,8.188012


### Variables squared cosine (cos2)

In [16]:
# Render the variables' squared cosines (var.cos2).
print_dt(
    var.cos2,
    rowname="Variables",
    title=html("<b>Variables squared cosinus (cos2)</b>"),
)

Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2)
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
X100m,0.072535,0.293101,0.052116,0.064005,0.124335
Long.jump,0.228065,0.125628,0.007688,0.028268,0.197628
Shot.put,0.626557,0.000883,0.003661,0.02226,0.08286
High.jump,0.201467,0.076615,0.114149,0.377646,0.203167
X400m,0.568867,0.005348,0.088513,0.00263,0.081153
X110m.hurdle,0.074424,0.101228,0.329171,0.034909,0.009687
Discus,0.5118,0.017199,0.15936,0.056226,0.132011
Pole.vault,0.031278,0.326793,0.507701,0.075428,0.039173
Javeline,0.114674,0.236241,0.319281,0.243124,0.053537
X1500m,0.258865,0.638603,0.000317,0.013962,0.052505


### Additional information

In [17]:
# Render the variables' additional information (var.infos):
# weight, squared distance to origin and inertia.
print_dt(
    var.infos,
    rowname="Variables",
    title=html("<b>Variables additionals informations <br> (weight, sq. dist., inertia)</b>"),
)

"Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)"
Variables,Weight,Sq. Dist.,Inertia,% Inertia
X100m,1.0,0.419681,0.419681,6.296275
Long.jump,1.0,0.459561,0.459561,6.894585
Shot.put,1.0,0.5857,0.5857,8.78699
High.jump,1.0,0.651845,0.651845,9.779337
X400m,1.0,0.538793,0.538793,8.083258
X110m.hurdle,1.0,0.583356,0.583356,8.751819
Discus,1.0,0.759711,0.759711,11.397596
Pole.vault,1.0,0.918726,0.918726,13.783219
Javeline,1.0,0.808973,0.808973,12.136649
X1500m,1.0,0.939192,0.939192,14.090272


## Supplementary quantitative variables

In [18]:
# Supplementary quantitative variables results stored on the fitted model.
quanti_sup = res_pcaoiv.quanti_sup_
# List the available components; the saved output shows coord and cos2.
quanti_sup._fields

('coord', 'cos2')

### Supplementary quantitative variables coordinates

In [19]:
# Render the supplementary quantitative variables' coordinates.
# NOTE(review): the saved output is all zeros for "Points". This is presumably
# expected because "Points" is one of the instrumental variables whose effect
# is removed; confirm.
print_dt(
    quanti_sup.coord,
    rowname="Variables",
    title=html("<b>Supplementary quantitative variables <br> coordinates</b>"),
)

Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Points,-0.0,0.0,0.0,0.0,-0.0


### Supplementary quantitative variables squared cosine (cos2)

In [20]:
# Render the supplementary quantitative variables' squared cosines.
print_dt(
    quanti_sup.cos2,
    rowname="Variables",
    title=html("<b>Supplementary quantitative variables <br> squared cosinus (cos2)</b>"),
)

Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2)
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Points,0.0,0.0,0.0,0.0,0.0


## Supplementary qualitative variables/categories

In [21]:
# Supplementary qualitative variables results stored on the fitted model.
quali_sup = res_pcaoiv.quali_sup_
# List the available components; the saved output shows barycentre, coord,
# cos2, vtest, dist2 and eta2.
quali_sup._fields

('barycentre', 'coord', 'cos2', 'vtest', 'dist2', 'eta2')

### Supplementary variables/categories coordinates

In [22]:
# Render the supplementary categories' coordinates.
print_dt(
    quali_sup.coord,
    rowname="Categories",
    title=html("<b>Supplementary variables/categories <br> coordinates</b>"),
)

Supplementary variables/categories coordinates,Supplementary variables/categories coordinates,Supplementary variables/categories coordinates,Supplementary variables/categories coordinates,Supplementary variables/categories coordinates,Supplementary variables/categories coordinates
Categories,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Decastar,0.0,-0.0,0.0,-0.0,0.0
OlympicG,-0.0,0.0,-0.0,0.0,-0.0


### Supplementary variables/categories squared cosine (cos2)

In [23]:
# Render the supplementary categories' squared cosines.
# NOTE(review): the saved cos2 values are non-zero although the corresponding
# coordinates and squared distances render as 0. They may be ratios of
# near-zero floating-point quantities; interpret with caution and confirm.
print_dt(
    quali_sup.cos2,
    rowname="Categories",
    title=html("<b>Supplementary variables/categories <br> squared cosinus (cos2)</b>"),
)

Supplementary variables/categories squared cosinus (cos2),Supplementary variables/categories squared cosinus (cos2),Supplementary variables/categories squared cosinus (cos2),Supplementary variables/categories squared cosinus (cos2),Supplementary variables/categories squared cosinus (cos2),Supplementary variables/categories squared cosinus (cos2)
Categories,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Decastar,1e-05,0.04077,0.030707,0.200576,0.155337
OlympicG,0.03253,0.049375,0.043454,0.140441,0.048732


### Supplementary variables/categories squared distance to origin (dist2)

In [24]:
# Render the supplementary categories' squared distance to origin.
print_dt(
    quali_sup.dist2,
    rowname="Categories",
    title=html("<b>Supplementary variables/categories <br> squared distance <br> to origin (dist2)</b>"),
)

Supplementary variables/categories squared distance to origin (dist2),Supplementary variables/categories squared distance to origin (dist2)
Categories,Sq. Dist.
Decastar,0.0
OlympicG,0.0


### Supplementary variables/categories value-test (vtest)

In [25]:
# Render the supplementary categories' value-tests.
print_dt(
    quali_sup.vtest,
    rowname="Categories",
    title=html("<b>Supplementary variables/categories <br> value-test (vtest)</b>"),
)

Supplementary variables/categories value-test (vtest),Supplementary variables/categories value-test (vtest),Supplementary variables/categories value-test (vtest),Supplementary variables/categories value-test (vtest),Supplementary variables/categories value-test (vtest),Supplementary variables/categories value-test (vtest)
Categories,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Decastar,0.0,-0.0,0.0,-0.0,0.0
OlympicG,-0.0,0.0,-0.0,0.0,-0.0


### Squared correlation ratio

In [26]:
# Render the squared correlation ratio (eta2) of the supplementary
# qualitative variables.
print_dt(
    quali_sup.eta2,
    rowname="Categories",
    title=html("<b>Supplementary qualitative variables <br> squared correlation ratio (eta2)</b>"),
)

Supplementary qualitative variables squared correlation ratio (eta2),Supplementary qualitative variables squared correlation ratio (eta2),Supplementary qualitative variables squared correlation ratio (eta2),Supplementary qualitative variables squared correlation ratio (eta2),Supplementary qualitative variables squared correlation ratio (eta2),Supplementary qualitative variables squared correlation ratio (eta2)
Categories,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Competition,0.0,0.0,0.0,0.0,0.0


# Descriptive statistics

In [27]:
# Render the descriptive statistics of the quantitative variables.
print_dt(
    res_pcaoiv.summary_quanti_,
    rowname="",
    title=html("<b>Descriptive statistics of quantitative variables</b>"),
)

Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables
Unnamed: 0_level_1,variable,count,mean,std,min,25%,50%,75%,max
0,X100m,41,10.998049,0.263023,10.44,10.85,10.98,11.14,11.64
1,Long.jump,41,7.26,0.316402,6.61,7.03,7.3,7.48,7.96
2,Shot.put,41,14.477073,0.824428,12.68,13.88,14.57,14.97,16.36
3,High.jump,41,1.976829,0.088951,1.85,1.92,1.95,2.04,2.15
4,X400m,41,49.616341,1.153451,46.81,48.93,49.4,50.3,53.2
5,X110m.hurdle,41,14.605854,0.471789,13.97,14.21,14.48,14.98,15.67
6,Discus,41,44.32561,3.377845,37.92,41.9,44.41,46.07,51.65
7,Pole.vault,41,4.762439,0.278,4.2,4.5,4.8,4.92,5.4
8,Javeline,41,58.316585,4.82682,50.31,55.27,58.36,60.89,70.52
9,X1500m,41,279.024878,11.673247,262.1,271.02,278.05,285.1,317.0


In [28]:
# Render the descriptive statistics of the qualitative variables.
print_dt(
    res_pcaoiv.summary_quali_,
    rowname="",
    title=html("<b>Descriptive statistics of <br> qualitative variables</b>"),
)

Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables,Descriptive statistics of qualitative variables
Unnamed: 0_level_1,variable,categorie,count,proportion
0,Competition,OlympicG,28,0.682927
1,Competition,Decastar,13,0.317073


# Extractions

In [29]:
# Generic extraction helpers and the text summary of the fitted model.
from scientisttools import get_eig, get_pcaoiv, summaryPCAOIV
eig = get_eig(res_pcaoiv)
# These rebind the `ind` and `var` names created in earlier cells.
ind = get_pcaoiv(res_pcaoiv,"ind")
var = get_pcaoiv(res_pcaoiv,"var")
summaryPCAOIV(res_pcaoiv)

         Principal Component Analysis with Orthogonal Instrumental Variables - Results               

Eigenvalues
                       Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7  \
Variance               1.737   1.397   1.188   0.643   0.602   0.417   0.332   
Difference             0.340   0.210   0.545   0.040   0.186   0.085   0.126   
% of var.             26.058  20.963  17.819   9.641   9.035   6.250   4.981   
Cumulative % of var.  26.058  47.021  64.841  74.482  83.517  89.767  94.748   

                       Dim.8   Dim.9   Dim.10  
Variance               0.206   0.144    0.000  
Difference             0.063   0.143      NaN  
% of var.              3.095   2.154    0.003  
Cumulative % of var.  97.843  99.997  100.000  

Individuals (the 10 first)

           Weight  Sq. Dist.  Inertia  % Inertia  Dim.1    ctr   cos2  Dim.2  \
SEBRLE      0.024      3.796    0.093      1.389  0.830  0.968  0.182  0.357   
CLAY        0.024     11.618    0.283      4.251  0.682