# PCAiv - `decathlon` dataset

In [1]:
# Disable warnings for the rest of the notebook session.
from warnings import simplefilter, filterwarnings
# Ignore FutureWarning specifically.
simplefilter(action='ignore', category=FutureWarning)
# Ignore every warning category (the default category is Warning); this
# blanket filter also covers the FutureWarning filter installed just above.
filterwarnings("ignore")
from great_tables import GT, html
def print_dt(data,title=None,subtitle=None,rowname=None,digits=4):
    """Build a great_tables ``GT`` table from a rounded copy of ``data``.

    Parameters
    ----------
    data : object providing ``round``, ``rename_axis`` and ``reset_index``
        (this notebook passes pandas DataFrames).
    title, subtitle : values forwarded unchanged to ``GT.tab_header``.
    rowname : name given to the index axis before it is turned into a column.
    digits : number of decimals passed to ``data.round``.

    Returns
    -------
    The object returned by ``GT(...).tab_header(...)``.
    """
    # Round first, then label the index so it is displayed as a named column.
    rounded = data.round(digits)
    with_index_column = rounded.rename_axis(rowname).reset_index()
    table = GT(data=with_index_column)
    return table.tab_header(title=title, subtitle=subtitle)

## `decathlon` dataset

In [2]:
#decathlon dataset
from scientisttools.datasets import load_decathlon
# Keep the first 41 rows and discard the "Rank" column.
decathlon = load_decathlon().iloc[:41].drop(columns=["Rank"])
# Display: the index becomes an "Individuals" column; the remaining columns are
# grouped under two spanners, split at column position 10.
(
    GT(decathlon.rename_axis("Individuals").reset_index())
    .tab_header(title=html("<b>Decathlon Dataset</b>"))
    .tab_spanner(
        label=html("<b>Individuals</b>"),
        columns="Individuals",
    )
    .tab_spanner(
        label=html("<b>Dependent<br>variables</b>"),
        columns=list(decathlon.columns[:10]),
    )
    .tab_spanner(
        label=html("<b>Explanatory (instrumental)<br>variables</b>"),
        columns=list(decathlon.columns[10:]),
    )
)

Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset,Decathlon Dataset
Individuals,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Explanatory (instrumental) variables,Explanatory (instrumental) variables
Individuals,X100m,Long.jump,Shot.put,High.jump,X400m,X110m.hurdle,Discus,Pole.vault,Javeline,X1500m,Points,Competition
SEBRLE,11.04,7.58,14.83,2.07,49.81,14.69,43.75,5.02,63.19,291.7,8217.0,Decastar
CLAY,10.76,7.4,14.26,1.86,49.37,14.05,50.72,4.92,60.15,301.5,8122.0,Decastar
KARPOV,11.02,7.3,14.77,2.04,48.37,14.09,48.95,4.92,50.31,300.2,8099.0,Decastar
BERNARD,11.02,7.23,14.25,1.92,48.93,14.99,40.87,5.32,62.77,280.1,8067.0,Decastar
YURKOV,11.34,7.09,15.19,2.1,50.42,15.31,46.26,4.72,63.44,276.4,8036.0,Decastar
WARNERS,11.11,7.6,14.31,1.98,48.68,14.23,41.1,4.92,51.77,278.1,8030.0,Decastar
ZSIVOCZKY,11.13,7.3,13.48,2.01,48.62,14.17,45.67,4.42,55.37,268.0,8004.0,Decastar
McMULLEN,10.83,7.31,13.76,2.13,49.91,14.38,44.41,4.42,56.37,285.1,7995.0,Decastar
MARTINEAU,11.64,6.81,14.57,1.95,50.14,14.93,47.6,4.92,52.33,262.1,7802.0,Decastar
HERNU,11.37,7.56,14.41,1.86,51.1,15.06,44.99,4.82,57.19,285.1,7733.0,Decastar


## Instantiation and training

In [3]:
from scientisttools import PCAiv
# Instantiation. `iv` receives the positions 10 and 11, which in `decathlon`
# are the "Points" and "Competition" columns (the explanatory/instrumental
# variables) — presumably interpreted as positional column indexes; confirm
# against the PCAiv documentation.
res_pcaiv = PCAiv(iv=(10,11))

## `fit` function

In [4]:
#fit
res_pcaiv.fit(decathlon)

## `fit_transform` function

In [5]:
# fit_transform: presumably refits the model on `decathlon` and returns the
# individuals' coordinates (the first 10 rows shown here equal the first
# 10 rows of `ind.coord` displayed further down). Values are rounded to
# print_dt's default of 4 digits.
print_dt(res_pcaiv.fit_transform(decathlon).head(10),rowname="Individuals",title=html("<b>Individuals coordinates</b>"))

Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2
SEBRLE,0.9981,0.8414
CLAY,0.5094,0.8071
KARPOV,0.3911,0.7988
BERNARD,0.2265,0.7873
YURKOV,0.067,0.7761
WARNERS,0.0361,0.7739
ZSIVOCZKY,-0.0976,0.7645
McMULLEN,-0.1439,0.7613
MARTINEAU,-1.1367,0.6916
HERNU,-1.4917,0.6667


## Ratio

In [6]:
# Ratio, rounded to 4 decimals.
# NOTE(review): the displayed value (0.3334) matches the sum of the two
# eigenvalues reported in the eigenvalue table (3.0733 + 0.2611) divided by 10,
# the number of dependent variables — presumably the share of inertia explained
# by the instrumental variables; confirm against the PCAiv documentation.
round(res_pcaiv.ratio_,4)

0.3334

## Eigen values

In [7]:
#PCA eigen values
from scientisttools import get_eig
print_dt(get_eig(res_pcaiv),rowname="Dimensions",title=html("<b>Eigen values</b>"))

Eigen values,Eigen values,Eigen values,Eigen values,Eigen values
Dimensions,Eigenvalue,Difference,Proportion,Cumulative
Dim.1,3.0733,2.8122,92.1689,92.1689
Dim.2,0.2611,,7.8311,100.0


## Individuals information

In [8]:
# Individuals information: collect the per-individual results of the fitted model.
from scientisttools import get_pcaiv_ind
ind = get_pcaiv_ind(res_pcaiv)
# List the fields available on the returned object (namedtuple-style `_fields`).
ind._fields

('coord', 'cos2', 'contrib', 'infos')

### Individuals coordinates

In [9]:
#individuals factor coordinates
print_dt(ind.coord.head(10),rowname="Individuals",title=html("<b>Individuals coordinates</b>"))

Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2
SEBRLE,0.9981,0.8414
CLAY,0.5094,0.8071
KARPOV,0.3911,0.7988
BERNARD,0.2265,0.7873
YURKOV,0.067,0.7761
WARNERS,0.0361,0.7739
ZSIVOCZKY,-0.0976,0.7645
McMULLEN,-0.1439,0.7613
MARTINEAU,-1.1367,0.6916
HERNU,-1.4917,0.6667


### Individuals contributions

In [10]:
#individuals contributions
print_dt(ind.contrib.head(10),rowname="Individuals",title=html("<b>Individuals contributions</b>"))

Individuals contributions,Individuals contributions,Individuals contributions
Individuals,Dim.1,Dim.2
SEBRLE,0.7906,6.6127
CLAY,0.2059,6.0847
KARPOV,0.1214,5.9602
BERNARD,0.0407,5.789
YURKOV,0.0036,5.6256
WARNERS,0.001,5.5943
ZSIVOCZKY,0.0076,5.4594
McMULLEN,0.0164,5.4131
MARTINEAU,1.0255,4.4678
HERNU,1.7659,4.1518


### Individuals cos2

In [11]:
#individuals squared cosinus (cos2)
print_dt(ind.cos2.head(10),rowname="Individuals",title=html("<b>Individuals cos2</b>"))

Individuals cos2,Individuals cos2,Individuals cos2
Individuals,Dim.1,Dim.2
SEBRLE,0.5846,0.4154
CLAY,0.2849,0.7151
KARPOV,0.1934,0.8066
BERNARD,0.0764,0.9236
YURKOV,0.0074,0.9926
WARNERS,0.0022,0.9978
ZSIVOCZKY,0.016,0.984
McMULLEN,0.0345,0.9655
MARTINEAU,0.7298,0.2702
HERNU,0.8335,0.1665


### Individuals additional information

In [12]:
#individuals additionals informations (weight,squared distance to origin, inertia)
print_dt(ind.infos.head(10),rowname="Individuals",title=html("<b>Individuals additionals informations</b>"))

Individuals additionals informations,Individuals additionals informations,Individuals additionals informations,Individuals additionals informations,Individuals additionals informations
Individuals,Weight,Sq. Dist.,Inertia,% Inertia
SEBRLE,0.0244,1.7042,0.0416,1.2465
CLAY,0.0244,0.9109,0.0222,0.6663
KARPOV,0.0244,0.7911,0.0193,0.5786
BERNARD,0.0244,0.6711,0.0164,0.4909
YURKOV,0.0244,0.6068,0.0148,0.4438
WARNERS,0.0244,0.6002,0.0146,0.439
ZSIVOCZKY,0.0244,0.594,0.0145,0.4345
McMULLEN,0.0244,0.6002,0.0146,0.4391
MARTINEAU,0.0244,1.7705,0.0432,1.295
HERNU,0.0244,2.6696,0.0651,1.9527


## Variables information

In [13]:
# Variables information: collect the per-variable results of the fitted model.
from scientisttools import get_pcaiv_var
var = get_pcaiv_var(res_pcaiv)
# List the fields available on the returned object (namedtuple-style `_fields`).
var._fields

('coord', 'cos2', 'contrib', 'infos')

### Variables coordinates

In [14]:
#variables coordinates
print_dt(var.coord,rowname="Variables",title=html("<b>Variables coordinates</b>"))

Variables coordinates,Variables coordinates,Variables coordinates
Variables,Dim.1,Dim.2
X100m,-0.6954,0.311
Long.jump,0.7205,0.1458
Shot.put,0.632,-0.1221
High.jump,0.5721,0.1446
X400m,-0.6621,-0.151
X110m.hurdle,-0.6453,0.014
Discus,0.4813,0.0932
Pole.vault,0.1902,0.2123
Javeline,0.426,-0.0977
X1500m,-0.1995,0.145


### Variables contributions

In [15]:
#variables contributions
print_dt(var.contrib,rowname="Variables",title=html("<b>Variables contributions</b>"))

Variables contributions,Variables contributions,Variables contributions
Variables,Dim.1,Dim.2
X100m,15.7353,37.0402
Long.jump,16.8927,8.1457
Shot.put,12.9954,5.7084
High.jump,10.648,8.0061
X400m,14.265,8.7299
X110m.hurdle,13.5503,0.0752
Discus,7.536,3.3248
Pole.vault,1.1774,17.267
Javeline,5.9053,3.6528
X1500m,1.2946,8.0499


### Variables cos2

In [16]:
#variables cos2
print_dt(var.cos2,rowname="Variables",title=html("<b>Variables cos2</b>"))

Variables cos2,Variables cos2,Variables cos2
Variables,Dim.1,Dim.2
X100m,0.8333,0.1667
Long.jump,0.9606,0.0394
Shot.put,0.964,0.036
High.jump,0.94,0.06
X400m,0.9506,0.0494
X110m.hurdle,0.9995,0.0005
Discus,0.9639,0.0361
Pole.vault,0.4452,0.5548
Javeline,0.9501,0.0499
X1500m,0.6543,0.3457


### Variables additional information

In [17]:
#variables additionals informations
print_dt(var.infos,rowname="Variables",title=html("<b>Variables additionals informations</b>"))

Variables additionals informations,Variables additionals informations,Variables additionals informations,Variables additionals informations,Variables additionals informations
Variables,Weight,Sq. Dist.,Inertia,% Inertia
X100m,1.0,0.5803,0.5803,17.4037
Long.jump,1.0,0.5404,0.5404,16.2077
Shot.put,1.0,0.4143,0.4143,12.4248
High.jump,1.0,0.3482,0.3482,10.4411
X400m,1.0,0.4612,0.4612,13.8315
X110m.hurdle,1.0,0.4166,0.4166,12.4951
Discus,1.0,0.2403,0.2403,7.2062
Pole.vault,1.0,0.0813,0.0813,2.4374
Javeline,1.0,0.191,0.191,5.7289
X1500m,1.0,0.0608,0.0608,1.8236


## Summary

### Descriptive statistics for quantitative variables

In [18]:
#descriptive statistics for quantitative variables
print_dt(res_pcaiv.summary_quanti_,rowname="",title=html("<b>Descriptive statistics of quantitative variables</b>"))

Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables
Unnamed: 0_level_1,variable,count,mean,std,min,25%,50%,75%,max
0,X100m,41,10.998,0.263,10.44,10.85,10.98,11.14,11.64
1,Long.jump,41,7.26,0.3164,6.61,7.03,7.3,7.48,7.96
2,Shot.put,41,14.4771,0.8244,12.68,13.88,14.57,14.97,16.36
3,High.jump,41,1.9768,0.089,1.85,1.92,1.95,2.04,2.15
4,X400m,41,49.6163,1.1535,46.81,48.93,49.4,50.3,53.2
5,X110m.hurdle,41,14.6059,0.4718,13.97,14.21,14.48,14.98,15.67
6,Discus,41,44.3256,3.3778,37.92,41.9,44.41,46.07,51.65
7,Pole.vault,41,4.7624,0.278,4.2,4.5,4.8,4.92,5.4
8,Javeline,41,58.3166,4.8268,50.31,55.27,58.36,60.89,70.52
9,X1500m,41,279.0249,11.6732,262.1,271.02,278.05,285.1,317.0


### Correlation tests

In [19]:
#correlation tests
print_dt(res_pcaiv.corrtest_.head(10),rowname="",title=html("<b>Correlation tests</b>"))

Correlation tests,Correlation tests,Correlation tests,Correlation tests,Correlation tests,Correlation tests
Unnamed: 0_level_1,variable1,variable2,test,statistic,pvalue
0,X100m,Long.jump,Pearson correlation,-0.5987,0.0
1,X100m,Shot.put,Pearson correlation,-0.3565,0.0222
2,X100m,High.jump,Pearson correlation,-0.2463,0.1207
3,X100m,X400m,Pearson correlation,0.5203,0.0005
4,X100m,X110m.hurdle,Pearson correlation,0.5799,0.0001
5,X100m,Discus,Pearson correlation,-0.2217,0.1636
6,X100m,Pole.vault,Pearson correlation,-0.0825,0.6079
7,X100m,Javeline,Pearson correlation,-0.1577,0.3246
8,X100m,X1500m,Pearson correlation,-0.0605,0.7069
9,X100m,Points,Pearson correlation,-0.6843,0.0


# Extractions

In [20]:
# Extraction helpers.
# `get_eig` was already imported in the eigenvalues cell; it is imported again here.
from scientisttools import get_eig, get_pcaiv, summaryPCAiv
eig = get_eig(res_pcaiv)
# Rebinds `ind` and `var`, which earlier cells set via get_pcaiv_ind / get_pcaiv_var.
ind, var = get_pcaiv(res_pcaiv,"ind"), get_pcaiv(res_pcaiv,"var")
# Print the text summary of the fitted model.
summaryPCAiv(res_pcaiv)

         Principal Component Analysis with Instrumental Variables - Results               

Eigenvalues
                       Dim.1    Dim.2
Variance               3.073    0.261
Difference             2.812      NaN
% of var.             92.169    7.831
Cumulative % of var.  92.169  100.000

Individuals (the 10 first)

           Weight  Sq. Dist.  Inertia  % Inertia  Dim.1    ctr   cos2  Dim.2  \
SEBRLE      0.024      1.704    0.042      1.247  0.998  0.791  0.585  0.841   
CLAY        0.024      0.911    0.022      0.666  0.509  0.206  0.285  0.807   
KARPOV      0.024      0.791    0.019      0.579  0.391  0.121  0.193  0.799   
BERNARD     0.024      0.671    0.016      0.491  0.226  0.041  0.076  0.787   
YURKOV      0.024      0.607    0.015      0.444  0.067  0.004  0.007  0.776   
WARNERS     0.024      0.600    0.015      0.439  0.036  0.001  0.002  0.774   
ZSIVOCZKY   0.024      0.594    0.014      0.435 -0.098  0.008  0.016  0.765   
McMULLEN    0.024      0.600    0.015