# Principal Component Analysis with Orthogonal Instrumental Variables (PCAOIV) - `rhone` dataset

In [1]:
# Silence warnings so they do not clutter the rendered cell outputs.
from warnings import simplefilter, filterwarnings
# Ignore FutureWarning specifically.
simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): this blanket filter ignores every warning category, which makes
# the FutureWarning-specific filter above redundant and can also hide real problems.
filterwarnings("ignore")
from great_tables import GT, html
def print_dt(data,title=None,subtitle=None,rowname=None,digits=6):
    """Build a great_tables table from a DataFrame.

    Parameters
    ----------
    data : DataFrame whose values are rounded before display.
    title, subtitle : header texts forwarded to ``GT.tab_header``.
    rowname : name given to the index axis; the index is then moved into a
        regular column so that it is rendered as part of the table.
    digits : number of decimals kept by ``data.round``.

    Returns
    -------
    The ``GT`` object with its header set.
    """
    # Round first, then expose the index as an ordinary (named) column.
    rounded = data.round(digits)
    with_index_column = rounded.rename_axis(rowname).reset_index()
    table = GT(data=with_index_column)
    return table.tab_header(title=title, subtitle=subtitle)

## `rhone` dataset

In [2]:
# Load the `rhone` dataset and display it with grouped column headers.
from scientisttools.datasets import rhone
(
    GT(rhone.rename_axis("Individuals").reset_index())
    .tab_header(title=html("<b>Rhone Dataset</b>"))
    .tab_spanner(label=html("<b>Individuals</b>"),columns="Individuals")
    # The first 15 columns are grouped as the dependent variables.
    .tab_spanner(label=html("<b>Dependent<br>variables</b>"),columns=rhone.columns.tolist()[:15])
    # The remaining columns (position 15 onwards) are grouped as the explanatory (instrumental) variables.
    .tab_spanner(label=html("<b>Explanatory (instrumental)<br>variables</b>"),columns=rhone.columns.tolist()[15:])
)

Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset
Individuals,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Explanatory (instrumental) variables,Explanatory (instrumental) variables,Explanatory (instrumental) variables
Individuals,air.temp,wat.temp,conduc,pH,oxygen,secchi,caco3,totca,mg,so4,no2,hco3,suspension,organique,chloro,Geneva,Arve,Others
0,2,5.9,359,8.2,93,67,186,62.9,7.1,35.0,0.55,176.9,17.3,2.6,1.4,299.0,60.0,218.0
1,2,3.4,348,7.9,92,203,176,57.7,7.8,42.1,0.78,158.6,3.7,0.9,1.6,266.0,30.7,89.3
2,10,7.5,260,8.0,94,176,176,60.1,6.3,32.9,0.54,169.6,4.4,1.2,5.7,113.0,53.0,255.0
3,16,9.1,298,7.9,101,85,165,57.7,5.1,32.8,0.63,161.0,22.0,3.7,6.2,389.0,99.5,248.5
4,15,9.6,287,8.2,96,40,167,58.9,4.9,24.4,0.48,176.9,44.9,5.6,2.9,315.0,141.0,433.0
5,10,10.1,277,8.2,98,28,165,57.3,5.3,28.6,0.48,170.8,92.4,8.8,9.2,254.0,312.0,681.0
6,15,11.4,293,8.2,98,22,176,62.1,5.1,22.5,0.55,191.5,98.0,9.8,4.4,357.0,197.0,865.0
7,12,9.5,295,8.1,98,55,170,58.9,5.6,29.8,0.5,168.4,40.0,4.4,10.6,469.0,117.0,367.0
8,16,11.0,299,8.3,95,22,170,60.1,4.9,30.7,0.65,170.8,29.3,4.4,1.5,435.0,119.0,208.0
9,23,16.3,243,8.1,93,73,135,45.3,5.3,33.5,0.54,126.9,26.6,5.4,6.4,351.0,193.0,154.0


## Instantiation and training

In [3]:
from scientisttools import PCAOIV
# Create the PCAOIV estimator. `iv` lists the instrumental variables by column
# position: 15, 16 and 17 match the columns sliced with `[15:]` in the dataset
# table (Geneva, Arve, Others).
# NOTE(review): presumably 0-based positions — confirm against the PCAOIV docs.
res_pcaoiv = PCAOIV(iv=(15,16,17))

## `fit` function

In [4]:
# Fit the model on the `rhone` data; later cells read the results from `res_pcaoiv`.
res_pcaoiv.fit(rhone)

## `fit_transform` function

In [5]:
# `fit_transform` returns a table that is displayed here as the individuals' coordinates;
# the values match `ind.coord` shown in the "Individuals coordinates" section.
# NOTE(review): presumably this refits the model on `rhone` before transforming — confirm.
print_dt(res_pcaoiv.fit_transform(rhone),rowname="Individuals",title=html("<b>Individuals coordinates</b>"))

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
0,2.755117,1.121177,-0.2739,0.698145,-0.187062
1,2.953005,-0.269717,1.743127,-0.107171,0.128682
2,-1.307802,-0.290253,0.255352,1.521012,0.667162
3,1.006945,-0.640502,0.779445,1.09323,1.088518
4,-0.134402,0.659894,-1.29967,0.301242,0.42028
5,-1.278893,-0.205831,1.219143,0.633398,-1.350493
6,-1.877933,-0.102872,-0.624828,-1.367867,-0.06387
7,0.630263,-0.311107,0.93689,2.149982,-0.636952
8,2.276503,1.780008,-0.709781,0.144017,0.341734
9,-0.549141,0.212938,0.551049,0.416501,-0.695207


## Ratio

In [6]:
# Scalar `ratio_` attribute of the fitted model.
# NOTE(review): presumably the share of the initial inertia that remains once the
# effect of the instrumental variables has been removed — confirm against the PCAOIV docs.
res_pcaoiv.ratio_

0.497151426658365

## Eigen values

In [7]:
# Eigenvalues of the fitted PCAOIV model: eigenvalue, difference with the next
# eigenvalue, proportion and cumulative proportion (both in percent) per dimension.
from scientisttools import get_eig
print_dt(get_eig(res_pcaoiv),rowname="Dimensions",title=html("<b>Eigen values</b>"))

Eigen values,Eigen values,Eigen values,Eigen values,Eigen values
Dimensions,Eigenvalue,Difference,Proportion,Cumulative
Dim.1,2.866382,1.890667,38.437414,38.437414
Dim.2,0.975715,0.106461,13.084075,51.52149
Dim.3,0.869254,0.141021,11.656464,63.177954
Dim.4,0.728233,0.18629,9.765414,72.943368
Dim.5,0.541943,0.164263,7.267316,80.210684
Dim.6,0.377681,0.005088,5.064596,85.27528
Dim.7,0.372593,0.083043,4.996369,90.271649
Dim.8,0.28955,0.129902,3.882784,94.154434
Dim.9,0.159647,0.038138,2.14083,96.295264
Dim.10,0.121509,0.051911,1.629405,97.924669


## Individuals information

In [8]:
# Extract the individuals' results from the fitted model. The returned object is
# namedtuple-like: `_fields` lists its components (coord, cos2, contrib, infos).
from scientisttools import get_pcaoiv_ind
ind = get_pcaoiv_ind(res_pcaoiv)
ind._fields

('coord', 'cos2', 'contrib', 'infos')

### Individuals coordinates

In [9]:
# Coordinates of the individuals on the factor axes.
print_dt(ind.coord,rowname="Individuals",title=html("<b>Individuals coordinates</b>"))

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
0,2.755117,1.121177,-0.2739,0.698145,-0.187062
1,2.953005,-0.269717,1.743127,-0.107171,0.128682
2,-1.307802,-0.290253,0.255352,1.521012,0.667162
3,1.006945,-0.640502,0.779445,1.09323,1.088518
4,-0.134402,0.659894,-1.29967,0.301242,0.42028
5,-1.278893,-0.205831,1.219143,0.633398,-1.350493
6,-1.877933,-0.102872,-0.624828,-1.367867,-0.06387
7,0.630263,-0.311107,0.93689,2.149982,-0.636952
8,2.276503,1.780008,-0.709781,0.144017,0.341734
9,-0.549141,0.212938,0.551049,0.416501,-0.695207


### Individuals contributions

In [10]:
# Contribution of each individual to each dimension.
print_dt(ind.contrib,rowname="Individuals",title=html("<b>Individuals contributions</b>"))

Individuals contributions,Individuals contributions,Individuals contributions,Individuals contributions,Individuals contributions,Individuals contributions
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
0,6.790179,3.3034,0.221296,1.716151,0.165559
1,7.800629,0.191174,8.962862,0.04044,0.078346
2,1.529979,0.221394,0.192338,8.14573,2.105932
3,0.907011,1.078087,1.79209,4.208119,5.605995
4,0.016159,1.144356,4.982585,0.319517,0.835718
5,1.463085,0.111335,4.384279,1.412595,8.629112
6,3.154726,0.02781,1.15162,6.58798,0.019301
7,0.35534,0.25435,2.589201,16.275505,1.919531
8,4.635937,8.326383,1.486062,0.073028,0.552534
9,0.269755,0.119157,0.895713,0.610797,2.286705


### Individuals squared cosine (cos2)

In [11]:
# Squared cosine (cos2) of each individual on each dimension.
print_dt(ind.cos2,rowname="Individuals",title=html("<b>Individuals squared cosinus (Cos2)</b>"))

Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2),Individuals squared cosinus (Cos2)
Individuals,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
0,0.64478,0.106778,0.006373,0.041402,0.002972
1,0.640056,0.00534,0.223022,0.000843,0.001215
2,0.266285,0.013116,0.010152,0.360186,0.069299
3,0.186212,0.075342,0.111575,0.219492,0.217604
4,0.006094,0.146901,0.569825,0.030613,0.059587
5,0.229751,0.005951,0.208785,0.056356,0.256197
6,0.508699,0.001526,0.056315,0.269891,0.000588
7,0.045196,0.011012,0.09987,0.525928,0.04616
8,0.523307,0.319937,0.050871,0.002094,0.011792
9,0.073484,0.011049,0.073996,0.042273,0.117775


### Additional information

In [12]:
# Additional information on the individuals: weight, squared distance to the
# origin, inertia and percentage of inertia.
print_dt(ind.infos,rowname="Individuals",title=html("<b>Individuals additionals informations <br> (weight, sq. dist., inertia)</b>"))

"Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)"
Individuals,Weight,Sq. Dist.,Inertia,% Inertia
0,0.025641,11.7725,0.301859,4.047848
1,0.025641,13.624185,0.349338,4.684529
2,0.025641,6.423006,0.164692,2.208482
3,0.025641,5.445072,0.139617,1.872229
4,0.025641,2.964319,0.076008,1.019249
5,0.025641,7.118869,0.182535,2.447747
6,0.025641,6.932653,0.17776,2.383718
7,0.025641,8.789083,0.225361,3.022032
8,0.025641,9.903289,0.25393,3.40514
9,0.025641,4.103685,0.105223,1.411008


## Variables information

In [13]:
# Extract the variables' results from the fitted model. The returned object is
# namedtuple-like: `_fields` lists its components (coord, cos2, contrib, infos).
from scientisttools import get_pcaoiv_var
var = get_pcaoiv_var(res_pcaoiv)
var._fields

('coord', 'cos2', 'contrib', 'infos')

### Variables coordinates

In [14]:
# Coordinates of the dependent variables on the factor axes.
print_dt(var.coord,rowname="Variables",title=html("<b>Variables coordinates</b>"))

Variables coordinates,Variables coordinates,Variables coordinates,Variables coordinates,Variables coordinates,Variables coordinates
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
air.temp,-0.519144,0.020336,-0.076822,-0.095672,0.066601
wat.temp,-0.624574,0.001921,-0.067639,-0.138388,-0.028069
conduc,0.685306,-0.00558,0.031928,0.056601,-0.044038
pH,-0.280031,0.897656,-0.013741,0.12338,-0.18784
oxygen,-0.080435,0.194292,0.337556,0.264137,0.516053
secchi,-0.171761,-0.125572,0.338547,-0.027639,-0.071867
caco3,0.634766,0.033276,0.045981,0.090331,-0.035617
totca,0.634721,0.026676,-0.009423,0.126832,0.038768
mg,0.336473,0.050852,0.285368,-0.128878,-0.377768
so4,-0.305009,-0.083202,0.418344,-0.148693,0.024926


### Variables contributions

In [15]:
# Contribution of each dependent variable to each dimension.
print_dt(var.contrib,rowname="Variables",title=html("<b>Variables contributions</b>"))

Variables contributions,Variables contributions,Variables contributions,Variables contributions,Variables contributions,Variables contributions
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
air.temp,9.402476,0.042384,0.678922,1.256891,0.818484
wat.temp,13.609216,0.000378,0.526313,2.629816,0.145375
conduc,16.384563,0.003191,0.117274,0.439919,0.357847
pH,2.735763,82.584189,0.02172,2.090348,6.510621
oxygen,0.225713,3.868903,13.108216,9.580529,49.13997
secchi,1.029232,1.616079,13.185366,0.104902,0.953037
caco3,14.057002,0.113482,0.243231,1.120488,0.234078
totca,14.055026,0.072935,0.010214,2.208958,0.27733
mg,3.94972,0.265031,9.368361,2.280794,26.332711
so4,3.24557,0.70948,20.133553,3.036055,0.114642


### Variables squared cosine (cos2)

In [16]:
# Squared cosine (cos2) of each dependent variable on each dimension.
print_dt(var.cos2,rowname="Variables",title=html("<b>Variables squared cosinus (cos2)</b>"))

Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2),Variables squared cosinus (cos2)
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
air.temp,0.647394,0.000993,0.014176,0.021987,0.010655
wat.temp,0.828574,8e-06,0.009718,0.040678,0.001673
conduc,0.808273,5.4e-05,0.001754,0.005514,0.003338
pH,0.082644,0.84922,0.000199,0.016043,0.037186
oxygen,0.011737,0.068482,0.206706,0.126568,0.483117
secchi,0.108953,0.058234,0.423283,0.002821,0.019075
caco3,0.869581,0.00239,0.004563,0.01761,0.002738
totca,0.853075,0.001507,0.000188,0.034063,0.003183
mg,0.233845,0.005341,0.168205,0.034307,0.294766
so4,0.22237,0.016547,0.41833,0.052848,0.001485


### Additional information

In [17]:
# Additional information on the variables: weight, squared distance to the
# origin, inertia and percentage of inertia.
print_dt(var.infos,rowname="Variables",title=html("<b>Variables additionals informations <br> (weight, sq. dist., inertia)</b>"))

"Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)"
Variables,Weight,Sq. Dist.,Inertia,% Inertia
air.temp,1.0,0.416301,0.416301,5.582487
wat.temp,1.0,0.470799,0.470799,6.313293
conduc,1.0,0.581046,0.581046,7.791676
pH,1.0,0.948854,0.948854,12.723877
oxygen,1.0,0.551235,0.551235,7.39191
secchi,1.0,0.270775,0.270775,3.631015
caco3,1.0,0.463358,0.463358,6.213508
totca,1.0,0.472257,0.472257,6.332838
mg,1.0,0.484141,0.484141,6.492196
so4,1.0,0.418358,0.418358,5.610067


## Supplementary quantitative variables

In [18]:
# Results stored on the fitted model for the supplementary quantitative variables
# (the rows shown in the next cells are the instrumental variables Geneva, Arve, Others).
# The namedtuple-like object exposes two components: coord and cos2.
quanti_sup = res_pcaoiv.quanti_sup_
quanti_sup._fields

('coord', 'cos2')

### Supplementary quantitative variables coordinates

In [19]:
# Coordinates of the supplementary quantitative variables.
# They all round to zero at 6 decimals, which is consistent with the analysis
# being orthogonal to these instrumental variables.
print_dt(quanti_sup.coord,rowname="Variables",title=html("<b>Supplementary quantitative variables <br> coordinates</b>"))

Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates,Supplementary quantitative variables coordinates
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Geneva,-0.0,0.0,-0.0,-0.0,0.0
Arve,-0.0,-0.0,-0.0,-0.0,0.0
Others,-0.0,-0.0,0.0,0.0,-0.0


### Supplementary quantitative variables squared cosine (cos2)

In [20]:
# Squared cosine (cos2) of the supplementary quantitative variables.
# All values display as zero, in line with their (rounded) zero coordinates.
print_dt(quanti_sup.cos2,rowname="Variables",title=html("<b>Supplementary quantitative variables <br> squared cosinus (cos2)</b>"))

Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2),Supplementary quantitative variables squared cosinus (cos2)
Variables,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Geneva,0.0,0.0,0.0,0.0,0.0
Arve,0.0,0.0,0.0,0.0,0.0
Others,0.0,0.0,0.0,0.0,0.0


# Descriptive statistics

In [21]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the quantitative
# variables, as stored on the fitted model. `rowname=""` gives the index column
# an empty name.
print_dt(res_pcaoiv.summary_quanti_,rowname="",title=html("<b>Descriptive statistics of quantitative variables</b>"))

Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables
Unnamed: 0_level_1,variable,count,mean,std,min,25%,50%,75%,max
0,air.temp,39,13.769231,7.382274,-5.0,9.0,14.0,19.0,24.0
1,wat.temp,39,13.102564,5.563295,3.4,9.3,11.4,16.65,24.0
2,conduc,39,281.358974,36.337227,225.0,254.5,276.0,298.5,359.0
3,pH,39,8.035897,0.176944,7.6,8.0,8.1,8.1,8.3
4,oxygen,39,91.846154,6.971893,77.0,86.0,93.0,96.5,109.0
5,secchi,39,107.538462,62.768516,19.0,61.5,98.0,166.0,250.0
6,caco3,39,157.538462,27.050485,116.0,139.0,156.0,173.0,227.0
7,totca,39,53.069231,9.78057,37.3,46.9,52.5,59.3,75.8
8,mg,39,6.110256,1.231771,2.9,5.3,5.8,6.6,9.2
9,so4,39,36.082051,6.570383,21.3,32.1,37.9,40.3,50.1


# Extractions

In [22]:
# Generic accessors and text summary.
# `get_eig` was already imported in the eigenvalues cell; importing it again is harmless.
from scientisttools import get_eig, get_pcaoiv, summaryPCAOIV
eig = get_eig(res_pcaoiv)
# `get_pcaoiv` takes the element to extract ("ind" or "var"); this rebinds the
# `ind` and `var` names defined in earlier cells.
ind, var = get_pcaoiv(res_pcaoiv,"ind"), get_pcaoiv(res_pcaoiv,"var")
# Print a text summary of the fitted model (the output starts with the eigenvalues).
summaryPCAOIV(res_pcaoiv)

         Principal Component Analysis with Orthogonal Instrumental Variables - Results               

Eigenvalues
                       Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7  \
Variance               2.866   0.976   0.869   0.728   0.542   0.378   0.373   
Difference             1.891   0.106   0.141   0.186   0.164   0.005   0.083   
% of var.             38.437  13.084  11.656   9.765   7.267   5.065   4.996   
Cumulative % of var.  38.437  51.521  63.178  72.943  80.211  85.275  90.272   

                       Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13   Dim.14  \
Variance               0.290   0.160   0.122   0.070   0.051   0.021    0.013   
Difference             0.130   0.038   0.052   0.019   0.030   0.007    0.013   
% of var.              3.883   2.141   1.629   0.933   0.685   0.277    0.181   
Cumulative % of var.  94.154  96.295  97.925  98.858  99.542  99.819  100.000   

                      Dim.15  
Variance                 0.0  
Difference      