# PCAiv - `rhone` dataset

In [1]:
#disable warnings
from warnings import simplefilter, filterwarnings
simplefilter(action='ignore', category=FutureWarning)
filterwarnings("ignore")
from great_tables import GT, html
def print_dt(data,title=None,subtitle=None,rowname=None,digits=4):
    """Build a ``GT`` table from a tabular object.

    The values are rounded to ``digits`` decimals, the index axis is renamed
    to ``rowname`` and turned into a regular column, and ``title`` /
    ``subtitle`` are set as the table header.

    Parameters
    ----------
    data : object providing ``round``, ``rename_axis`` and ``reset_index``
        (used in this notebook with DataFrame-like results).
    title, subtitle : header texts forwarded to ``GT.tab_header``.
    rowname : name given to the index axis before it becomes a column.
    digits : number of decimals passed to ``data.round``.

    Returns
    -------
    The ``GT`` object with its header set.
    """
    rounded = data.round(digits)
    # Name the index axis, then move it into the frame so it is shown as a column.
    table_data = rounded.rename_axis(rowname).reset_index()
    table = GT(data=table_data)
    return table.tab_header(title=title, subtitle=subtitle)

## `rhone` dataset

In [2]:
# Load the rhone dataset and preview its first ten rows, grouping the columns
# under spanners: the first 15 as dependent variables, the rest as
# explanatory (instrumental) variables.
from scientisttools.datasets import rhone
rhone_columns = rhone.columns.tolist()
dependent_cols, instrumental_cols = rhone_columns[:15], rhone_columns[15:]
rhone_preview = rhone.head(10).rename_axis("Individuals").reset_index()
(
    GT(rhone_preview)
    .tab_header(title=html("<b>Rhone Dataset</b>"))
    .tab_spanner(label=html("<b>Individuals</b>"), columns="Individuals")
    .tab_spanner(label=html("<b>Dependent<br>variables</b>"), columns=dependent_cols)
    .tab_spanner(label=html("<b>Explanatory (instrumental)<br>variables</b>"), columns=instrumental_cols)
)

Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset,Rhone Dataset
Individuals,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Dependent variables,Explanatory (instrumental) variables,Explanatory (instrumental) variables,Explanatory (instrumental) variables
Individuals,air.temp,wat.temp,conduc,pH,oxygen,secchi,caco3,totca,mg,so4,no2,hco3,suspension,organique,chloro,Geneva,Arve,Others
0,2,5.9,359,8.2,93,67,186,62.9,7.1,35.0,0.55,176.9,17.3,2.6,1.4,299.0,60.0,218.0
1,2,3.4,348,7.9,92,203,176,57.7,7.8,42.1,0.78,158.6,3.7,0.9,1.6,266.0,30.7,89.3
2,10,7.5,260,8.0,94,176,176,60.1,6.3,32.9,0.54,169.6,4.4,1.2,5.7,113.0,53.0,255.0
3,16,9.1,298,7.9,101,85,165,57.7,5.1,32.8,0.63,161.0,22.0,3.7,6.2,389.0,99.5,248.5
4,15,9.6,287,8.2,96,40,167,58.9,4.9,24.4,0.48,176.9,44.9,5.6,2.9,315.0,141.0,433.0
5,10,10.1,277,8.2,98,28,165,57.3,5.3,28.6,0.48,170.8,92.4,8.8,9.2,254.0,312.0,681.0
6,15,11.4,293,8.2,98,22,176,62.1,5.1,22.5,0.55,191.5,98.0,9.8,4.4,357.0,197.0,865.0
7,12,9.5,295,8.1,98,55,170,58.9,5.6,29.8,0.5,168.4,40.0,4.4,10.6,469.0,117.0,367.0
8,16,11.0,299,8.3,95,22,170,60.1,4.9,30.7,0.65,170.8,29.3,4.4,1.5,435.0,119.0,208.0
9,23,16.3,243,8.1,93,73,135,45.3,5.3,33.5,0.54,126.9,26.6,5.4,6.4,351.0,193.0,154.0


## Instantiation and training

In [3]:
from scientisttools import PCAiv
# Instantiation. NOTE(review): `iv` presumably holds the positions of the
# instrumental columns (Geneva, Arve, Others in the preview above) — confirm
# against the PCAiv documentation.
iv_positions = (15, 16, 17)
res_pcaiv = PCAiv(iv=iv_positions)

## `fit` function

In [4]:
# Fit the PCAiv instance created above on the rhone dataset.
res_pcaiv.fit(rhone)

## `fit_transform` function

In [5]:
# fit_transform: fit on rhone and display the first ten rows of the result.
ind_coord_fitted = res_pcaiv.fit_transform(rhone)
print_dt(
    ind_coord_fitted.head(10),
    rowname="Individuals",
    title=html("<b>Individuals coordinates</b>"),
)

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3
0,0.7748,-0.0086,-0.5993
1,1.0755,-1.1342,-0.3153
2,2.5324,0.6605,-0.1223
3,-0.534,0.2671,-0.6648
4,0.0185,2.1487,-0.643
5,-1.5908,5.2732,0.295
6,0.3125,5.8302,-1.7351
7,-1.0414,1.1366,-1.1897
8,-1.4016,-0.0433,-0.5161
9,-2.2418,0.1513,0.6373


## Ratio

In [6]:
# `ratio_` attribute of the fitted model, shown with four decimals.
pcaiv_ratio = res_pcaiv.ratio_
round(pcaiv_ratio, 4)

0.6603

## Eigenvalues

In [7]:
# Eigenvalue table of the fitted model (one row per dimension).
from scientisttools import get_eig
eig_table = get_eig(res_pcaiv)
print_dt(
    eig_table,
    rowname="Dimensions",
    title=html("<b>Eigen values</b>"),
)

Eigen values,Eigen values,Eigen values,Eigen values,Eigen values
Dimensions,Eigenvalue,Difference,Proportion,Cumulative
Dim.1,3.7031,0.165,49.0951,49.0951
Dim.2,3.5381,3.2366,46.9074,96.0025
Dim.3,0.3015,,3.9975,100.0


## Individuals information

In [8]:
# Individuals information: extract the per-individual results from the fitted model.
from scientisttools import get_pcaiv_ind
ind = get_pcaiv_ind(res_pcaiv)
# Show the names of the result tables available on `ind`.
ind._fields

('coord', 'cos2', 'contrib', 'infos')

### Individuals coordinates

In [9]:
# Factor coordinates of the first ten individuals.
print_dt(
    ind.coord.head(10),
    rowname="Individuals",
    title=html("<b>Individuals coordinates</b>"),
)

Individuals coordinates,Individuals coordinates,Individuals coordinates,Individuals coordinates
Individuals,Dim.1,Dim.2,Dim.3
0,0.7748,-0.0086,-0.5993
1,1.0755,-1.1342,-0.3153
2,2.5324,0.6605,-0.1223
3,-0.534,0.2671,-0.6648
4,0.0185,2.1487,-0.643
5,-1.5908,5.2732,0.295
6,0.3125,5.8302,-1.7351
7,-1.0414,1.1366,-1.1897
8,-1.4016,-0.0433,-0.5161
9,-2.2418,0.1513,0.6373


### Individuals contributions

In [10]:
# Contributions of the first ten individuals to each dimension.
print_dt(
    ind.contrib.head(10),
    rowname="Individuals",
    title=html("<b>Individuals contributions</b>"),
)

Individuals contributions,Individuals contributions,Individuals contributions,Individuals contributions
Individuals,Dim.1,Dim.2,Dim.3
0,0.4157,0.0001,3.0546
1,0.8009,0.9323,0.8453
2,4.4403,0.3161,0.1272
3,0.1974,0.0517,3.758
4,0.0002,3.3459,3.5154
5,1.7524,20.1515,0.7402
6,0.0676,24.6338,25.6023
7,0.7509,0.9363,12.0363
8,1.3601,0.0014,2.2655
9,3.4799,0.0166,3.4541


### Individuals cos2 

In [11]:
# cos2 values of the first ten individuals on each dimension.
print_dt(
    ind.cos2.head(10),
    rowname="Individuals",
    title=html("<b>Individuals cos2</b>"),
)

Individuals cos2,Individuals cos2,Individuals cos2,Individuals cos2
Individuals,Dim.1,Dim.2,Dim.3
0,0.6256,0.0001,0.3743
1,0.4549,0.506,0.0391
2,0.9343,0.0635,0.0022
3,0.3571,0.0893,0.5535
4,0.0001,0.9178,0.0822
5,0.0832,0.914,0.0029
6,0.0026,0.9162,0.0812
7,0.286,0.3407,0.3733
8,0.8798,0.0008,0.1193
9,0.9213,0.0042,0.0745


### Individuals additional information

In [12]:
# Additional information for the first ten individuals
# (weight, squared distance to origin, inertia).
print_dt(
    ind.infos.head(10),
    rowname="Individuals",
    title=html("<b>Individuals additionals informations <br> (weight, sq. dist., inertia)</b>"),
)

"Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)","Individuals additionals informations (weight, sq. dist., inertia)"
Individuals,Weight,Sq. Dist.,Inertia,% Inertia
0,0.0256,0.9596,0.0246,0.3262
1,0.0256,2.5425,0.0652,0.8643
2,0.0256,6.864,0.176,2.3334
3,0.0256,0.7984,0.0205,0.2714
4,0.0256,5.0306,0.129,1.7101
5,0.0256,30.4241,0.7801,10.3425
6,0.0256,37.0995,0.9513,12.6117
7,0.0256,3.7918,0.0972,1.289
8,0.0256,2.2326,0.0572,0.759
9,0.0256,5.4547,0.1399,1.8543


## Variables information

In [13]:
# Variables information: extract the per-variable results from the fitted model.
from scientisttools import get_pcaiv_var
var = get_pcaiv_var(res_pcaiv)
# Show the names of the result tables available on `var`.
var._fields

('coord', 'cos2', 'contrib', 'infos')

### Variables coordinates

In [14]:
# Coordinates of the variables on each dimension.
print_dt(
    var.coord,
    rowname="Variables",
    title=html("<b>Variables coordinates</b>"),
)

Variables coordinates,Variables coordinates,Variables coordinates,Variables coordinates
Variables,Dim.1,Dim.2,Dim.3
air.temp,-0.7135,-0.2587,-0.0878
wat.temp,-0.5853,-0.4319,0.0121
conduc,0.5911,0.2602,-0.0435
pH,-0.1319,0.1221,-0.1372
oxygen,-0.5734,0.3328,-0.096
secchi,0.6701,-0.5173,0.1121
caco3,0.5621,0.4687,0.0313
totca,0.4802,0.5451,0.0041
mg,0.6913,-0.1308,0.1442
so4,0.1168,-0.748,0.0919


### Variables contributions

In [15]:
# Contributions of the variables to each dimension.
print_dt(
    var.contrib,
    rowname="Variables",
    title=html("<b>Variables contributions</b>"),
)

Variables contributions,Variables contributions,Variables contributions,Variables contributions
Variables,Dim.1,Dim.2,Dim.3
air.temp,13.7472,1.8911,2.5583
wat.temp,9.2504,5.2712,0.0488
conduc,9.4343,1.9134,0.6277
pH,0.4699,0.4213,6.2473
oxygen,8.8791,3.1303,3.0538
secchi,12.1259,7.5642,4.1654
caco3,8.5319,6.21,0.3244
totca,6.228,8.3971,0.0055
mg,12.9066,0.4835,6.9002
so4,0.3685,15.8148,2.8034


### Variables cos2

In [16]:
# cos2 values of the variables on each dimension.
print_dt(
    var.cos2,
    rowname="Variables",
    title=html("<b>Variables cos2</b>"),
)

Variables cos2,Variables cos2,Variables cos2,Variables cos2
Variables,Dim.1,Dim.2,Dim.3
air.temp,0.8722,0.1146,0.0132
wat.temp,0.6473,0.3524,0.0003
conduc,0.8339,0.1616,0.0045
pH,0.3402,0.2915,0.3683
oxygen,0.7327,0.2468,0.0205
secchi,0.6158,0.367,0.0172
caco3,0.5887,0.4094,0.0018
totca,0.437,0.563,0.0
mg,0.9265,0.0332,0.0403
so4,0.0235,0.962,0.0145


### Variables additional information

In [17]:
# Additional information for the variables (weight, squared distance, inertia).
print_dt(
    var.infos,
    rowname="Variables",
    title=html("<b>Variables additionals informations <br> (weight, sq. dist., inertia)</b>"),
)

"Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)","Variables additionals informations (weight, sq. dist., inertia)"
Variables,Weight,Sq. Dist.,Inertia,% Inertia
air.temp,1.0,0.5837,0.5837,7.7386
wat.temp,1.0,0.5292,0.5292,7.016
conduc,1.0,0.419,0.419,5.5544
pH,1.0,0.0511,0.0511,0.6781
oxygen,1.0,0.4488,0.4488,5.9496
secchi,1.0,0.7292,0.7292,9.6679
caco3,1.0,0.5366,0.5366,7.1147
totca,1.0,0.5277,0.5277,6.9967
mg,1.0,0.5159,0.5159,6.8392
so4,1.0,0.5816,0.5816,7.7113


# Descriptive statistics

In [18]:
# Descriptive statistics of the quantitative variables, as stored on the fitted model.
quanti_summary = res_pcaiv.summary_quanti_
print_dt(
    quanti_summary,
    rowname="",
    title=html("<b>Descriptive statistics of quantitative variables</b>"),
)

Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables,Descriptive statistics of quantitative variables
Unnamed: 0_level_1,variable,count,mean,std,min,25%,50%,75%,max
0,air.temp,39,13.7692,7.3823,-5.0,9.0,14.0,19.0,24.0
1,wat.temp,39,13.1026,5.5633,3.4,9.3,11.4,16.65,24.0
2,conduc,39,281.359,36.3372,225.0,254.5,276.0,298.5,359.0
3,pH,39,8.0359,0.1769,7.6,8.0,8.1,8.1,8.3
4,oxygen,39,91.8462,6.9719,77.0,86.0,93.0,96.5,109.0
5,secchi,39,107.5385,62.7685,19.0,61.5,98.0,166.0,250.0
6,caco3,39,157.5385,27.0505,116.0,139.0,156.0,173.0,227.0
7,totca,39,53.0692,9.7806,37.3,46.9,52.5,59.3,75.8
8,mg,39,6.1103,1.2318,2.9,5.3,5.8,6.6,9.2
9,so4,39,36.0821,6.5704,21.3,32.1,37.9,40.3,50.1


# Extractions

In [19]:
# Extraction helpers: eigenvalues, individuals and variables results, then a
# markdown-formatted summary of the fitted model.
from scientisttools import get_eig, get_pcaiv, summaryPCAiv
eig = get_eig(res_pcaiv)
ind = get_pcaiv(res_pcaiv, "ind")
var = get_pcaiv(res_pcaiv, "var")
summaryPCAiv(res_pcaiv, to_markdown=True)

         Principal Component Analysis with Instrumental Variables - Results               

Eigenvalues
|                      |   Dim.1 |   Dim.2 |   Dim.3 |
|:---------------------|--------:|--------:|--------:|
| Variance             |   3.703 |   3.538 |   0.302 |
| Difference           |   0.165 |   3.237 | nan     |
| % of var.            |  49.095 |  46.907 |   3.998 |
| Cumulative % of var. |  49.095 |  96.002 | 100     |

Individuals (the 10 first)

|    |   Weight |   Sq. Dist. |   Inertia |   % Inertia |   Dim.1 |   ctr |   cos2 |   Dim.2 |    ctr |   cos2 |   Dim.3 |    ctr |   cos2 |
|---:|---------:|------------:|----------:|------------:|--------:|------:|-------:|--------:|-------:|-------:|--------:|-------:|-------:|
|  0 |    0.026 |       0.96  |     0.025 |       0.326 |   0.775 | 0.416 |  0.626 |  -0.009 |  0     |  0     |  -0.599 |  3.055 |  0.374 |
|  1 |    0.026 |       2.543 |     0.065 |       0.864 |   1.075 | 0.801 |  0.455 |  -1.134 |  0.932 |  0.506 |  