# PCA with Autos Dataset - predictPCA & supvarPCA

In [1]:
import warnings
# Silence FutureWarning messages for the rest of the notebook.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Silence every warning category.
# NOTE(review): this blanket filter already covers FutureWarning, so the call
# above is redundant; it also hides any other warnings raised later in the
# notebook — consider narrowing it.
warnings.filterwarnings("ignore")

In [2]:
# Load the active part of the cars2006 dataset (the table the PCA is fitted on)
from scientisttools import load_cars2006
D = load_cars2006(which="actif")
# Summarise the index, columns, dtypes and non-null counts
D.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, Alfasud TI to Lada 1300
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   CYL     18 non-null     int64
 1   PUISS   18 non-null     int64
 2   LONG    18 non-null     int64
 3   LARG    18 non-null     int64
 4   POIDS   18 non-null     int64
 5   VMAX    18 non-null     int64
dtypes: int64(6)
memory usage: 1008.0+ bytes


In [3]:
# PCA: request 5 components and fit the model on the active table D
from scientisttools import PCA
res_pca = PCA(n_components=5)
res_pca.fit(D)

In [4]:
# Load the supplementary individuals (loaded separately from the active
# table D that the PCA was fitted on)
ind_sup = load_cars2006(which="indsup")
# Bare last expression instead of print(): the notebook then renders the table
# with its rich display, consistent with the other cells
ind_sup

                CYL  PUISS  LONG  LARG  POIDS  VMAX
Modele                                             
Peugeot 604    2664    136   472   177   1410   180
Peugeot 304 S  1288     74   414   157    915   160


## Supplementary individuals

### transform

In [5]:
# Factor coordinates of the supplementary individuals, obtained from the
# fitted PCA's transform method (one column per retained dimension)
ind_sup_coord = res_pca.transform(ind_sup)
ind_sup_coord

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Peugeot 604,5.563292,0.338609,-0.464289,0.402146,0.389811
Peugeot 304 S,-2.212241,1.257779,-0.093044,-0.353702,-0.648528


### predictPCA

In [6]:
# predictPCA takes the fitted model and the supplementary individuals; its
# result is indexed by key in the following cells ("coord", "cos2", "dist")
from scientisttools import predictPCA
predict = predictPCA(res_pca,X=ind_sup)
# Coordinates of the supplementary individuals; in this run they match the
# values returned by res_pca.transform(ind_sup)
predict["coord"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Peugeot 604,5.563292,0.338609,-0.464289,0.402146,0.389811
Peugeot 304 S,-2.212241,1.257779,-0.093044,-0.353702,-0.648528


In [7]:
# "cos2" table of the supplementary individuals on each dimension
# (presumably squared cosines, i.e. quality of representation — confirm in the
# scientisttools documentation)
predict["cos2"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
Peugeot 604,0.979416,0.003628,0.006822,0.005118,0.004809
Peugeot 304 S,0.694587,0.224528,0.001229,0.017756,0.059692


In [8]:
# "dist" entry: the returned Series is named "Sq. Dist.", so these appear to be
# squared distances (presumably to the origin — confirm)
predict["dist"]

Peugeot 604      31.600679
Peugeot 304 S     7.045929
Name: Sq. Dist., dtype: float64

In [9]:
# Supplementary quantitative variables
X_quanti_sup = load_cars2006(which="varquantsup")
# Supplementary qualitative variables
X_quali_sup = load_cars2006(which="varqualsup")

In [10]:
# Compute results for the supplementary variables from the fitted PCA; the
# returned object is indexed by "quanti" and "quali" in the following cells
from scientisttools import supvarPCA
sup_var_predict = supvarPCA(res_pca,X_quanti_sup=X_quanti_sup,X_quali_sup=X_quali_sup)

### Quantitative variables

In [11]:
# Results for the supplementary quantitative variables
quanti_sup = sup_var_predict["quanti"]
# List the available result tables
quanti_sup.keys()

dict_keys(['coord', 'cor', 'cos2'])

In [12]:
# Coordinates of the supplementary quantitative variables on each dimension
quanti_sup["coord"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
PRIX,0.772475,0.086708,-0.133893,-0.225829,0.15945
RPOIDPUIS,-0.589039,-0.672545,-0.150176,0.213657,-0.101628


In [13]:
# "cor" table — presumably the correlations between each supplementary
# variable and the dimensions (confirm in the scientisttools documentation);
# in this run the values are identical to the "coord" table
quanti_sup["cor"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
PRIX,0.772475,0.086708,-0.133893,-0.225829,0.15945
RPOIDPUIS,-0.589039,-0.672545,-0.150176,0.213657,-0.101628


In [14]:
# "cos2" table of the supplementary quantitative variables; in this run the
# values equal the squares of the "coord" values
quanti_sup["cos2"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
PRIX,0.596718,0.007518,0.017927,0.050999,0.025424
RPOIDPUIS,0.346967,0.452317,0.022553,0.045649,0.010328


### Qualitative variables

In [15]:
# Results for the supplementary qualitative variables
quali_sup = sup_var_predict["quali"]
# List the available result tables
quali_sup.keys()

dict_keys(['coord', 'cos2', 'vtest', 'dist', 'eta2'])

In [16]:
# Coordinates of the categories (rows 1_M, 2_B, 3_TB in this run) on each dimension
quali_sup["coord"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
1_M,-2.000355,-0.022579,-0.069577,0.055847,-0.055043
2_B,0.235313,0.045271,0.113971,0.218518,-0.078688
3_TB,1.39243,-0.034001,-0.074984,-0.301477,0.137672


In [17]:
# "cos2" table of the categories on each dimension (presumably squared
# cosines — confirm in the scientisttools documentation)
quali_sup["cos2"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
1_M,0.996454,0.000127,0.001206,0.000777,0.000754
2_B,0.445067,0.016473,0.104404,0.383802,0.049768
3_TB,0.941991,0.000562,0.002732,0.044158,0.009209


In [18]:
# "vtest" table of the categories on each dimension (presumably test values
# for each category's position on the axis — confirm in the scientisttools
# documentation)
quali_sup["vtest"]

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4,Dim.5
1_M,-2.432717,-0.062401,-0.291282,0.308754,-0.462022
2_B,0.368103,0.160934,0.61373,1.553949,-0.849589
3_TB,1.930766,-0.107138,-0.357922,-1.900361,1.317582


In [19]:
# "dist" entry for the categories: the returned Series is named "Sq. Dist.",
# so these appear to be squared distances (presumably to the origin — confirm)
quali_sup["dist"]

1_M     4.015660
2_B     0.124413
3_TB    2.058260
Name: Sq. Dist., dtype: float64