# CA - predictCA & supvarCA

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")

In [2]:
# Load the children dataset provided by scientisttools
from scientisttools import load_children
children = load_children()
# Append a qualitative "group" column: 4 rows of A, then 5 of B, 5 of C, 4 of D
children["group"] = [
    label
    for label, size in (("A", 4), ("B", 5), ("C", 5), ("D", 4))
    for _ in range(size)
]

In [3]:
# Active elements: the first 14 rows restricted to the first 5 columns
actif = children.iloc[:14].iloc[:, :5]
actif.info()

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, money to work
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   unqualified          14 non-null     int32
 1   cep                  14 non-null     int32
 2   bepc                 14 non-null     int32
 3   high_school_diploma  14 non-null     int32
 4   university           14 non-null     int32
dtypes: int32(5)
memory usage: 392.0+ bytes


In [4]:
# Supplementary rows: every row from position 14 onward, first 5 columns only
row_sup = children.iloc[14:].iloc[:, :5]
row_sup.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, comfort to to_live
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   unqualified          4 non-null      int32
 1   cep                  4 non-null      int32
 2   bepc                 4 non-null      int32
 3   high_school_diploma  4 non-null      int32
 4   university           4 non-null      int32
dtypes: int32(5)
memory usage: 112.0+ bytes


In [5]:
# Supplementary columns: column positions 5 to 7, taken on the first 14 rows
X_col_sup = children.iloc[:14].iloc[:, 5:8]
X_col_sup.info()

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, money to work
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   thirty      14 non-null     object
 1   fifty       14 non-null     object
 2   more_fifty  14 non-null     object
dtypes: object(3)
memory usage: 448.0+ bytes


In [6]:
# Supplementary qualitative variable: the column at position 8, first 14 rows
# (selecting a single column yields a Series rather than a DataFrame)
X_quali_sup = children.iloc[:, 8].iloc[:14]
X_quali_sup.info()

<class 'pandas.core.series.Series'>
Index: 14 entries, money to work
Series name: group
Non-Null Count  Dtype 
--------------  ----- 
14 non-null     object
dtypes: object(1)
memory usage: 224.0+ bytes


In [7]:
# Instantiate the CA model, flagging the supplementary elements by position:
# rows 14 to 17, columns 5 to 7, and column 8 as the qualitative variable
from scientisttools import CA
res_ca = CA(
    n_components=None,
    row_sup=[14, 15, 16, 17],
    col_sup=[5, 6, 7],
    quali_sup=8,
)
res_ca.fit(children)

In [8]:
# Apply the fitted model to the supplementary rows; the displayed result has
# one line per supplementary row and the columns Dim.1 to Dim.4
res_ca.transform(row_sup)

Unnamed: 0,Dim.1,Dim.2,Dim.3,Dim.4
comfort,0.20967,0.703168,0.071112,0.307135
disagreement,0.146278,0.119011,0.171089,-0.313217
world,0.523304,0.142971,0.083993,-0.10636
to_live,0.308307,0.502019,0.520934,0.255736


# Predict

In [9]:
# predictCA receives the fitted model and the supplementary rows; the object
# it returns exposes the keys 'coord', 'cos2' and 'dist'
from scientisttools import predictCA
predict = predictCA(res_ca, X=row_sup)
predict.keys()

dict_keys(['coord', 'cos2', 'dist'])

In [10]:
# 'coord' entry for the supplementary rows (only 4 rows exist, so head(6) shows them all)
predict["coord"].head(6)

Unnamed: 0_level_0,Dim.1,Dim.2,Dim.3,Dim.4
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
comfort,0.20967,0.703168,0.071112,0.307135
disagreement,0.146278,0.119011,0.171089,-0.313217
world,0.523304,0.142971,0.083993,-0.10636
to_live,0.308307,0.502019,0.520934,0.255736


In [11]:
# 'cos2' entry: one line per supplementary row, one column per dimension
predict["cos2"].head(6)

Unnamed: 0_level_0,Dim.1,Dim.2,Dim.3,Dim.4
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
comfort,0.068928,0.77524,0.007929,0.147903
disagreement,0.131322,0.086926,0.179649,0.602103
world,0.875877,0.065377,0.022564,0.036182
to_live,0.138997,0.368536,0.39683,0.095636


In [12]:
# 'dist' entry: a Series named "Sq. Dist." indexed by the supplementary rows
predict["dist"].head(6)

rownames
comfort         0.637796
disagreement    0.162937
world           0.312655
to_live         0.683849
Name: Sq. Dist., dtype: float64

In [13]:
# supvarCA is called with three kinds of supplementary variables. X_col_sup is
# passed twice (as X_col_sup and as X_quanti_sup), so the same three columns
# show up under both the "col" and the "quanti" results.
# NOTE(review): confirm that reusing X_col_sup for X_quanti_sup is intentional
from scientisttools import supvarCA
supvar = supvarCA(res_ca,X_col_sup=X_col_sup,X_quanti_sup=X_col_sup,X_quali_sup=X_quali_sup)
supvar["col"]

{'coord':                Dim.1     Dim.2     Dim.3     Dim.4
 thirty      0.105413 -0.059696 -0.103226  0.069780
 fifty      -0.017064  0.049077 -0.015689 -0.013061
 more_fifty -0.177068 -0.048138  0.100773 -0.085175,
 'cos2':                Dim.1     Dim.2     Dim.3     Dim.4
 thirty      0.137560  0.044115  0.131911  0.060278
 fifty       0.010870  0.089903  0.009188  0.006368
 more_fifty  0.286099  0.021145  0.092667  0.066201,
 'dist': thirty        0.080779
 fifty         0.026790
 more_fifty    0.109588
 Name: Sq. Dist., dtype: float64}

In [14]:
# "quanti" results: a dict holding the 'coord' and 'cos2' tables
supvar["quanti"]

{'coord':                Dim.1     Dim.2     Dim.3     Dim.4
 thirty      0.121921 -0.605608 -0.253446 -0.030834
 fifty       0.016173 -0.601050 -0.247368 -0.088893
 more_fifty -0.435563 -0.511125 -0.006104 -0.212475,
 'cos2':                Dim.1     Dim.2     Dim.3     Dim.4
 thirty      0.014865  0.366761  0.064235  0.000951
 fifty       0.000262  0.361261  0.061191  0.007902
 more_fifty  0.189715  0.261249  0.000037  0.045146}

In [15]:
# "quali" results: a dict holding 'coord', 'cos2', 'vtest', 'eta2' and 'dist'
supvar["quali"]

{'coord':       Dim.1     Dim.2     Dim.3     Dim.4
 A -0.021294 -0.048999  0.001423  0.002995
 B  0.004519  0.093721  0.024615  0.002073
 C  0.033353  0.025645 -0.017511 -0.006352,
 'cos2':       Dim.1     Dim.2     Dim.3     Dim.4
 A  0.158249  0.837913  0.000707  0.003131
 B  0.002169  0.933017  0.064358  0.000456
 C  0.525452  0.310650  0.144839  0.019059,
 'vtest':       Dim.1     Dim.2     Dim.3     Dim.4
 A -0.870975 -2.004171  0.058216  0.122506
 B  0.085968  1.782850  0.468242  0.039425
 C  0.875353  0.673057 -0.459577 -0.166714,
 'eta2':           Dim.1     Dim.2     Dim.3     Dim.4
 group  0.016166  0.233092  0.028208  0.002816,
 'dist': A    0.002865
 B    0.009414
 C    0.002117
 Name: Sq. Dist., dtype: float64}