In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
from sklearn.cross_decomposition import PLSRegression

# Load the dataset once and reuse it for both the features and the target.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell
# presumably needs an older scikit-learn — confirm the pinned version.
boston = load_boston()
# Explanatory variables
X = boston.data
# Target as a single column; -1 infers the row count instead of hard-coding 506
y = boston.target.reshape(-1, 1)

# Standardize the variables.
# X gets its own scaler so that fitting on y does not overwrite the statistics
# learned from X; `scaler` still ends up fitted on y, exactly as before.
x_scaler = StandardScaler()
scaler = StandardScaler()

X = x_scaler.fit_transform(X)
y = scaler.fit_transform(y)

def _pls_component(X_res, y_res):
    """Extract one latent variable from the current residuals.

    Returns ``(z, s, t)``: the score vector ``z`` (n, 1), the X-loading ``s``
    (p, 1) and the y-loading ``t`` (1, 1).
    """
    # Weight vector: direction of X^T y scaled to unit length.
    # NOTE: if X^T y is exactly zero the norm is 0 and NaNs propagate.
    cov = X_res.T @ y_res
    w = cov / np.linalg.norm(cov)

    # Latent variable (score) for this component.
    z = X_res @ w
    zz = z.T @ z

    # Same principle as a simple-regression slope: regress X (resp. y) on z.
    s = (X_res.T @ z) / zz
    t = (y_res.T @ z) / zz
    return z, s, t


def PLS_original(X, y, component):
    """Hand-rolled single-target PLS regression using successive deflation.

    For each component the weight vector is the normalized ``X^T y`` of the
    current residuals, the latent variable is ``Z = X w``, and the loadings
    are obtained by regressing the residual X and y on Z. Both residuals are
    then deflated by the part explained by Z before the next component.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Explanatory variables. No centering or scaling is done here.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        Single target variable. A 1-D input is treated as one column.
    component : int
        Number of latent variables to extract; must be >= 1.

    Returns
    -------
    Z : latent variables (scores), shape (n_samples, component)
    s : loadings expressing X through the latent variables,
        shape (n_features, component)
    t : loadings expressing y through the latent variables,
        shape (1, component)
    y_pred : numpy.ndarray, shape (n_samples, 1)
        In-sample prediction ``Z @ t.T``.

    ``Z``, ``s`` and ``t`` are numpy arrays when ``component == 1`` and
    pandas DataFrames otherwise (kept for backward compatibility). DataFrame
    columns are labelled 0..component-1, one per latent variable.

    Raises
    ------
    ValueError
        If ``component < 1`` or ``y`` is not a single target column.
    """
    if component < 1:
        raise ValueError(f"component must be >= 1, got {component!r}")

    # Work on plain float arrays so DataFrame / list inputs behave the same.
    X_res = np.asarray(X, dtype=float)
    y_res = np.asarray(y, dtype=float)
    if y_res.ndim == 1:
        y_res = y_res.reshape(-1, 1)
    if y_res.ndim != 2 or y_res.shape[1] != 1:
        raise ValueError(
            f"y must be 1-D or have exactly one column, got shape {y_res.shape}"
        )

    scores, x_loadings, y_loadings = [], [], []
    for _ in range(component):
        z, s, t = _pls_component(X_res, y_res)

        # Deflate: remove what this latent variable explains.
        X_res = X_res - z @ s.T
        y_res = y_res - z @ t

        scores.append(z)
        x_loadings.append(s)
        y_loadings.append(t)

    # Assemble once instead of concatenating inside the loop.
    Z = np.hstack(scores)
    S = np.hstack(x_loadings)
    T = np.hstack(y_loadings)

    # Final prediction: sum over components of Z_i * t_i.
    y_pred = Z @ T.T

    if component == 1:
        return Z, S, T, y_pred
    return pd.DataFrame(Z), pd.DataFrame(S), pd.DataFrame(T), y_pred



In [2]:
# Sum of the in-sample predictions of the hand-rolled PLS
# (element [3] of the returned tuple is y_pred).
PLS_original(X,y,component=10)[3].sum()

-4.618527782440651e-13

In [3]:
# y-loadings t of the hand-rolled PLS (element [2] of the returned tuple),
# one column per latent variable.
PLS_original(X, y, component=10)[2]

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
0,0.304188,0.389431,0.158005,0.151064,0.085118,0.0721,0.060469,0.041069,0.007662,0.007468


In [4]:
# Reference implementation with scikit-learn
plsr = PLSRegression(n_components=10)

# Fit on the standardized data
model = plsr.fit(X, y)
# Sum of the in-sample predictions, for comparison with the hand-rolled result
print(model.predict(X).sum())

-2.2026824808563106e-13


In [5]:
# y-loadings of all fitted components (the scikit-learn counterpart of the
# t loadings returned by PLS_original), not only the first component.
# In the outputs shown, the magnitudes match the hand-rolled values while some
# signs differ — presumably a sign-convention difference; verify.
model.y_loadings_

array([[-0.30418772,  0.38943061, -0.1580054 , -0.15106413, -0.08511757,
        -0.07210036,  0.06046899, -0.04106875,  0.00766153,  0.00746824]])