In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import random

from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import MultiTaskLasso, MultiTaskLassoCV
from sklearn.preprocessing import StandardScaler

from scipy.stats import matrix_normal

In [2]:
# Load the persisted model-info object; later cells index it with the keys
# 'pipe', 'target_cols' and 'data_cols'.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load artifacts from a trusted source.
# NOTE(review): this is an absolute, machine-specific path; consider deriving
# it from a configurable results directory so the notebook runs elsewhere.
model_info_path = '/workspaces/BO_EXPERIMENTS/src/results/20260127/PBTc/model/lasso.pkl'
model_info = joblib.load(model_info_path)

In [3]:
# Pull the fitted estimator out of the stored pipeline (step named 'model')
# and keep its coefficient matrix for the sampling cells below.
model = model_info['pipe']['model']
model_coef = model.coef_

# Label the coefficients: one row per target column, one column per input
# (data) column, then print the table.
coef_df = pd.DataFrame(
    data=model_coef,
    index=model_info['target_cols'],
    columns=model_info['data_cols'],
)
print(coef_df)

      AA001  AA002  AA003  AA004  AA005  AA006  AA010  AA013  AW001  AW002  \
SPGR    0.0   -0.0    0.0    0.0   -0.0    0.0   -0.0    0.0    0.0    0.0   
TE     -0.0    0.0    0.0   -0.0    0.0   -0.0   -0.0    0.0    0.0    0.0   

      ...  SS001     SS004  SS006  SS008     SS010  SS011  SS012  SS017  \
SPGR  ...   -0.0 -0.007812   -0.0   -0.0 -0.004637   -0.0   -0.0   -0.0   
TE    ...    0.0  0.031992   -0.0   -0.0  0.061188    0.0    0.0    0.0   

      SS028  SS051  
SPGR   -0.0   -0.0  
TE      0.0    0.0  

[2 rows x 125 columns]


In [4]:
# Use the fitted coefficient matrix as the mean M of a matrix normal
# distribution. M has one row per target and one column per input feature
# (same layout as `model.coef_`).
#   V -> passed as `rowcov`: covariance among the rows of M, i.e. the two
#        targets are given correlation p.
#   U -> passed as `colcov`: covariance among the columns of M; the identity
#        matrix makes the per-feature coefficients independent.
p = 0.8
N_COEF_SAMPLES = 500  # number of coefficient matrices (betas) to draw
COEF_SEED = 42        # fixed seed so Restart & Run All reproduces the draws

M = model_coef
U = np.eye(model_coef.shape[-1])
V = np.array(
    [
        [1, p],
        [p, 1],
    ]
)

# Draw every coefficient matrix in a single seeded call; the result has shape
# (N_COEF_SAMPLES,) + M.shape. Each draw is then transposed to
# (n_features, n_targets) so a design matrix can be right-multiplied by it.
coef_draws = matrix_normal.rvs(
    mean=M,
    rowcov=V,
    colcov=U,
    size=N_COEF_SAMPLES,
    random_state=COEF_SEED,
)
coef_sample_ls = [np.transpose(draw) for draw in coef_draws]

In [None]:
# Generate N sparse rows of X. Each row has exactly k non-zero ("active")
# entries whose values are one Dirichlet(1, ..., 1) draw, so they are
# non-negative and sum to 1; the whole matrix is then scaled by 100.
N = 1000
D = model_coef.shape[-1]
k = 5
X_SEED = 0  # fixed seed so Restart & Run All reproduces the same X
rng = np.random.default_rng(X_SEED)

# Preallocate X instead of concatenating per-row arrays. This also avoids
# `np.concat`, which only exists in NumPy >= 2.0.
X = np.zeros((N, D))
alpha = np.ones(k)
for i in range(N):
    # Pick k distinct active columns for this row.
    active_term = rng.choice(D, size=k, replace=False)

    # One Dirichlet draw supplies the k mixture proportions of this row.
    X[i, active_term] = rng.dirichlet(alpha)

X = X * 100

# For every sampled coefficient matrix, map the same X to Y and record the
# Pearson correlation between the two columns of Y.
corr_coef_ls = []
for coef_sample in coef_sample_ls:
    # Y has one column per column of coef_sample.
    Y = np.dot(X, coef_sample)

    # np.corrcoef returns the 2x2 correlation matrix; [0, 1] is the
    # cross-correlation between the two Y columns.
    corr_coef = np.corrcoef(Y[:, 0], Y[:, 1])
    corr_coef_ls.append(corr_coef[0, 1].item())

# Print the mean of the collected correlation coefficients.
print(np.mean(corr_coef_ls).round(3))

0.8
