In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

# 读取数据
data = pd.read_excel('附件一（训练集）.xlsx', sheet_name=None,)

# 合并四个材料的数据
df_list = []
for sheet_name, df in data.items():
    df['material'] = sheet_name  # 添加材料类别
    df_list.append(df)
df = pd.concat(df_list, ignore_index=True)
print(df.columns)

# 重命名列
df.columns = ['Temperature', 'Frequency', 'Core_Loss', 'Waveform', 'Material']  # 修正错别字
print(df.columns)


Index(['温度，oC', '频率，Hz', '磁芯损耗，w/m3', '励磁波形', 'material'], dtype='object')
Index(['Temperature', 'Frequency', 'Core_Loss', 'Waveform', 'Material'], dtype='object')


In [None]:
from sklearn.feature_selection import mutual_info_regression

# 将励磁波形和磁芯材料进行独热编码
encoder = OneHotEncoder(sparse_output=False)
encoded_features = encoder.fit_transform(df[['Waveform', 'Material']])

# 计算互信息
mi = mutual_info_regression(encoded_features, df['Core_Loss'])

# 输出互信息结果
for feature, score in zip(encoder.get_feature_names_out(['Waveform', 'Material']), mi):
    print(f'Mutual Information between {feature} and Core Loss: {score}')


In [15]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# 假设 df 已经存在并且具有正确的列名

# 1. 对励磁波形和材料分别进行独热编码
waveform_encoder = OneHotEncoder(sparse_output=False)
material_encoder = OneHotEncoder(sparse_output=False)

encoded_waveform = waveform_encoder.fit_transform(df[['Waveform']])
encoded_material = material_encoder.fit_transform(df[['Material']])

# 2. 创建交互项：逐个编码的波形和材料列相乘，产生交互项
interaction_terms = np.array([encoded_waveform[:, i] * encoded_material[:, j] 
                              for i in range(encoded_waveform.shape[1]) 
                              for j in range(encoded_material.shape[1])]).T

# 3. 将编码后的波形、材料及其交互项组合在一起
X = np.concatenate([encoded_waveform, encoded_material, interaction_terms], axis=1)
y = df['Core_Loss']

# 4. 特征标准化：使用 StandardScaler 进行特征缩放
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 5. 使用 Ridge 回归拟合模型，alpha 值可以根据需要调整正则化的强度
model = Ridge(alpha=1.0)  # 可以调整 alpha 来控制正则化的强度
model.fit(X_scaled, y)

# 6. 打印回归系数
coefficients = model.coef_
print(f"Ridge Coefficients after Scaling: {coefficients}")

# 7. 进行交叉验证，评估模型稳定性和性能
scores = cross_val_score(model, X_scaled, y, cv=5)  # 5折交叉验证
print(f"Cross-Validation Scores: {scores}")
print(f"Average Cross-Validation Score: {np.mean(scores)}")

# 8. 生成特征名：将Waveform和Material的特征名分别生成，并加入交互项的特征名
waveform_feature_names = waveform_encoder.get_feature_names_out(['Waveform'])
material_feature_names = material_encoder.get_feature_names_out(['Material'])
interaction_feature_names = [f"Waveform_{i} x Material_{j}" for i in range(encoded_waveform.shape[1]) 
                                                             for j in range(encoded_material.shape[1])]

# 将所有特征名组合在一起
feature_names = np.concatenate([waveform_feature_names, material_feature_names, interaction_feature_names])

# 9. 打印回归系数和特征名的对应关系
coefficients_df = pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})
print(coefficients_df)


Ridge Coefficients after Scaling: [ 16171.99658642  13978.3487141  -30173.33608255  -5041.85151554
   9326.48539863  15590.1717235  -20488.99077586   3025.85024943
  22615.6133191   17760.70918723 -13088.16665733  -6062.74223706
  10448.01091709  23636.05230767  -8629.82986905  -5778.97095151
 -17195.22741645 -18043.51176144 -10573.99346559]
Cross-Validation Scores: [-2.30722911  0.03301685  0.0287332  -0.01015243 -0.07214884]
Average Cross-Validation Score: -0.46555606560905255
                    Feature   Coefficient
0              Waveform_三角波  16171.996586
1              Waveform_梯形波  13978.348714
2              Waveform_正弦波 -30173.336083
3        Material_material1  -5041.851516
4        Material_material2   9326.485399
5        Material_material3  15590.171724
6        Material_material4 -20488.990776
7   Waveform_0 x Material_0   3025.850249
8   Waveform_0 x Material_1  22615.613319
9   Waveform_0 x Material_2  17760.709187
10  Waveform_0 x Material_3 -13088.166657
11  Waveform