In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.stats import kruskal
import itertools

# 读取数据
data = pd.read_excel('附件一（训练集）.xlsx')

# 提取特征（第1到第4列）和磁通密度列（第5到第1029列）
leixing = data.iloc[:, 0]
temperature = data.iloc[:, 1]
core_loss = data.iloc[:, 3]
excitation_waveform = data.iloc[:, 4]

# 编码分类变量
label_encoder = LabelEncoder()
excitation_waveform_encoded = label_encoder.fit_transform(excitation_waveform)
leixing_encoded = label_encoder.fit_transform(leixing)

# 构建特征矩阵
features = pd.DataFrame({
    'temperature': temperature,
    'excitation_waveform': excitation_waveform_encoded,
    'leixing': leixing_encoded
})

# 交互作用检验
def interaction_test(var1, var2):
    # 组合自变量
    interaction = features[var1].astype(str) + '_' + features[var2].astype(str)
    features['interaction'] = interaction

    # 准备进行检验
    groups = features.groupby('interaction')[core_loss.name].apply(list)
    stat, p_value = kruskal(*groups)

    return stat, p_value

# 检验不同变量的交互作用
variables = ['leixing', 'excitation_waveform', 'temperature']
results = {}

for var1, var2 in itertools.combinations(variables, 2):
    stat, p_value = interaction_test(var1, var2)
    results[f'{var1} x {var2}'] = {'statistic': stat, 'p-value': p_value}

# 输出结果
for interaction, result in results.items():
    print(f'Interaction: {interaction}, Statistic: {result["statistic"]}, p-value: {result["p-value"]}')


KeyError: 'Column not found: 磁芯损耗，w/m3'

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.stats import kruskal
import itertools

# 读取数据
data = pd.read_excel('附件一（训练集）.xlsx')

# 提取特征（第1到第4列）和磁通密度列（第5到第1029列）
leixing = data.iloc[:, 0]
temperature = data.iloc[:, 1]
core_loss = data.iloc[:, 3]  # 确保这里的索引正确
excitation_waveform = data.iloc[:, 4]

# 编码分类变量
label_encoder = LabelEncoder()
excitation_waveform_encoded = label_encoder.fit_transform(excitation_waveform)
leixing_encoded = label_encoder.fit_transform(leixing)

# 构建特征矩阵
features = pd.DataFrame({
    'temperature': temperature,
    'excitation_waveform': excitation_waveform_encoded,
    'leixing': leixing_encoded,
    'core_loss': core_loss  # 将 core_loss 添加到特征矩阵中
})

# 交互作用检验
def interaction_test(var1, var2):
    # 组合自变量
    interaction = features[var1].astype(str) + '_' + features[var2].astype(str)
    features['interaction'] = interaction

    # 准备进行检验
    groups = features.groupby('interaction')['core_loss'].apply(list)
    stat, p_value = kruskal(*groups)

    return stat, p_value

# 检验不同变量的交互作用
variables = ['leixing', 'excitation_waveform', 'temperature']
results = {}

for var1, var2 in itertools.combinations(variables, 2):
    stat, p_value = interaction_test(var1, var2)
    results[f'{var1} x {var2}'] = {'statistic': stat, 'p-value': p_value}

# 输出结果
for interaction, result in results.items():
    print(f'Interaction: {interaction}, Statistic: {result["statistic"]}, p-value: {result["p-value"]}')


Interaction: leixing x excitation_waveform, Statistic: 1174.2621580753141, p-value: 5.701628975408755e-245
Interaction: leixing x temperature, Statistic: 363.4425584471862, p-value: 3.2298626487044606e-68
Interaction: excitation_waveform x temperature, Statistic: 919.9240744401806, p-value: 3.2261009925169954e-190


In [24]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import kruskal
import itertools

# 假设data已经加载为DataFrame
leixing = data.iloc[:, 0].values.reshape(-1, 1)
temperature = data.iloc[:, 1].values.reshape(-1, 1)
core_loss = data.iloc[:, 3].values  # 确保这里的索引正确
excitation_waveform = data.iloc[:, 4].values.reshape(-1, 1)

# 对励磁波形、温度和材料进行独热编码
waveform_encoder = OneHotEncoder(sparse_output=False)
temperature_encoder = OneHotEncoder(sparse_output=False)
leixing_encoder = OneHotEncoder(sparse_output=False)

leixing_encoded = leixing_encoder.fit_transform(leixing)
temperature_encoded = temperature_encoder.fit_transform(temperature)
excitation_waveform_encoded = waveform_encoder.fit_transform(excitation_waveform)

# 将编码结果合并到一个DataFrame中
features = pd.DataFrame(np.hstack([leixing_encoded, temperature_encoded, excitation_waveform_encoded]), 
                        columns=(leixing_encoder.get_feature_names_out().tolist() + 
                                 temperature_encoder.get_feature_names_out().tolist() + 
                                 waveform_encoder.get_feature_names_out().tolist()))

# 添加因变量core_loss
features['core_loss'] = core_loss

# 交互作用检验
def interaction_test(var1, var2):
    # 组合自变量
    interaction = features[var1].astype(str) + '_' + features[var2].astype(str)
    features['interaction'] = interaction

    # 准备进行检验
    groups = features.groupby('interaction')['core_loss'].apply(list)
    stat, p_value = kruskal(*groups)

    return stat, p_value

# 检验不同变量的交互作用
variables = features.columns[:-1]  # 选择自变量，不包括因变量
results = {}

for var1, var2 in itertools.combinations(variables, 2):
    stat, p_value = interaction_test(var1, var2)
    results[f'Interaction {var1} x {var2}'] = {'statistic': stat, 'p-value': p_value}

# 输出结果
for interaction, result in results.items():
    print(f'{interaction}, Statistic: {result["statistic"]}, p-value: {result["p-value"]}')


Interaction x0_材料1 x x0_材料2, Statistic: 34.74415246719091, p-value: 2.853666076713013e-08
Interaction x0_材料1 x x0_材料3, Statistic: 114.6874727867579, p-value: 1.247187394252569e-25
Interaction x0_材料1 x x0_材料4, Statistic: 288.77277549674596, p-value: 1.9669283451572606e-63
Interaction x0_材料1 x x0_25, Statistic: 58.64969324477283, p-value: 1.1420313920047882e-12
Interaction x0_材料1 x x0_50, Statistic: 1.069891576480967, p-value: 0.7843468924715999
Interaction x0_材料1 x x0_70, Statistic: 17.020847046067452, p-value: 0.0006997993894159842
Interaction x0_材料1 x x0_90, Statistic: 18.47839612928546, p-value: 0.000350410565764165
Interaction x0_材料1 x x0_三角波, Statistic: 282.4110085628947, p-value: 6.369997661637798e-61
Interaction x0_材料1 x x0_梯形波, Statistic: 164.56268795738916, p-value: 1.898399156085309e-35
Interaction x0_材料1 x x0_正弦波, Statistic: 824.8241311608167, p-value: 1.7879771500583213e-178
Interaction x0_材料2 x x0_材料3, Statistic: 200.43259441187928, p-value: 2.9965058278453686e-44
Interacti

In [27]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# 假设data已经加载为DataFrame
leixing= data.iloc[:, 0].values  # 材料类型
temperature = data.iloc[:, 1].values  # 温度
core_loss = data.iloc[:, 3].values  # 核损耗
excitation_waveform= data.iloc[:, 4].values  # 励磁波形
# 将你的数据转换为DataFrame，并确保分类变量的正确处理
data = pd.DataFrame({
    'leixing': leixing,  # 材料类型
    'temperature': temperature,  # 温度
    'core_loss': core_loss,  # 核损耗
    'excitation_waveform': excitation_waveform  # 励磁波形
})

# 确保这些列被视为分类变量
data['leixing'] = data['leixing'].astype('category')
data['temperature'] = data['temperature'].astype('category')
data['excitation_waveform'] = data['excitation_waveform'].astype('category')

# 使用包含交互项的线性模型
formula = 'core_loss ~ C(leixing) * C(temperature) * C(excitation_waveform)'
model = smf.ols(formula=formula, data=data).fit()

# 输出模型摘要结果，查看系数和交互项的显著性
print(model.summary())

# 获取交互效应的p值
interaction_pvalues = model.pvalues
print(interaction_pvalues)


                            OLS Regression Results                            
Dep. Variable:              core_loss   R-squared:                       0.081
Model:                            OLS   Adj. R-squared:                  0.078
Method:                 Least Squares   F-statistic:                     23.20
Date:                Sun, 22 Sep 2024   Prob (F-statistic):          1.51e-188
Time:                        15:19:50   Log-Likelihood:            -1.7626e+05
No. Observations:               12400   AIC:                         3.526e+05
Df Residuals:                   12352   BIC:                         3.530e+05
Df Model:                          47                                         
Covariance Type:            nonrobust                                         
                                                                           coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------

In [30]:
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare
# 读取数据
data = pd.read_excel('附件一（训练集）.xlsx')
# 假设数据已经加载为 DataFrame，且 leixing、temperature、core_loss、excitation_waveform 已正确存储在各列中
# 示例数据加载
# data = pd.read_csv('your_data_file.csv') # 这里是你的数据文件路径
# 假设data已经加载为DataFrame
leixing= data.iloc[:, 0].values  # 材料类型
temperature = data.iloc[:, 1].values  # 温度
core_loss = data.iloc[:, 3].values  # 核损耗
excitation_waveform= data.iloc[:, 4].values  # 励磁波形

# 创建一个新的 DataFrame 用于分析
df = pd.DataFrame({
    'leixing': leixing,
    'temperature': temperature,
    'core_loss': core_loss,
    'excitation_waveform': excitation_waveform
})

# 查看前5行数据
print("数据预览:")
print(df.head())

# 定义函数，执行弗里德曼检验并输出结果
def friedman_test_for_single_variable(data, var):
    # 按分类变量分组后提取核损耗值
    groups = [group['core_loss'].values for name, group in data.groupby(var)]
    # 执行弗里德曼检验
    if len(groups) > 1:  # 确保至少有两个组
        stat, p = friedmanchisquare(*groups)
        print(f"Friedman test for {var}: chi-square statistic = {stat}, p-value = {p}")
        return stat, p
    else:
        print(f"变量 {var} 的分类值不足以执行弗里德曼检验。")
        return None, None

# 对单独的材料类型、温度、励磁波形执行弗里德曼检验
#print("\n单个变量弗里德曼检验结果:")
#friedman_test_for_single_variable(df, 'leixing')
#friedman_test_for_single_variable(df, 'temperature')
#friedman_test_for_single_variable(df, 'excitation_waveform')




数据预览:
  leixing  temperature    core_loss excitation_waveform
0     材料1           25  1997.955250                 正弦波
1     材料1           25  2427.749830                 正弦波
2     材料1           25  3332.725760                 正弦波
3     材料1           25  4502.908007                 正弦波
4     材料1           25  6063.023248                 正弦波

单个变量弗里德曼检验结果:


ValueError: Unequal N in friedmanchisquare.  Aborting.

In [32]:
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare
# 读取数据
data = pd.read_excel('附件一（训练集）.xlsx')
# 假设数据已经加载为 DataFrame，且 leixing、temperature、core_loss、excitation_waveform 已正确存储在各列中
# 示例数据加载
# data = pd.read_csv('your_data_file.csv') # 这里是你的数据文件路径
# 假设data已经加载为DataFrame
leixing= data.iloc[:, 0].values  # 材料类型
temperature = data.iloc[:, 1].values  # 温度
core_loss = data.iloc[:, 3].values  # 核损耗
excitation_waveform= data.iloc[:, 4].values  # 励磁波形

# 创建一个新的 DataFrame 用于分析
df = pd.DataFrame({
    'leixing': leixing,
    'temperature': temperature,
    'core_loss': core_loss,
    'excitation_waveform': excitation_waveform
})

# 查看前5行数据
print("数据预览:")
print(df.head())



数据预览:
  leixing  temperature    core_loss excitation_waveform
0     材料1           25  1997.955250                 正弦波
1     材料1           25  2427.749830                 正弦波
2     材料1           25  3332.725760                 正弦波
3     材料1           25  4502.908007                 正弦波
4     材料1           25  6063.023248                 正弦波


In [43]:
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare

# 定义函数，执行弗里德曼检验并输出结果
def friedman_test_for_single_variable(data, var):
    # 按分类变量分组后提取核损耗值
    groups = [group['core_loss'].values for name, group in data.groupby(var)]
    
    # 检查所有组是否有相同数量的样本
    group_lengths = [len(group) for group in groups]
    if len(set(group_lengths)) != 1:
        print(f"分类变量 {var} 的分组样本数量不一致，无法执行弗里德曼检验。")
        print(f"每组的样本数量分别为: {group_lengths}")
        return None, None

    # 执行弗里德曼检验
    if len(groups) > 1:  # 确保至少有两个组
        stat, p = friedmanchisquare(*groups)
        print(f"Friedman test for {var}: chi-square statistic = {stat}, p-value = {p}")
        return stat, p
    else:
        print(f"变量 {var} 的分类值不足以执行弗里德曼检验。")
        return None, None

# 对单独的材料类型、温度、励磁波形执行弗里德曼检验
print("\n单个变量弗里德曼检验结果:")
friedman_test_for_single_variable(df, 'leixing')
friedman_test_for_single_variable(df, 'temperature')
friedman_test_for_single_variable(df, 'excitation_waveform')

# 创建交互变量时，确保变量是字符串类型
df['leixing'] = df['leixing'].astype(str)
df['temperature'] = df['temperature'].astype(str)
df['excitation_waveform'] = df['excitation_waveform'].astype(str)

# 创建交互变量
df['leixing_temperature'] = df['leixing'] + "_" + df['temperature']
df['temperature_excitation_waveform'] = df['temperature'] + "_" + df['excitation_waveform']
df['leixing_excitation_waveform'] = df['leixing'] + "_" + df['excitation_waveform']

# 对交互变量执行弗里德曼检验
print("\n交互变量弗里德曼检验结果:")
friedman_test_for_single_variable(df, 'leixing_temperature')
friedman_test_for_single_variable(df, 'temperature_excitation_waveform')
friedman_test_for_single_variable(df, 'leixing_excitation_waveform')


单个变量弗里德曼检验结果:
分类变量 leixing 的分组样本数量不一致，无法执行弗里德曼检验。
每组的样本数量分别为: [3400, 3000, 3200, 2800]
分类变量 temperature 的分组样本数量不一致，无法执行弗里德曼检验。
每组的样本数量分别为: [3244, 3070, 3030, 3056]
分类变量 excitation_waveform 的分组样本数量不一致，无法执行弗里德曼检验。
每组的样本数量分别为: [4948, 3398, 4054]

交互变量弗里德曼检验结果:
分类变量 leixing_temperature 的分组样本数量不一致，无法执行弗里德曼检验。
每组的样本数量分别为: [861, 845, 843, 851, 813, 727, 746, 714, 850, 801, 754, 795, 720, 697, 687, 696]
分类变量 temperature_excitation_waveform 的分组样本数量不一致，无法执行弗里德曼检验。
每组的样本数量分别为: [1338, 776, 1130, 1215, 856, 999, 1195, 871, 964, 1200, 895, 961]
分类变量 leixing_excitation_waveform 的分组样本数量不一致，无法执行弗里德曼检验。
每组的样本数量分别为: [1412, 921, 1067, 1003, 900, 1097, 1078, 1112, 1010, 1455, 465, 880]


(None, None)