### 方差分析
不考虑交互项

In [1]:
import pandas as pd
import scipy.stats as stats

# Load the sample pool from CSV.
file_path = '样本池2.csv'  # replace with the path to your file
df = pd.read_csv(file_path)

# Response (target) variables and the candidate factors to test against them.
target_variables = ['release_Doxy', 'release_TPC']
factors = ['PVA', 'Agarose', 'TPC_content', 'freeze_thaw cycle', 'time']

# Data cleaning: discard every row that has at least one missing value.
df_clean = df.dropna()

# One-way ANOVA of each target against each factor, considered one at a time
# (no interaction terms). Results are stored as
# anova_results[target][factor] -> {'F-statistic': ..., 'p-value': ...}.
anova_results = {}
for target in target_variables:
    anova_results[target] = per_factor = {}
    for factor in factors:
        # Split the target values into one group per distinct factor level.
        factor_column = df_clean[factor]
        groups = [
            df_clean.loc[factor_column == level, target]
            for level in factor_column.unique()
        ]

        # The test is only run with at least two groups, each holding at
        # least two observations; otherwise the factor is marked as skipped.
        has_enough_data = len(groups) >= 2 and all(len(group) >= 2 for group in groups)
        if has_enough_data:
            f_stat, p_value = stats.f_oneway(*groups)
            per_factor[factor] = {'F-statistic': f_stat, 'p-value': p_value}
        else:
            per_factor[factor] = {'F-statistic': None, 'p-value': None, 'message': 'Insufficient data'}

# Bare expression so the notebook displays the result dictionary.
anova_results


FileNotFoundError: [Errno 2] No such file or directory: '样本池2.csv'

考虑交互项

In [None]:
import pandas as pd
import scipy.stats as stats

# Load the sample pool from CSV.
file_path = '样本池2.csv'  # replace with the path to your file
df = pd.read_csv(file_path)

# Response (target) variables and the main-effect factors.
target_variables = ['release_Doxy', 'release_TPC']
factors = ['PVA', 'Agarose', 'TPC_content', 'freeze_thaw cycle', 'time']

# Data cleaning: discard every row that has at least one missing value.
# The explicit .copy() makes df_clean an independent frame, so adding the
# interaction columns below cannot raise pandas' SettingWithCopyWarning.
df_clean = df.dropna().copy()

# Interaction terms, built as element-wise products of two factor columns.
df_clean['PVA_time'] = df_clean['PVA'] * df_clean['time']
df_clean['Agarose_TPC_content'] = df_clean['Agarose'] * df_clean['TPC_content']
df_clean['PVA_Agarose'] = df_clean['PVA'] * df_clean['Agarose']
df_clean['PVA_freeze_thaw'] = df_clean['PVA'] * df_clean['freeze_thaw cycle']
interaction_terms = ['PVA_time', 'Agarose_TPC_content', 'PVA_Agarose', 'PVA_freeze_thaw']

# One-way ANOVA of each target against each factor and each interaction
# column. Results are stored as
# anova_results_with_interaction[target][factor] -> {'F-statistic', 'p-value'}.
# NOTE(review): every distinct value of a product column is treated as its own
# group here; this is a one-way test on the product, not a factorial
# interaction test. Confirm that this is the intended analysis.
anova_results_with_interaction = {}

for target in target_variables:
    anova_results_with_interaction[target] = {}
    for factor in factors + interaction_terms:
        # Split the target values into one group per distinct factor level.
        groups = [df_clean[df_clean[factor] == value][target] for value in df_clean[factor].unique()]

        # Skip the factor unless there are at least two groups and every
        # group holds at least two observations.
        if len(groups) < 2 or any(len(group) < 2 for group in groups):
            anova_results_with_interaction[target][factor] = {'F-statistic': None, 'p-value': None, 'message': 'Insufficient data'}
            continue

        # Run the one-way ANOVA.
        f_stat, p_value = stats.f_oneway(*groups)
        anova_results_with_interaction[target][factor] = {'F-statistic': f_stat, 'p-value': p_value}

# Flatten the nested result dict into rows for export. The per-factor entry is
# bound to `result` rather than `stats`, so the imported scipy.stats alias is
# not overwritten and the ANOVA code stays runnable afterwards.
result_data = [
    [target_name, factor_name, result['F-statistic'], result['p-value']]
    for target_name, per_factor in anova_results_with_interaction.items()
    for factor_name, result in per_factor.items()
]

# Convert to a DataFrame.
anova_df = pd.DataFrame(result_data, columns=['Target Variable', 'Factor/Interaction', 'F-statistic', 'p-value'])

# Save as an Excel file.
output_file = 'anova_results_with_interaction.xlsx'
anova_df.to_excel(output_file, index=False)

output_file  # display the output path so the file can be located/downloaded


输出包含交互项的数据集

In [None]:
import pandas as pd

# Load the sample pool from CSV.
file_path = '样本池2.csv'  # replace with the path to your file
df = pd.read_csv(file_path)

# Data cleaning: discard every row that has at least one missing value.
# The explicit .copy() makes df_clean an independent frame, so adding the
# interaction columns below cannot raise pandas' SettingWithCopyWarning.
df_clean = df.dropna().copy()

# Interaction terms (described as the significant ones in the original notes),
# built as element-wise products of two factor columns.
df_clean['PVA_time'] = df_clean['PVA'] * df_clean['time']
df_clean['Agarose_TPC_content'] = df_clean['Agarose'] * df_clean['TPC_content']
df_clean['PVA_Agarose'] = df_clean['PVA'] * df_clean['Agarose']
df_clean['PVA_freeze_thaw'] = df_clean['PVA'] * df_clean['freeze_thaw cycle']

# Feature set for release_Doxy: the five main factors plus the four
# interaction columns.
features_doxy = ['PVA', 'Agarose', 'TPC_content', 'freeze_thaw cycle', 'time',
                 'PVA_time', 'Agarose_TPC_content', 'PVA_Agarose', 'PVA_freeze_thaw']
X_doxy = df_clean[features_doxy]
y_doxy = df_clean['release_Doxy']

# Feature set for release_TPC: the same columns, kept as a separate list so
# the two targets can diverge later without affecting each other.
features_tpc = list(features_doxy)
X_tpc = df_clean[features_tpc]
y_tpc = df_clean['release_TPC']

# Join features and target column-wise to form the final per-target datasets.
doxy_data = pd.concat([X_doxy, y_doxy], axis=1)
tpc_data = pd.concat([X_tpc, y_tpc], axis=1)

# Write both datasets to one Excel workbook, one sheet per target.
output_file = 'output_with_interactions.xlsx'
with pd.ExcelWriter(output_file) as writer:
    doxy_data.to_excel(writer, sheet_name='release_Doxy', index=False)
    tpc_data.to_excel(writer, sheet_name='release_TPC', index=False)

print(f"文件已保存为 {output_file}")
