In [10]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import shapiro

# Choose which image's sample file to analyse (1-based position in the list below).
target = 3
data_name = ('0618', '0854', '1066')[target - 1]

# Configure matplotlib with a font that can render Chinese text, and keep the
# minus sign drawable with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Column layout: for each of the 9 groups (1..9) one R, G and B column named
# "<channel>_<group>_<channel position>", followed by the label column.
column_names = [
    f'{channel}_{group}_{position}'
    for group in range(1, 10)
    for position, channel in enumerate('RGB', start=1)
]
column_names.append('Label')

# The first line of the file is skipped and replaced by the names above.
data = pd.read_csv(
    f'../RGB_data/UniformSampler_{data_name}_3x3_multi_labelme.csv',
    skiprows=1,
    names=column_names,
)

In [11]:
# Drop the label column (the last one) and keep only the RGB feature columns.
features = data.iloc[:, :-1]

# Coerce every feature column to a numeric type; unparseable values become NaN.
features = features.apply(pd.to_numeric, errors='coerce')

# Remove every row that contains at least one NaN.
features = features.dropna()

# Plot style. `set_theme` is the current name of the legacy `sns.set` alias.
# NOTE(review): seaborn's theme also sets its own font.sans-serif list, which
# presumably overrides the SimHei setting made earlier. All text drawn below
# is ASCII so nothing breaks today - confirm before adding Chinese labels.
sns.set_theme(style="whitegrid")

# Positional indices of the columns to plot and save: the 6th, 15th and 23rd
# columns (0-based indices 5, 14, 22).
save_indices = [5, 14, 22]

# savefig does not create missing directories, so make sure the target exists.
output_dir = "././Is_Gaussian_img"
os.makedirs(output_dir, exist_ok=True)

for i in save_indices:
    col = features.columns[i]
    values = features[col]

    # Shapiro-Wilk needs at least 3 observations; after dropna() a column may
    # be too short, so skip it with a message instead of aborting the run.
    if len(values) < 3:
        print(f"skip {col}: only {len(values)} valid samples, Shapiro-Wilk needs >= 3")
        continue

    # Shapiro-Wilk normality test.
    # NOTE: SciPy documents that the p-value may be inaccurate for N > 5000.
    stat, p_value = shapiro(values)

    # One dedicated figure per column, addressed explicitly rather than
    # through pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(5, 4))
    try:
        # Histogram with a kernel density estimate overlaid.
        sns.histplot(values, bins=20, kde=True, ax=ax)
        ax.set_title(f"Distribution of {col}")
        ax.set_xlabel("Features")
        ax.set_ylabel("Frequency")

        # Annotate the p-value and the verdict at the 0.05 significance level.
        normality_status = "Normal" if p_value > 0.05 else "Not Normal"
        ax.text(0.5, 0.8, f"p = {p_value:.4f}\n{normality_status}",
                ha='center', va='center', transform=ax.transAxes,
                fontsize=10, color='red', bbox=dict(facecolor='white', alpha=0.7))

        # Save the image.
        fig.savefig(f"{output_dir}/distribution_{data_name}_{col}.png", bbox_inches='tight')
    finally:
        # Always close the figure, even if plotting or saving failed, so it is
        # neither displayed inline nor left open.
        plt.close(fig)
    print('finish')


finish
finish
finish
