PCA 2D

In [None]:
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load the precomputed Q2 feature array and the matching per-sample labels.
X = np.load('/home/chiahong/Documents/chihli_bee_sound/2024_rpi_vs_esp_Queenless/feature_extraction/Q2.csv_x.npy')
y = np.load('/home/chiahong/Documents/chihli_bee_sound/2024_rpi_vs_esp_Queenless/feature_extraction/Q2.csv_y.npy')

# Average over the last axis so every sample becomes a single 2-D row for PCA.
# NOTE(review): assumes X is 3-D (presumably samples x features x frames) -- confirm.
X_mean = np.mean(X, axis=2)

# Project the averaged features onto the first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_mean)

# Plot colour for every class label expected in y.
label_colors = {
    'Normal': 'g',
    'Queenless': 'r',
}

# Fail fast if y contains a label that has no colour; otherwise those samples
# would silently be left out of the per-class scatter calls below.
unknown_labels = set(y.tolist()) - set(label_colors)
if unknown_labels:
    raise KeyError(
        f'No colour defined for label(s): {sorted(map(str, unknown_labels))}'
    )

# Draw one scatter series per class so each class gets its own legend entry.
plt.figure(figsize=(10, 8))
for label, color in label_colors.items():
    # A boolean mask keeps the selected coordinates 1-D.
    mask = y == label
    plt.scatter(X_pca[mask, 0], X_pca[mask, 1], c=color, label=label, alpha=0.7)

plt.xlabel('Principal Component 1', fontsize=16, labelpad=20)
plt.ylabel('Principal Component 2', fontsize=16, labelpad=20)
plt.title('PCA distribution diagram of Q2_Esp in Queenless vs Normal', fontsize=16, pad=20)
plt.tick_params(axis='both', labelsize=16)
plt.legend(title='Classes', title_fontsize=16, fontsize=16)
plt.grid(True)
plt.show()


PCA 3D

In [None]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Load the precomputed Q3 feature array and the matching per-sample labels.
X = np.load('/home/chiahong/Documents/chihli_bee_sound/2024_rpi_vs_esp_Queenless/feature_extraction/Q3.csv_x.npy')
y = np.load('/home/chiahong/Documents/chihli_bee_sound/2024_rpi_vs_esp_Queenless/feature_extraction/Q3.csv_y.npy')

# Average over the last axis so every sample becomes a single 2-D row for PCA.
# NOTE(review): assumes X is 3-D (presumably samples x features x frames) -- confirm.
X_mean = np.mean(X, axis=2)

# Project the averaged features onto the first three principal components.
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_mean)

# Plot colour for every class label expected in y.
label_colors = {
    'Normal': 'g',
    'Queenless': 'r',
}

# Fail fast, before the clustering work, if y contains a label with no colour.
unknown_labels = set(y.tolist()) - set(label_colors)
if unknown_labels:
    raise KeyError(
        f'No colour defined for label(s): {sorted(map(str, unknown_labels))}'
    )

# Cluster the projected points. The clusters are only used to measure how far
# each point lies from its own cluster centre; the seed is fixed so the
# filtered set (and therefore the figure) is the same on every run.
kmeans = KMeans(n_clusters=2, random_state=0)
kmeans.fit(X_pca)
cluster_centers = kmeans.cluster_centers_

# Euclidean distance from every sample to the centre of its assigned cluster.
distances = np.linalg.norm(X_pca - cluster_centers[kmeans.labels_], axis=1)

# Outlier cut-off: twice the standard deviation of those distances.
# NOTE(review): the mean distance is not added, so this is stricter than the
# usual "mean + 2 * std" rule -- confirm that this is intended.
threshold = 2 * np.std(distances)

# Keep only the samples that lie closer to their centre than the cut-off.
keep_mask = distances < threshold
X_filtered = X_pca[keep_mask]
y_filtered = y[keep_mask]

# 3-D scatter of the retained samples, coloured by class label.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(
    X_filtered[:, 0],
    X_filtered[:, 1],
    X_filtered[:, 2],
    c=[label_colors[label] for label in y_filtered],
)
ax.set_xlabel('Principal Component 1', fontsize=16, labelpad=20)
ax.set_ylabel('Principal Component 2', fontsize=16, labelpad=20)
ax.set_zlabel('Principal Component 3', fontsize=16, labelpad=30)
ax.set_title('PCA distribution diagram of Q3_Esp in Queenless vs Normal', fontsize=16)
ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)
ax.tick_params(axis='z', labelsize=16, pad=12)

# All points are drawn in one scatter call, so the legend is built from proxy
# markers, one per class colour.
legend_labels = list(label_colors.keys())
legend_handles = [
    plt.Line2D([0], [0], marker='o', color='w', label=label, markerfacecolor=color, markersize=10)
    for label, color in label_colors.items()
]
ax.legend(legend_handles, legend_labels, title='Classes', title_fontsize=16, fontsize=16, loc='upper left')
plt.show()
