In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load both input tables from CSV; the first column of each file becomes the row index.
expression_matrix, samples_info = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("expression_matrix.csv", "samples_info.csv")
)

In [None]:
# Transpose the expression matrix so that its rows are the samples (the
# transposed index is used as `sample_id` below). The result is stored under a
# NEW name on purpose: overwriting `expression_matrix` would make this cell
# non-idempotent and would hand an already-transposed matrix to later cells.
expression_matrix_t = expression_matrix.transpose()

# Standardize every column (feature) of the transposed matrix.
scaler = StandardScaler()
expression_matrix_normalized = scaler.fit_transform(expression_matrix_t)

# Run PCA and keep the first two principal components.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(expression_matrix_normalized)

# Turn the PCA coordinates into a frame with an explicit `sample_id` column,
# then attach the sample metadata. `merge` defaults to an inner join, so
# samples without a matching row in `samples_info` are dropped.
pca_df = (
    pd.DataFrame(pca_result, columns=['PC1', 'PC2'], index=expression_matrix_t.index)
    .rename_axis('sample_id')
    .reset_index()
)
result_df = pca_df.merge(samples_info, left_on='sample_id', right_index=True)

# Build the scatter plot with plotly, colored by the `condition` column of the sample metadata.
fig = px.scatter(result_df, x='PC1', y='PC2', color='condition', hover_data=['sample_id'], height=600, width=800)
fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGray')))
fig.update_layout(title='PCA Analysis of Transcription Data', xaxis_title='PC1', yaxis_title='PC2')

# Show the figure (uncomment the next line to also save it as a standalone HTML file).
# fig.write_html('pca_plot.html', auto_open=True)
fig.show()

In [4]:
# 将上述代码封装成函数
def pca_analysis(expression_matrix: pd.DataFrame, samples_info: pd.DataFrame):
    """Run a 2-component PCA on an expression matrix and plot the samples.

    The matrix is transposed, standardized with ``StandardScaler``, reduced to
    two principal components, joined with the sample metadata and drawn as a
    plotly scatter plot colored by ``condition``.

    Args:
        expression_matrix: Expression matrix whose COLUMNS are the samples;
            it is transposed internally and the caller's object is left
            untouched. Column labels are used as sample ids.
        samples_info: Sample metadata indexed by sample id. Must contain a
            ``condition`` column, which is used to color the points.

    Returns:
        The plotly figure with one point per sample that has metadata. The
        join is pandas' default inner merge, so samples missing from
        ``samples_info`` are not plotted.
    """
    # Transpose so that each row is one sample.
    samples_by_feature = expression_matrix.transpose()

    # Standardize every column (feature) before PCA.
    scaler = StandardScaler()
    expression_matrix_normalized = scaler.fit_transform(samples_by_feature)

    # Keep the first two principal components.
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(expression_matrix_normalized)

    # Expose the sample ids as a regular column and attach the sample metadata.
    pca_df = (
        pd.DataFrame(pca_result, columns=['PC1', 'PC2'], index=samples_by_feature.index)
        .rename_axis('sample_id')
        .reset_index()
    )
    result_df = pca_df.merge(samples_info, left_on='sample_id', right_index=True)

    # Build the scatter plot with plotly.
    fig = px.scatter(result_df, x='PC1', y='PC2', color='condition', hover_data=['sample_id'], height=600, width=800)
    fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGray')))
    fig.update_layout(title='PCA Analysis of Transcription Data', xaxis_title='PC1', yaxis_title='PC2')

    # The figure is returned rather than shown; uncomment to also save it as HTML.
    # fig.write_html('pca_plot.html', auto_open=True)
    return fig

# Run the analysis; the returned figure is the cell's last expression, so the notebook displays it.
pca_analysis(expression_matrix=expression_matrix, samples_info=samples_info)