In [43]:
import pandas as pd
import numpy as np

In [44]:
# Expression matrix passed to the correlation plotting functions in the cells below.
df_corr = pd.read_csv('./input_file/expression_matrix_corr.csv')

# Second expression matrix, truncated to its first 100 rows and displayed as the cell output.
df_corr2 = pd.read_csv('./input_file/expression_matrix.csv').iloc[:100, :]
df_corr2

Unnamed: 0,Geneid,C_1,C_2,C_3,T_1,T_2,T_3
0,MYCTH_2312856,1144,1731,1482,4998,7312,6275
1,MYCTH_2312859,69,116,109,97,117,117
2,MYCTH_2312860,626,952,880,3741,5382,4093
3,MYCTH_2072388,2456,4240,4395,2129,2653,2171
4,MYCTH_2312863,1145,1742,1270,7328,10275,8065
...,...,...,...,...,...,...,...
95,MYCTH_2313031,163,271,236,26,67,64
96,MYCTH_2313032,2,2,3,17,6,8
97,MYCTH_2313034,12,23,27,6,2,6
98,MYCTH_96411,9,13,36,1,0,2


In [45]:
import plotly.express as px
import plotly.io as pio

def plot_corr_sample(df_corr, width=900, height=600, color='RdBu_r'):
    """Draw a heatmap of the pairwise correlations between samples.

    Args:
        df_corr: Expression matrix as a DataFrame (rows are genes, columns are
            samples). The first column is dropped before the correlation is
            computed; any remaining non-numeric column is ignored.
        width: Figure width passed to the Plotly layout.
        height: Figure height passed to the Plotly layout.
        color: Name of the Plotly continuous colour scale.

    Returns:
        The Plotly figure created by ``px.imshow``.
    """
    # Drop the leading identifier column and round the values to 6 decimals.
    expression = df_corr.iloc[:, 1:].round(6)

    # Keep numeric columns only: pandas >= 2.0 no longer drops string columns
    # silently inside ``corr()`` and raises instead.
    expression = expression.select_dtypes(include='number')

    # Column-wise correlation, i.e. sample versus sample.
    correlation_matrix = expression.corr()

    # Correlations live in [-1, 1], so pin the colour scale to that range.
    fig = px.imshow(correlation_matrix,
                    color_continuous_scale=color,
                    zmin=-1,
                    zmax=1)

    fig.update_layout(width=width, height=height)

    # Alternative outputs, kept for reference:
    #   JSON string:   pio.to_json(fig)
    #   HTML <div>:    pio.to_html(fig, full_html=False, include_plotlyjs=False)
    #   HTML file:     fig.write_html("corr_sample.html")
    return fig

# Example call: sample-vs-sample correlation heatmap; the returned figure is the cell output.
plot_corr_sample(df_corr, width=900, height=600, color='RdBu_r')

In [64]:
def plot_corr_gene(df_corr, width=900, height=600, color='RdBu_r'):
    """Draw a heatmap of the pairwise correlations between genes.

    Args:
        df_corr: Expression matrix as a DataFrame (rows are genes, columns are
            samples). The first column supplies the axis labels; the
            remaining columns are the values that get correlated.
        width: Figure width passed to the Plotly layout.
        height: Figure height passed to the Plotly layout.
        color: Name of the Plotly continuous colour scale.

    Returns:
        The Plotly figure created by ``px.imshow``.
    """
    # The first column labels both axes; everything after it is expression data.
    labels = df_corr.iloc[:, 0]
    values = df_corr.iloc[:, 1:].round(6)

    # Transposing turns genes into columns, so corr() compares gene with gene.
    gene_by_gene = values.T.corr()

    # Correlations live in [-1, 1], so pin the colour scale to that range.
    heatmap_options = dict(
        color_continuous_scale=color,
        zmin=-1,
        zmax=1,
        x=labels,
        y=labels,
    )
    heatmap = px.imshow(gene_by_gene, **heatmap_options)
    heatmap.update_layout(width=width, height=height)

    # Alternative outputs, kept for reference:
    #   JSON string:   pio.to_json(heatmap)
    #   HTML file:     heatmap.write_html("corr_gene.html")
    return heatmap

# Example call: gene-vs-gene correlation heatmap; the returned figure is the cell output.
plot_corr_gene(df_corr, width=900, height=600, color='RdBu_r')

In [78]:
import plotly.graph_objects as corr_network
import networkx as nx
import math

def create_gene_network(df_corr, width=1200, height=900, bubble_size=1, threshold=0.6, k_value=0.5, iterations_value=10, color='RdBu_r', seed=None, output_html="corr_network-1.html"):
    """Draw a gene co-expression network from an expression matrix.

    Pairwise correlations between genes (rows) are computed; every gene pair
    whose absolute correlation is strictly greater than ``threshold`` becomes
    an edge. The graph is laid out with ``nx.spring_layout``. Edges with a
    positive correlation are drawn in red, all others in blue. Nodes are
    coloured and sized by their number of neighbours.

    Args:
        df_corr: Expression matrix as a DataFrame. The first column holds the
            gene identifiers; the remaining columns are samples.
        width: Figure width passed to the Plotly layout.
        height: Figure height passed to the Plotly layout.
        bubble_size: Scale factor for how fast node size grows with the
            number of neighbours.
        threshold: Minimum absolute correlation (exclusive) for an edge.
        k_value: ``k`` argument of ``nx.spring_layout``.
        iterations_value: ``iterations`` argument of ``nx.spring_layout``.
        color: Plotly colour scale used for the node colours.
        seed: Seed forwarded to ``nx.spring_layout``. ``None`` (the default)
            leaves the layout unseeded, so it differs between runs.
        output_html: Path the figure is written to with ``fig.write_html``.
            Pass ``None`` to skip writing the file.

    Returns:
        The ``plotly.graph_objects.Figure`` holding the network.

    Raises:
        ValueError: If the gene identifiers in the first column are not unique.
    """
    # Index the matrix by gene id; values are rounded to 2 decimals before correlating.
    expression = (
        df_corr
        .rename(columns={df_corr.columns[0]: 'gene_id'})
        .set_index('gene_id')
        .round(2)
    )
    if not expression.index.is_unique:
        raise ValueError("gene identifiers in the first column must be unique")

    # Genes become columns after the transpose, so corr() is gene versus gene.
    correlation_matrix = expression.transpose().corr()

    G = _correlation_graph(correlation_matrix, threshold)

    # Smaller k pulls nodes together; more iterations lets the layout settle.
    pos = nx.spring_layout(G, k=k_value, iterations=iterations_value, seed=seed)

    # Split edges by the sign of their correlation so each group gets its own colour.
    positive_edges = [(u, v) for u, v, w in G.edges(data='weight') if w > 0]
    other_edges = [(u, v) for u, v, w in G.edges(data='weight') if not w > 0]
    edge_trace_pos = _edge_trace(pos, positive_edges, 'red')
    edge_trace_neg = _edge_trace(pos, other_edges, 'blue')

    nodes = list(G.nodes())
    degrees = [len(G[node]) for node in nodes]

    # Matrices with more than 50 genes use a gentler size growth per neighbour.
    size_divisor = 10 if expression.shape[0] > 50 else 2
    node_sizes = [15 + (degree * bubble_size / size_divisor) for degree in degrees]

    node_trace = corr_network.Scatter(
        x=[pos[node][0] for node in nodes],
        y=[pos[node][1] for node in nodes],
        mode='markers',
        hoverinfo='text',
        text=nodes,
        marker=dict(
            showscale=True,
            colorscale=color,
            color=degrees,
            size=node_sizes,
            colorbar=dict(
                thickness=15,
                # ``title.side`` replaces the deprecated ``titleside`` attribute.
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            ),
            line_width=2))

    fig = corr_network.Figure(
        data=[edge_trace_pos, edge_trace_neg, node_trace],
        layout=corr_network.Layout(
            title='Gene Co-expression Network',
            showlegend=False,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=40),
            annotations=[
                dict(
                    text="",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002)
            ],
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
    )

    fig.update_layout(
        autosize=False,
        width=width,
        height=height,
        template="plotly_white"
    )

    # Alternative outputs, kept for reference:
    #   JSON string:   pio.to_json(fig)
    #   HTML <div>:    pio.to_html(fig, full_html=False, include_plotlyjs=False)
    if output_html is not None:
        fig.write_html(output_html)

    return fig


def _correlation_graph(correlation_matrix, threshold):
    """Build an undirected graph linking genes whose |correlation| exceeds ``threshold``."""
    genes = correlation_matrix.index.to_list()
    weights = correlation_matrix.to_numpy()

    # Upper triangle only: each unordered pair once, no self-pairs.
    # NaN correlations compare False and therefore never form an edge.
    strong_pairs = np.triu(np.abs(weights) > threshold, k=1)

    graph = nx.Graph()
    for i, j in zip(*np.nonzero(strong_pairs)):
        graph.add_edge(genes[i], genes[j], weight=float(weights[i, j]))
    return graph


def _edge_trace(pos, edges, line_color):
    """Return one Plotly line trace drawing every edge in ``edges`` in ``line_color``."""
    xs, ys = [], []
    for u, v in edges:
        (x0, y0), (x1, y1) = pos[u], pos[v]
        # The None entry breaks the line so consecutive edges are not joined.
        xs.extend([x0, x1, None])
        ys.extend([y0, y1, None])
    return corr_network.Scatter(
        x=xs, y=ys,
        line=dict(width=0.3, color=line_color),
        hoverinfo='none',
        mode='lines'
    )


# Example call: co-expression network with a 0.7 correlation cut-off; the returned figure is the cell output.
create_gene_network(
    df_corr,
    width=1000, height=800,
    bubble_size=6, threshold=0.7,
    k_value=0.5, iterations_value=10,
    color='RdBu_r',
)