In [7]:
import pandas as pd
import plotly.graph_objects as go
import random
import numpy as np

def plot_expression_boxplot(df, seed=None):
    '''
    Build a per-sample box plot of log-transformed expression values.

    Rows containing a missing value in any column are dropped, the first
    column is renamed to ``gene_id`` and used as the identifier, and every
    remaining column is treated as one sample. Values are transformed with
    the natural log of ``FPKM + 1`` before plotting. The DataFrame passed
    in by the caller is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose first column identifies the gene and whose remaining
        columns are sample columns (assumed numeric, since ``np.log1p`` is
        applied to them).
    seed : int or None, optional
        Seed for the per-sample box colours. With the default ``None`` the
        colours are drawn from the global ``random`` module state; with a
        value, a private generator is used so the same seed always yields
        the same colours.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with one box trace per sample, in column order. The figure
        is returned, not shown.
    '''
    # Drop every row that has a missing value in any column. Both calls
    # return new frames, so the caller's object is left untouched.
    df = df.dropna()
    df = df.rename(columns={df.columns[0]: 'gene_id'})

    # Reshape from wide (one column per sample) to long (one row per
    # gene/sample pair) so the values can be grouped by sample.
    df_long = df.melt(id_vars='gene_id', var_name='sample', value_name='FPKM')

    # log(FPKM + 1); log1p keeps precision for values close to zero.
    df_long['FPKM'] = np.log1p(df_long['FPKM'])

    # Fall back to the module-level generator when no seed is given so that
    # a caller who seeded ``random`` globally still gets the same colours.
    rng = random if seed is None else random.Random(seed)

    fig = go.Figure()
    # sort=False keeps samples in their original column order; grouping
    # once avoids rescanning the whole long table for every sample.
    for sample, sample_rows in df_long.groupby('sample', sort=False):
        box_color = '#%02X%02X%02X' % (
            rng.randint(0, 255),
            rng.randint(0, 255),
            rng.randint(0, 255),
        )
        fig.add_trace(go.Box(
            y=sample_rows['FPKM'],
            name=sample,
            marker_color=box_color,
        ))

    # The axis shows transformed values, so label it accordingly.
    fig.update_layout(
        template='plotly_white',
        title='Distribution of expression volume',
        xaxis_title='Sample',
        yaxis_title='log(FPKM + 1)',
    )

    # Keep the usual 0-10 window, but widen it when the data goes higher so
    # that highly expressed genes are not cut off.
    y_upper = 10
    observed_max = df_long['FPKM'].max()
    if np.isfinite(observed_max) and observed_max > y_upper:
        y_upper = float(np.ceil(observed_max))
    fig.update_yaxes(range=[0, y_upper])

    return fig

In [8]:
# Load the expression table from the CSV file (path is relative to the
# kernel's working directory).
df = pd.read_csv(filepath_or_buffer='demo-fpkm-all.csv')

# Build the box plot; as the last expression of the cell, the returned
# figure is rendered as the cell output.
plot_expression_boxplot(df=df)