In [4]:
library(DESeq2)
library(ggplot2)
library(plotly)
library(htmlwidgets)

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: 'BiocGenerics'


The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs


The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
    as.data.frame, basename, cbind, colnames, dirname, do.call,
    duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
    lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
    pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
    tapply, union, unique, unsplit, which.max, which.min



Attaching package: 'S4Vectors'


The following object is masked from 'package:utils':

    findMatches


The following objects are masked from 'package:base':

    I, expand.grid, unname


Loading required package: IRanges

Loading required package: GenomicRanges

Loading required package: GenomeInfoDb

Loa

In [5]:
# Load the expression matrix and the sample annotation table.
# Both files have a header row, and their first column supplies the row names.
read_indexed_csv <- function(path) {
  read.csv(file = path, header = TRUE, row.names = 1)
}

count_df <- read_indexed_csv("./input-file/expression_matrix.csv")
sample_df <- read_indexed_csv("./input-file/samples_info.csv")

In [6]:
# Preprocessing: replace every "-" with "." in the row names of both tables
# and in the Group labels.
# NOTE(review): the first assignment rewrites the row names of count_df (the
# feature identifiers), not its column names - confirm this is the intended
# target.
dash_to_dot <- function(x) {
  gsub("-", ".", x, fixed = TRUE)
}

rownames(count_df) <- dash_to_dot(rownames(count_df))
rownames(sample_df) <- dash_to_dot(rownames(sample_df))
sample_df$Group <- dash_to_dot(sample_df$Group)

In [7]:
# Pair each column of the count matrix with its annotation row by sample name
# instead of relying on the two files listing the samples in the same order.
count_samples <- colnames(count_df)
only_in_counts <- setdiff(count_samples, rownames(sample_df))
only_in_samples <- setdiff(rownames(sample_df), count_samples)
if (length(only_in_counts) > 0 || length(only_in_samples) > 0) {
  stop(
    "Sample names differ between the columns of count_df and the rows of ",
    "sample_df. Only in count_df: ",
    paste(only_in_counts, collapse = ", "),
    ". Only in sample_df: ",
    paste(only_in_samples, collapse = ", "),
    ".",
    call. = FALSE
  )
}
# drop = FALSE keeps sample_df a data frame even if it has a single column.
sample_df <- sample_df[count_samples, , drop = FALSE]

# The design formula below needs Group as a factor.
sample_df$Group <- factor(sample_df$Group)

# Build the DESeq2 data set: counts, per-sample annotation, and a design that
# models expression by Group.
deseq2.obj1 <- DESeqDataSetFromMatrix(
  countData = count_df,
  colData = DataFrame(sample_df),
  design = ~Group
)

# DESeq() runs the whole differential-expression pipeline in one call:
# size-factor estimation, dispersion estimation, then model fitting and
# testing (see the progress messages it prints).
deseq2.obj2 <- DESeq(deseq2.obj1)

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing



In [8]:
## PCA
# Prepare rlog-transformed expression values as the input for the PCA of the
# samples. The grouping variable in this analysis is `Group`.

# Start from the fitted DESeq2 object and estimate per-sample size factors.
# DESeq() already printed "estimating size factors" for this object, so this
# call presumably recomputes the same values.
dds <- estimateSizeFactors(deseq2.obj2)

# Apply DESeq2's rlog transformation; the transformed values feed the PCA.
rld <- rlog(dds)

In [9]:
# Run the PCA with samples as observations: the rlog matrix is transposed so
# that each row handed to prcomp() is one sample.
rlog_values <- assay(rld)
pca_data <- prcomp(t(rlog_values))

# Principal-component scores as a data frame (one row per sample, one column
# per PC), used for plotting.
plot_df <- as.data.frame(pca_data[["x"]])

In [10]:
# Display the per-sample PC score table (auto-printed as the cell result).
plot_df

Unnamed: 0_level_0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
C_1,-41.61522,0.1636911,-2.20000406,-4.6802805,-5.55383392,0.5923281,-1.0785956,-0.3684333,8.81676e-14
C_2,-43.3794,0.1241737,-1.5816211,-2.7987116,6.58605701,-0.4458624,0.6418912,0.2481501,1.004786e-13
C_3,-45.34542,-0.1015971,3.662255,7.1068121,-1.1569182,-0.183984,0.387761,0.1941333,8.879727e-14
WT_1,22.00612,23.8428269,6.77106068,-2.9883482,0.18081317,-2.2832441,-0.6529998,-1.8659735,9.929498e-14
WT_2,22.27299,23.2895949,-1.44834924,0.7358578,0.04021664,1.5730196,-1.570761,5.4729509,9.378685e-14
WT_3,21.12881,22.7784833,-5.38753463,2.5493372,-0.15595403,0.7697182,2.250073,-3.6723558,9.610836e-14
G_1,21.6661,-23.5021859,-2.38550481,1.6622207,0.55901579,-3.3522729,-4.8387622,-0.9275346,9.319661e-14
G_2,21.24907,-23.4057645,2.47793272,-0.6724299,0.7577944,6.2252195,-0.2086723,-1.1103989,9.640983e-14
G_3,22.01696,-23.1892224,0.09176545,-0.9144578,-1.25719087,-2.8949218,5.0700657,2.0294617,1.016595e-13


In [12]:
# Percentage of the total variance explained by each of the first three PCs.
pca_var <- (pca_data$sdev^2 / sum(pca_data$sdev^2))[1:3] * 100

# Build the 3D PCA scatter plot: one marker per sample, labelled with the
# sample name and coloured by Group.
# sample_df$Group is matched to the rows of plot_df by position.
fig_3d <- plot_ly(
  data = plot_df, x = ~PC1, y = ~PC2, z = ~PC3, text = rownames(plot_df),
  color = sample_df$Group,
  width = 900, height = 600,
  marker = list(size = 6, line = list(width = 1, color = "DarkSlateGray"))
) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = paste0("PC1: ", round(pca_var[1], 2), "%")),
      yaxis = list(title = paste0("PC2: ", round(pca_var[2], 2), "%")),
      zaxis = list(title = paste0("PC3: ", round(pca_var[3], 2), "%")),
      aspectmode = "cube",  # draw the three axes as a cube
      # NOTE(review): aspectratio is presumably redundant next to
      # aspectmode = "cube"; kept so the exported JSON is unchanged.
      aspectratio = list(x = 1, y = 1, z = 1)
    ),
    template = "simple_white"
  )

# Serialise the plotly figure to JSON.
json_data <- plotly::plotly_json(fig_3d, jsonedit = FALSE)

# Write the JSON to disk. The output directory is created first so the write
# does not fail on a fresh checkout where it does not exist yet.
json_file_path <- "output-file/pca_3d.json"  # destination file
dir.create(dirname(json_file_path), recursive = TRUE, showWarnings = FALSE)
write(json_data, file = json_file_path)