In [2]:
## ---- Partition Cell Set Network Hallmark Analysis ---- 0681.02.00
## Load Analysis Parameters (Parm1)
## Load Network Utilities
## Loop Through Each Partition and Perform Analysis as Follows:
## Load Partition Cell Set
## Plot Hallmark Gene Heatmaps
## Create DE Analysis on VSMC Hallmark Subgroup Genes
## Create Volcano Plot for VSMC DE Hallmark Subgroup Genes

In [3]:
## Create the working input/output data directory if it does not already exist,
## then make it the working directory for the rest of the notebook
parentdir <- '/gpfs/group/torkamani/devans/'
datdir <- paste0(parentdir, 'CDC2')
## dir.exists() (not file.exists()) so a plain file with the same name is not
## mistaken for the directory; recursive = TRUE also creates a missing parent
if (!dir.exists(datdir)) {
    dir.create(datdir, recursive = TRUE)
}
setwd(datdir)

In [4]:
## Load the analysis parameters table; the first line of the file holds the column names
ps <- read.table('parms.txt', header = TRUE, stringsAsFactors = FALSE)

In [7]:
## Load Monocle3 and Seurat Libraries
library(monocle3)
library(Seurat)
library(dplyr)
library(magrittr)
library(ggplot2)
library(gridExtra)
library(Matrix)
library(rhdf5)
library(grid)
library(igraph)
library(corpcor)
library(fdrtool)
library(readxl)
library(mixOmics)
library(EnhancedVolcano)

In [8]:
## Load modified version of Monocle 3 dot plot (adds as.ordered type)
source('~/code/CSC.Run2/0700.00.00.Carotid.SC.Utilities.01.r')

In [9]:
## Load the previously preprocessed, downsampled cell data set stored under ps$outdir
down_stdycds_file <- paste0(ps$outdir,
                            'Aggregated.downsampled.QC.NoDoublets.Repartitioned.rds')
down_stdycds <- readRDS(down_stdycds_file)

In [10]:
## Lookup vector: gene short name -> gene id (Ensembl).
## Values are the feature row names, names are the gene short names.
short2geneid <- setNames(fData(down_stdycds)@rownames,
                         fData(down_stdycds)@listData$gene_short_name)

In [11]:
## Lookup vector: gene id (Ensembl) -> gene short name.
## Values are the gene short names, names are the feature row names.
geneid2short <- setNames(fData(down_stdycds)@listData$gene_short_name,
                         fData(down_stdycds)@rownames)

In [12]:
## Describe how the six cell sets are organized: condition alternates
## healthy/diseased, and each patient id covers two consecutive cell sets
cellgrps <- rep(c('healthy', 'diseased'), times = 3)
cellpats <- rep(paste('ID Number', 1:3), each = 2)

In [13]:
## Define the six cell type labels; each label is "<index>-<cell type name>"
celltype_names6 <- c('Macrophages', 'Endothelial Cells', 'VSMCs',
                     'Natural Killer Cells', 'Cytotoxic T Lymphocytes',
                     'B Lymphocytes')
celltypes6 <- paste0(seq_along(celltype_names6), '-', celltype_names6)

In [14]:
## Tom's best marker genes for defining cell types (three genes per row;
## presumably one row per cell type, in celltypes6 order -- TODO confirm)
## NOTE(review): 'XCL' has no trailing digit (compare XCL1/XCL2); if it is not a
## gene_short_name in the data set, the lookup below returns NA for it -- confirm
toms_markers7 <- c(
    'AIF1',  'CD14',   'CD68',
    'VWF',   'PECAM1', 'ECSCR',
    'CALD1', 'MYL9',   'TAGLN',
    'NKG7',  'XCL',    'CTSW',
    'CD2',   'TRAC',   'CD69',
    'CD79A', 'MS4A1',  'IGKC'
)

## Translate the marker short names into gene ids (Ensembl)
toms_gene_ids7 <- short2geneid[toms_markers7]

In [56]:
## Round Three of Hallmark Genes

## EMT hallmark genes, VSMC list (source label: vsmc-dis2_Epithelial-mesenchymal-transition)
vsmc_dis3a_emt <- c(
    'SPP1', 'POSTN', 'SERPINE1', 'TFPI2', 'IGFBP3',
    'TGM2', 'EDIL3', 'GADD45A', 'IGFBP2', 'VCAM1',
    'TGFB1', 'FN1', 'COL4A1', 'TNFRSF11B', 'COL4A2',
    'MYLK', 'GPC1', 'RGS4', 'BGN', 'TIMP1', 'FAP',
    'GJA1', 'CDH11', 'VCAN', 'TPM2', 'TPM4', 'FLNA',
    'ACTA2', 'INHBA', 'CALD1', 'TAGLN', 'PMEPA1',
    'VIM', 'FOXC2', 'MYL9', 'GLIPR1', 'LGALS1',
    'HTRA1', 'MGP', 'ITGB1', 'TPM1', 'SPARC', 'EMP3',
    'PCOLCE', 'PRRX1', 'FBLN5', 'DAB2', 'LAMA2',
    'ID2', 'ELN', 'CTHRC1', 'IGFBP4', 'NT5E', 'CDH6',
    'FSTL3', 'RHOB', 'GADD45B', 'LRP1', 'LOX', 'PMP22',
    'JUN', 'MATN2', 'ECM1', 'FSTL1', 'THBS1', 'MSX1',
    'SLIT3', 'MMP2', 'PCOLCE2', 'SLIT2', 'TGFBR3',
    'ABI3BP', 'TIMP3', 'FBN1', 'NTM', 'FBLN2', 'SFRP4',
    'DCN', 'GAS1', 'SFRP1', 'FBLN1', 'MFAP5'
)

## EMT hallmark genes, EC list (source label: ec-dis2_Epithelial-mesenchymal-transition)
ec_dis3a_emt <- c(
    'FN1', 'BGN', 'DCN', 'SPP1', 'MMP2', 'ECM2',
    'SERPINE1', 'THBS1', 'GLIPR1', 'TPM2', 'SERPINE2',
    'ELN', 'LUM', 'MGP', 'CDH11', 'COL3A1', 'SFRP1',
    'IGFBP3', 'COL1A2', 'FMOD', 'FOXC2', 'EMP3', 'COL12A1',
    'LOX', 'ITGB5', 'LGALS1', 'FBLN5', 'PMEPA1', 'COL4A1',
    'ABI3BP', 'EFEMP2', 'TAGLN', 'TIMP1', 'ITGAV',
    'COL4A2', 'RHOB', 'THY1', 'CALD1', 'COL6A2',
    'TNFRSF12A', 'HTRA1', 'GJA1', 'TGM2', 'FSTL1',
    'COL5A2', 'CALU', 'SPARC', 'CXCL12', 'DST', 'TGFB1',
    'PDLIM4', 'ITGB1', 'PPIB', 'CD59', 'TGFBR3', 'TIMP3',
    'POSTN', 'IGFBP4', 'PMP22', 'IL6'
)

## Combined, de-duplicated EMT gene list (VSMC genes first), its alias used by
## the heatmap cells, and the matching gene ids (Ensembl)
hm3agenes <- union(vsmc_dis3a_emt, ec_dis3a_emt)
hm3agenes_gid <- short2geneid[hm3agenes]
pca3agenes <- hm3agenes



## TNF hallmark genes, VSMC list (source label: vsmc-pa2_TNFa signaling via NFkB)
vsmc_dis3b_tnf <- c(
    'ACKR3', 'F3', 'KLF4', 'NR4A1', 'BTG2', 'TNFAIP2',
    'GFPT2', 'ZFP36', 'PLPP3', 'EGR1', 'MYC', 'IL18',
    'JUNB', 'FOSB', 'MAFF', 'IER2', 'SLC2A3', 'CEBPD',
    'JUN', 'CSF1', 'KLF2', 'CCNL1', 'NFIL3', 'GADD45B',
    'RHOB', 'FOS', 'TIPARP', 'NR4A2', 'BHLHE40', 'CDKN1A',
    'PPP1R15A', 'TRIP10', 'ATF3', 'ID2', 'SOCS3', 'KLF9',
    'KLF10', 'MCL1', 'NFE2L2', 'CEBPB', 'NFKBIA', 'PNRC1',
    'EIF1', 'PDLIM5', 'NINJ1', 'PMEPA1', 'INHBA', 'MSC',
    'TSC22D1', 'GADD45A', 'CCL4', 'PDE4B', 'SERPINE1'
)

## TNF hallmark genes, EC list (source label: ec_pa2_TNFa signaling via NFkB)
ec_dis3b_tnf <- c(
    'IL6', 'SPHK1', 'ATF3', 'ZFP36', 'ICAM1', 'IRF1', 'MAP3K8',
    'TAP1', 'NFKBIA', 'CDKN1A', 'EGR1', 'SLC2A3', 'SOCS3',
    'JUNB', 'KLF9', 'KLF10', 'KLF4', 'ETS2', 'IER2', 'FOSB',
    'FOS', 'CEBPD', 'EIF1', 'JAG1', 'CXCL2', 'ABCA1', 'CEBPB',
    'RHOB', 'PTGS2', 'ACKR3', 'PDE4B', 'PMEPA1', 'BMP2', 'EDN1',
    'SERPINE1'
)

## Combined, de-duplicated TNF gene list (VSMC genes first), its alias used by
## the heatmap cells, and the matching gene ids (Ensembl)
hm3bgenes <- union(vsmc_dis3b_tnf, ec_dis3b_tnf)
hm3bgenes_gid <- short2geneid[hm3bgenes]
pca3bgenes <- hm3bgenes

## Gene list Vsmc.m.11.30 (presumably VSMC network module genes -- TODO confirm)
Vsmc.m.11.30 <- c(
    'TMSB4X', 'FXYD5', 'MYH10', 'RAMP1', 'ANXA2',
    'TSC22D3', 'F2R', 'SFRP4', 'ECM1', 'ABHD14A',
    'JDP2', 'DDIT4', 'VAT1', 'ABLIM1', 'S100A10',
    'ZNF385D', 'FRZB', 'MAP1B', 'ZNF385B', 'GAP43',
    'PDE5A', 'PRSS35', 'CCDC68', 'SH3BGRL3', 'UXT',
    'CDKN2C', 'LMO7', 'ENPP2', 'NDNF', 'MCUB', 'HTR2B',
    'MBNL1-AS1', 'TBC1D12', 'PTN', 'IL17D', 'SFT2D1',
    'IGSF10', 'BHLHE41', 'NUCKS1', 'HACD3', 'IL18',
    'TPPP3', 'EMP3', 'CLIC2', 'ATF5', 'MSC-AS1', 'FBLN2',
    'LTBP4', 'F10', 'FSTL1', 'FBN1', 'TNXB', 'PCOLCE2',
    'PLAT', 'PLPP3', 'SLIT3', 'CD248', 'AKAP12', 'PLBD1',
    'SEMA3C', 'MFAP5', 'C17orf58', 'METRNL', 'UGDH',
    'HSD11B1', 'TGFBR3', 'TSKU', 'FLRT2', 'SEMA3B',
    'PI16', 'RAB32', 'DCLK1', 'CD55', 'LINC01133',
    'FGFR1', 'CSF1', 'SLC16A7', 'NT5E', 'ADAMTSL4',
    'HTRA3', 'FAM177A1', 'C1QTNF3', 'PPP2R2A', 'GPC1',
    'EBF2', 'AMOTL2', 'NTM', 'CMTM3', 'CADM3'
)

## Gene list Ec.m.35 (presumably EC network module genes -- TODO confirm)
Ec.m.35 <- c(
    'FEZ1', 'OLFM1', 'ARHGAP18', 'COL4A1', 'MEDAG', 'COL4A2',
    'PDGFD', 'IGFBP7', 'RGS3', 'PGF', 'ITGA6', 'MGLL', 'ACKR3',
    'ANGPT2', 'PLXNA4', 'EMCN', 'TIMP3', 'GSN', 'FILIP1', 'LAMB1',
    'ELK3', 'ARPC2', 'NES', 'MXD4', 'PLVAP', 'ESM1', 'CITED4',
    'TMED3', 'EIF4EBP1', 'RFLNB'
)

## Combined, de-duplicated gene list (VSMC list first), its alias used by the
## heatmap cells, and the matching gene ids (Ensembl)
hm3cgenes <- union(Vsmc.m.11.30, Ec.m.35)
hm3cgenes_gid <- short2geneid[hm3cgenes]
pca3cgenes <- hm3cgenes



In [57]:
## Display the sizes of the combined EMT and TNF gene lists and of their gene id
## lookups. Each pair has the same length because the id vector is obtained by
## indexing short2geneid with the gene list (unmatched names show up as NA
## entries, not as missing elements).
length(hm3agenes)
length(hm3agenes_gid)

length(hm3bgenes)
length(hm3bgenes_gid)

In [47]:
## Reference gene list `hm`, compared in the following cell against the VSMC and
## EC TNF gene subsets (vsmc_dis3b_tnf, ec_dis3b_tnf).
## NOTE(review): presumably the complete "TNFa signaling via NFkB" hallmark gene
## set -- confirm the source and version of this list.
hm <- c('ABCA1', 'AC129492.1', 'ACKR3', 'AREG', 'ATF3', 'ATP2B1', 'B4GALT1', 'B4GALT5', 'BCL2A1',
  'BCL3', 'BCL6', 'BHLHE40', 'BIRC2', 'BIRC3', 'BMP2', 'BTG1', 'BTG2', 'BTG3', 'CCL2', 'CCL20',
  'CCL4', 'CCL5', 'CCN1', 'CCND1', 'CCNL1', 'CCRL2', 'CD44', 'CD69', 'CD80', 'CD83', 'CDKN1A',
  'CEBPB', 'CEBPD', 'CFLAR', 'CLCF1', 'CSF1', 'CSF2', 'CXCL1', 'CXCL10', 'CXCL11', 'CXCL2',
  'CXCL3', 'CXCL6', 'DDX58', 'DENND5A', 'DNAJB4', 'DRAM1', 'DUSP1', 'DUSP2', 'DUSP4', 'DUSP5',
  'EDN1', 'EFNA1', 'EGR1', 'EGR2', 'EGR3', 'EHD1', 'EIF1', 'ETS2', 'F2RL1', 'F3', 'FJX1', 'FOS',
  'FOSB', 'FOSL1', 'FOSL2', 'FUT4', 'G0S2', 'GADD45A', 'GADD45B', 'GCH1', 'GEM', 'GFPT2',
  'GPR183', 'HBEGF', 'HES1', 'ICAM1', 'ICOSLG', 'ID2', 'IER2', 'IER3', 'IER5', 'IFIH1', 'IFIT2',
  'IFNGR2', 'IL12B', 'IL15RA', 'IL18', 'IL1A', 'IL1B', 'IL23A', 'IL6', 'IL6ST', 'IL7R', 'INHBA',
  'IRF1', 'IRS2', 'JAG1', 'JUN', 'JUNB', 'KDM6B', 'KLF10', 'KLF2', 'KLF4', 'KLF6', 'KLF9',
  'KYNU', 'LAMB3', 'LDLR', 'LIF', 'LITAF', 'MAFF', 'MAP2K3', 'MAP3K8', 'MARCKS', 'MCL1', 'MSC',
  'MXD1', 'MYC', 'NAMPT', 'NFAT5', 'NFE2L2', 'NFIL3', 'NFKB1', 'NFKB2', 'NFKBIA', 'NFKBIE',
  'NINJ1', 'NR4A1', 'NR4A2', 'NR4A3', 'OLR1', 'PANX1', 'PDE4B', 'PDLIM5', 'PFKFB3', 'PHLDA1',
  'PHLDA2', 'PLAU', 'PLAUR', 'PLEK', 'PLK2', 'PLPP3', 'PMEPA1', 'PNRC1', 'PPP1R15A', 'PTGER4',
  'PTGS2', 'PTPRE', 'PTX3', 'RCAN1', 'REL', 'RELA', 'RELB', 'RHOB', 'RIPK2', 'RNF19B', 'SAT1',
  'SDC4', 'SERPINB2', 'SERPINB8', 'SERPINE1', 'SGK1', 'SIK1', 'SLC16A6', 'SLC2A3', 'SLC2A6',
  'SMAD3', 'SNN', 'SOCS3', 'SOD2', 'SPHK1', 'SPSB1', 'SQSTM1', 'STAT5A', 'TANK', 'TAP1', 'TGIF1',
  'TIPARP', 'TLR2', 'TNC', 'TNF', 'TNFAIP2', 'TNFAIP3', 'TNFAIP6', 'TNFAIP8', 'TNFRSF9', 'TNFSF9',
  'TNIP1', 'TNIP2', 'TRAF1', 'TRIB1', 'TRIP10', 'TSC22D1', 'TUBB2A', 'VEGFA', 'YRDC', 'ZBTB10',
  'ZC3H12A', 'ZFP36')

In [79]:
## Compare the reference list `hm` with the VSMC and EC TNF gene subsets
length(hm)
sum(hm %in% vsmc_dis3b_tnf)
length(vsmc_dis3b_tnf)
sum(hm %in% ec_dis3b_tnf)

## Subset genes that are NOT part of the reference list
vsmc_dis3b_tnf[!(vsmc_dis3b_tnf %in% hm)]
ec_dis3b_tnf[!(ec_dis3b_tnf %in% hm)]

## Gene ids (Ensembl) of the VSMC subset genes missing from the reference list
short2geneid[vsmc_dis3b_tnf[!(vsmc_dis3b_tnf %in% hm)]]

## Set sizes for the two subsets: VSMC-only, union and overlap
tot <- union(vsmc_dis3b_tnf, ec_dis3b_tnf)
length(setdiff(vsmc_dis3b_tnf, ec_dis3b_tnf))
length(tot)
length(intersect(vsmc_dis3b_tnf, ec_dis3b_tnf))

## Fisher exact test on a 2 x 2 table of gene counts.
## NOTE(review): the four counts are hard-coded rather than derived from the
## vectors above -- verify they still match the current gene lists.
x <- rbind(c(200-66, 32), c(13, 21))
pval <- fisher.test(x)$p.value

## Bonferroni correction for all module permutations (31 * 36 tests).
## A corrected p-value is a probability, so it is capped at 1.
qval <- min(1, pval * 31 * 36)
qval
min(1, qval * 50) ## additionally corrected for 50 hallmarks
pval

In [78]:
## Display the product 31 * 36, the multiplier used for the Bonferroni correction
31*36

## Baseline EMT Hallmarks Heatmap

In [16]:
## Get the expression set for EC and VSMC cells, restricted to the EMT hallmark
## genes, label every cell with its cell-type/condition group, and downsample
## all four groups to the size of the smallest one.

## Keep endothelial cells (celltypes6[2]) and VSMCs (celltypes6[3]); %in% never
## yields NA, so cells without an assigned type are simply excluded
ec_vsmc_bool <- colData(down_stdycds)$assigned_cell_type %in% celltypes6[2:3]
hm3a_cds <- down_stdycds[fData(down_stdycds)@rownames %in% short2geneid[pca3agenes], ec_vsmc_bool]

## condtype is "<vsmc|ec>-<hea|dis>"; a logical + 1 picks element 1 (FALSE) or 2 (TRUE)
colData(hm3a_cds)$condtype <- paste(
    c('vsmc', 'ec')[(colData(hm3a_cds)$assigned_cell_type == celltypes6[2]) + 1],
    c('hea', 'dis')[(colData(hm3a_cds)$condition == 'diseased') + 1],
    sep = '-')

## Heatmap side-bar colour for each group
group_colors <- c('ec-dis' = 'red', 'ec-hea' = 'blue',
                  'vsmc-dis' = 'orange', 'vsmc-hea' = 'cyan')
colData(hm3a_cds)$sidecolor <- unname(group_colors[colData(hm3a_cds)$condtype])

## Draw the same number of cells (size of the smallest group) from every group.
## Positions are drawn with sample.int() and then mapped back to cell indices:
## sample(x, n) would sample from 1:x if a group held a single cell index.
## For groups with more than one cell this draws exactly what sample() draws.
cttable <- table(colData(hm3a_cds)$sidecolor)
smallset <- min(cttable)
set.seed(101)
cells2keep <- unlist(lapply(names(cttable), function(cellset_col) {
    cellsetrow <- which(colData(hm3a_cds)$sidecolor == cellset_col)
    cellsetrow[sample.int(length(cellsetrow), smallset)]
}), use.names = FALSE)
hm3a_cds <- hm3a_cds[, cells2keep]

## Show the group sizes, then build a dense expression matrix with gene short
## names as row names
table(colData(hm3a_cds)$sidecolor)
expdat3a <- as.matrix(exprs(hm3a_cds))
rownames(expdat3a) <- geneid2short[rownames(expdat3a)]


  blue   cyan orange    red 
   449    449    449    449 

In [17]:
## New round of heatmaps: EMT hallmark genes on the downsampled cell types
pf1 <- 'outputdat/Hallmark.Heatmap.comp.bin.comp.bin.down.emt3genes.OnOff2Plus.preclustered.Rev5.pdf'
pf2 <- 'outputdat/Hallmark.Heatmap.comp.bin.comp.bin.down.emt3genes.OnOff2Plus.fullyclustered.Rev5.pdf'
pf3 <- 'outputdat/Hallmark.Dot.down.EMT3genes.Rev5.pdf'

## Clustering settings, two-colour palette and legend shared by every heatmap
distmeth <- rep('binary', 2)
clustmeth <- rep('complete', 2)
my_palettebol <- colorRampPalette(c('#660066', 'yellow'))(n = 2)
legendlstbol <- list(legend = c('EC DIS', 'EC HEA', 'VSMC DIS', 'VSMC HEA'),
                     col = c('red', 'blue', 'orange', 'cyan'),
                     title = 'Cell Types', cex = .7)

## Binarize expression: 1 where the expression value is greater than 1, else 0
dims3a <- dim(expdat3a)
expdatbo3a <- (expdat3a > 1) * 1

## Column positions of each cell-type/condition group
vsmc_dw3a <- which(colData(hm3a_cds)$condtype == 'vsmc-dis')
vsmc_hw3a <- which(colData(hm3a_cds)$condtype == 'vsmc-hea')
ec_dw3a <- which(colData(hm3a_cds)$condtype == 'ec-dis')
ec_hw3a <- which(colData(hm3a_cds)$condtype == 'ec-hea')

## Cluster the columns of a single group; the plot goes to a throw-away pdf
colclust3a <- function(cellidx) {
    cim(expdatbo3a[, cellidx], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = 'col', save = 'pdf', name.save = 'temp.ready.to.delete',
        col.sideColors = colData(hm3a_cds)$sidecolor[cellidx], symkey = FALSE)
}
vsmc_dcim3a <- colclust3a(vsmc_dw3a)
vsmc_hcim3a <- colclust3a(vsmc_hw3a)
ec_dcim3a <- colclust3a(ec_dw3a)
ec_hcim3a <- colclust3a(ec_hw3a)

## Combined cell types: concatenate the per-group clustered column orders
parthclust <- c(colnames(vsmc_dcim3a$mat), colnames(vsmc_hcim3a$mat),
                colnames(ec_dcim3a$mat), colnames(ec_hcim3a$mat))
colornames <- setNames(colData(hm3a_cds)$sidecolor, colnames(expdatbo3a))

## Heatmap of all groups in pre-clustered column order, sent to a throw-away pdf
heat3a_tmp <- function(how) {
    cim(expdatbo3a[, parthclust], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = how, save = 'pdf', name.save = 'temp.ready.to.delete',
        col.sideColors = colornames[parthclust], symkey = FALSE)
}
## Same heatmap, drawn on the currently open graphics device
heat3a_dev <- function(how) {
    cim(expdatbo3a[, parthclust], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = how,
        col.sideColors = colornames[parthclust], symkey = FALSE)
}

heat3a_tmp('none')

## Repeated from above, this time to files: rows clustered, columns pre-clustered
pdf(pf1, width = 8, height = 12)
heat3a_dev('row')
dev.off()

## Keep the row-clustered result; its row order drives the dot plot below
cim_data <- heat3a_tmp('row')

## Rows and columns both clustered; cim_data2 is used by the later subgroup cells
pdf(pf2, width = 8, height = 12)
cim_data2 <- heat3a_dev('both')
dev.off()

## Dot plot of the same genes, in the row order of the row-clustered heatmap
specialglist <- short2geneid[cim_data$row.names]
pdf(pf3, width = 3.25, height = 16)
plot_genes_by_group2(hm3a_cds,
                     axis_order = "group_marker",
                     specialglist,
                     group_cells_by = "condtype",
                     ordering_type = "as_ordered", # alternative: "cluster_row_col"
                     max.size = 3,
                     norm_method = 'size_only',
                     scale_max = 3, scale_min = -3)
dev.off()

In [18]:
## Define two cell subgroups from the fully clustered EMT heatmap for the
## differential expression test. Cut points are positions along the heatmap
## column axis, given on a scale of total length totlen and converted to
## column indices.
## NOTE(review): the positions look hand-measured from the plot -- confirm.
## NOTE(review): cut4 relies on ncells * totlen/totlen truncating back to ncells;
## floating-point rounding could in principle give ncells - 1 -- confirm.
cim_cols <- colnames(cim_data2$mat)
ncells <- length(cim_cols)
totlen <- 6.746
cutpts <- as.integer(ncells * c(2.595, 4.027, 5.654, totlen)/totlen)
cut0 <- 0
cut1 <- cutpts[1]
cut2 <- cutpts[2]
cut3 <- cutpts[3]
cut4 <- cutpts[4]

## Group 1 spans cut1..cut2, group 2 spans cut3..cut4 (both ends inclusive)
grp1 <- cim_cols[cut1:cut2]
grp2 <- cim_cols[cut3:cut4]
hm3a_cds_grp1_2 <- hm3a_cds[, c(grp1, grp2)]

## Boolean selectors for the cells of each group
grp_cells <- hm3a_cds_grp1_2@colData@rownames
grp1bool <- grp_cells %in% grp1
grp2bool <- grp_cells %in% grp2

## Tag the two groups of cells in the `grp` column
grp_label <- rep(NA_character_, length(grp_cells))
grp_label[grp1bool] <- 'group1'
grp_label[grp2bool] <- 'group2'
hm3a_cds_grp1_2@colData$grp <- grp_label

In [19]:
## Compute the group differential expression (group2 versus group1) for the EMT
## hallmark genes
gene_emt_fits_grp <- fit_models(hm3a_cds_grp1_2, cores = 4,
                                model_formula_str = "~grp")
emt_coef <- coefficient_table(gene_emt_fits_grp)

## Extract the relevant data from the DE table: keep the group term, drop genes
## whose model status is not 'OK', and keep column 1 plus columns 4 to 12
emt_terms <- emt_coef %>% filter(term == "grpgroup2")
emt_termsb <- emt_terms %>% filter(status == 'OK')
emt_termsc <- as.data.frame(emt_termsb[, c(1, 4:12)])
rownames(emt_termsc) <- emt_termsc[, 1]

## Order the genes as in the fully clustered heatmap, bottom row first.
## Only genes that survived the status filter are requested: indexing a data
## frame by a row name it does not contain would insert an all-NA row.
heatmap_order <- rev(rownames(cim_data2$mat))
emt_termsd <- emt_termsc[heatmap_order[heatmap_order %in% rownames(emt_termsc)], ]
write.table(emt_termsd,
            'outputdat/Diff.EMT.HallmarkGroups.RemoveFail.NoModels.rev1.txt',
            col.names = TRUE, row.names = FALSE)

In [20]:
# emt_termsd[c('PCOLCE', 'LUM', 'HTRA1', 'FLNA', 'MYLK'),]
# hist(emt_termsd$normalized_effect, breaks = 5)
# hist(log10(emt_termsd$q_value), breaks = 5)

In [21]:
## Volcano plot for the hallmark subgroup DE genes computed above:
## normalized effect on the x axis, q value on the y axis
volcano_title <- paste0('Volcano Plot for VSMC EMT Subgroups with',
                        '\nCustomized Cutoffs')
g11 <- EnhancedVolcano(emt_termsd,
                       lab = emt_termsd$gene_short_name,
                       x = 'normalized_effect',
                       y = 'q_value',
                       xlim = c(-7, 7),
                       pCutoff = 5E-2,
                       FCcutoff = 0.5,
                       xlab = "Normalized Effect",
                       ylab = '-Log10(q_value)',
                       legend = c("NS", "Norm. Effect", "q", "q & Norm. Effect"),
                       title = volcano_title)

## Write the plot to a pdf file
plotfp <- 'outputdat/Volcano.Plot.VSMC.EMT.Hallmark.Subgroups.Customized.Cutoffs.Rev2.pdf'
pdf(plotfp, width = 10, height = 10)
print(g11)
dev.off()

In [22]:
## Get the expression set for EC and VSMC cells with ALL genes (no gene filter),
## label every cell with its cell-type/condition group, and downsample all four
## groups to the size of the smallest one.

## Keep endothelial cells (celltypes6[2]) and VSMCs (celltypes6[3]); %in% never
## yields NA, so cells without an assigned type are simply excluded
ec_vsmc_bool <- colData(down_stdycds)$assigned_cell_type %in% celltypes6[2:3]
hm3c_cds <- down_stdycds[, ec_vsmc_bool]

## condtype is "<vsmc|ec>-<hea|dis>"; a logical + 1 picks element 1 (FALSE) or 2 (TRUE)
colData(hm3c_cds)$condtype <- paste(
    c('vsmc', 'ec')[(colData(hm3c_cds)$assigned_cell_type == celltypes6[2]) + 1],
    c('hea', 'dis')[(colData(hm3c_cds)$condition == 'diseased') + 1],
    sep = '-')

## Heatmap side-bar colour for each group
group_colors <- c('ec-dis' = 'red', 'ec-hea' = 'blue',
                  'vsmc-dis' = 'orange', 'vsmc-hea' = 'cyan')
colData(hm3c_cds)$sidecolor <- unname(group_colors[colData(hm3c_cds)$condtype])

## Draw the same number of cells (size of the smallest group) from every group.
## Positions are drawn with sample.int() and then mapped back to cell indices:
## sample(x, n) would sample from 1:x if a group held a single cell index.
## For groups with more than one cell this draws exactly what sample() draws.
cttable <- table(colData(hm3c_cds)$sidecolor)
smallset <- min(cttable)
set.seed(101)
cells2keep <- unlist(lapply(names(cttable), function(cellset_col) {
    cellsetrow <- which(colData(hm3c_cds)$sidecolor == cellset_col)
    cellsetrow[sample.int(length(cellsetrow), smallset)]
}), use.names = FALSE)
hm3c_cds <- hm3c_cds[, cells2keep]

In [23]:
## Define the same two cell subgroups as for the EMT gene subset, this time on
## the all-genes cell data set. Cut points are positions along the column axis
## of the fully clustered EMT heatmap (cim_data2), given on a scale of total
## length totlen and converted to column indices.
## NOTE(review): the positions look hand-measured from the plot -- confirm.
## NOTE(review): cut4 relies on ncells * totlen/totlen truncating back to ncells;
## floating-point rounding could in principle give ncells - 1 -- confirm.
cim_cols <- colnames(cim_data2$mat)
ncells <- length(cim_cols)
totlen <- 6.746
cutpts <- as.integer(ncells * c(2.595, 4.027, 5.654, totlen)/totlen)
cut0 <- 0
cut1 <- cutpts[1]
cut2 <- cutpts[2]
cut3 <- cutpts[3]
cut4 <- cutpts[4]

## Group 1 spans cut1..cut2, group 2 spans cut3..cut4 (both ends inclusive)
grp1 <- cim_cols[cut1:cut2]
grp2 <- cim_cols[cut3:cut4]
hm3c_cds_grp1_2 <- hm3c_cds[, c(grp1, grp2)]

## Boolean selectors for the cells of each group
grp_cells <- hm3c_cds_grp1_2@colData@rownames
grp1bool <- grp_cells %in% grp1
grp2bool <- grp_cells %in% grp2

## Tag the two groups of cells in the `grp` column
grp_label <- rep(NA_character_, length(grp_cells))
grp_label[grp1bool] <- 'group1'
grp_label[grp2bool] <- 'group2'
hm3c_cds_grp1_2@colData$grp <- grp_label

In [24]:
## Group differential expression (group2 versus group1), this time fitted on
## all the genes
geneall_emt_fits_grp <- fit_models(hm3c_cds_grp1_2,
                                   model_formula_str = "~grp",
                                   cores = 4)
emtall_coef <- coefficient_table(geneall_emt_fits_grp)

## Reduce the DE table: keep the group term, drop genes whose model status is
## not 'OK', and keep column 1 plus columns 4 to 12.
## Row names are deliberately not assigned here (the original notebook left a
## de-duplication and row-naming step commented out).
emtall_terms <- filter(emtall_coef, term == "grpgroup2")
emtall_termsb <- filter(emtall_terms, status == 'OK')
emtall_termsc <- as.data.frame(emtall_termsb[, c(1, 4:12)],
                               stringsAsFactors = FALSE)
write.table(emtall_termsc,
            file = 'outputdat/Diff.EMT.Allgenes.HallmarkGroups.RemoveFail.NoModels.rev1.txt',
            row.names = FALSE, col.names = TRUE)

## Baseline TNF Hallmarks Heatmap

In [68]:
## Get the expression set for EC and VSMC cells, restricted to the TNF hallmark
## genes, label every cell with its cell-type/condition group, and downsample
## all four groups to the size of the smallest one.

## Keep endothelial cells (celltypes6[2]) and VSMCs (celltypes6[3]); %in% never
## yields NA, so cells without an assigned type are simply excluded
ec_vsmc_bool <- colData(down_stdycds)$assigned_cell_type %in% celltypes6[2:3]
hm3b_cds <- down_stdycds[fData(down_stdycds)@rownames %in% short2geneid[pca3bgenes], ec_vsmc_bool]

## condtype is "<vsmc|ec>-<hea|dis>"; a logical + 1 picks element 1 (FALSE) or 2 (TRUE)
colData(hm3b_cds)$condtype <- paste(
    c('vsmc', 'ec')[(colData(hm3b_cds)$assigned_cell_type == celltypes6[2]) + 1],
    c('hea', 'dis')[(colData(hm3b_cds)$condition == 'diseased') + 1],
    sep = '-')

## Heatmap side-bar colour for each group
group_colors <- c('ec-dis' = 'red', 'ec-hea' = 'blue',
                  'vsmc-dis' = 'orange', 'vsmc-hea' = 'cyan')
colData(hm3b_cds)$sidecolor <- unname(group_colors[colData(hm3b_cds)$condtype])

## Draw the same number of cells (size of the smallest group) from every group.
## Positions are drawn with sample.int() and then mapped back to cell indices:
## sample(x, n) would sample from 1:x if a group held a single cell index.
## For groups with more than one cell this draws exactly what sample() draws.
cttable <- table(colData(hm3b_cds)$sidecolor)
smallset <- min(cttable)
set.seed(101)
cells2keep <- unlist(lapply(names(cttable), function(cellset_col) {
    cellsetrow <- which(colData(hm3b_cds)$sidecolor == cellset_col)
    cellsetrow[sample.int(length(cellsetrow), smallset)]
}), use.names = FALSE)
hm3b_cds <- hm3b_cds[, cells2keep]

## Show the group sizes, then build a dense expression matrix with gene short
## names as row names
table(colData(hm3b_cds)$sidecolor)
expdat3b <- as.matrix(exprs(hm3b_cds))
rownames(expdat3b) <- geneid2short[rownames(expdat3b)]


  blue   cyan orange    red 
   449    449    449    449 

In [69]:
## New round of heatmaps: TNF hallmark genes on the downsampled cell types.
## Rev 5 of these files was missing genes (IL6 and IRF1).
pf1 <- 'outputdat/Hallmark.Heatmap.comp.bin.comp.bin.down.tnf3genes.OnOff2Plus.preclustered.Rev6.pdf'
pf2 <- 'outputdat/Hallmark.Heatmap.comp.bin.comp.bin.down.tnf3genes.OnOff2Plus.fullyclustered.Rev6.pdf'
pf3 <- 'outputdat/Hallmark.Dot.down.TNF3genes.Rev6.pdf'

## Clustering settings, two-colour palette and legend shared by every heatmap
distmeth <- rep('binary', 2)
clustmeth <- rep('complete', 2)
my_palettebol <- colorRampPalette(c('#660066', 'yellow'))(n = 2)
legendlstbol <- list(legend = c('EC DIS', 'EC HEA', 'VSMC DIS', 'VSMC HEA'),
                     col = c('red', 'blue', 'orange', 'cyan'),
                     title = 'Cell Types', cex = .7)

## Binarize expression: 1 where the expression value is greater than 1, else 0
dims3b <- dim(expdat3b)
expdatbo3b <- (expdat3b > 1) * 1

## Column positions of each cell-type/condition group
vsmc_dw3b <- which(colData(hm3b_cds)$condtype == 'vsmc-dis')
vsmc_hw3b <- which(colData(hm3b_cds)$condtype == 'vsmc-hea')
ec_dw3b <- which(colData(hm3b_cds)$condtype == 'ec-dis')
ec_hw3b <- which(colData(hm3b_cds)$condtype == 'ec-hea')

## Cluster the columns of a single group; the plot goes to a throw-away pdf
colclust3b <- function(cellidx) {
    cim(expdatbo3b[, cellidx], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = 'col', save = 'pdf', name.save = 'temp.ready.to.delete',
        col.sideColors = colData(hm3b_cds)$sidecolor[cellidx], symkey = FALSE)
}
vsmc_dcim3b <- colclust3b(vsmc_dw3b)
vsmc_hcim3b <- colclust3b(vsmc_hw3b)
ec_dcim3b <- colclust3b(ec_dw3b)
ec_hcim3b <- colclust3b(ec_hw3b)

## Combined cell types: concatenate the per-group clustered column orders
parthclust <- c(colnames(vsmc_dcim3b$mat), colnames(vsmc_hcim3b$mat),
                colnames(ec_dcim3b$mat), colnames(ec_hcim3b$mat))
colornames <- setNames(colData(hm3b_cds)$sidecolor, colnames(expdatbo3b))

## Heatmap of all groups in pre-clustered column order, sent to a throw-away pdf
heat3b_tmp <- function(how) {
    cim(expdatbo3b[, parthclust], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = how, save = 'pdf', name.save = 'temp.ready.to.delete',
        col.sideColors = colornames[parthclust], symkey = FALSE)
}
## Same heatmap, drawn on the currently open graphics device
heat3b_dev <- function(how) {
    cim(expdatbo3b[, parthclust], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = how,
        col.sideColors = colornames[parthclust], symkey = FALSE)
}

heat3b_tmp('none')

## Repeated from above, this time to files: rows clustered, columns pre-clustered
pdf(pf1, width = 8, height = 12)
heat3b_dev('row')
dev.off()

heat3b_tmp('row')

## Rows and columns both clustered; the result's row order drives the dot plot
pdf(pf2, width = 8, height = 12)
cim_data <- heat3b_dev('both')
dev.off()

## Dot plot of the same genes, in the row order of the fully clustered heatmap
specialglist <- short2geneid[cim_data$row.names]
pdf(pf3, width = 3.25, height = 12)
plot_genes_by_group2(hm3b_cds,
                     axis_order = "group_marker",
                     specialglist,
                     group_cells_by = "condtype",
                     ordering_type = "as_ordered", # alternative: "cluster_row_col"
                     max.size = 3,
                     norm_method = 'size_only',
                     scale_max = 3, scale_min = -3)
dev.off()

## Special Plot Where Hallmark Genes Were Selected Based on Module Genes Overlap
### These Plots Ultimately Were not Used

In [59]:
## Get the expression set for EC and VSMC cells, restricted to the combined
## Vsmc.m.11.30 / Ec.m.35 gene list (pca3cgenes), label every cell with its
## cell-type/condition group, and downsample all four groups to the size of the
## smallest one.

## Keep endothelial cells (celltypes6[2]) and VSMCs (celltypes6[3]); %in% never
## yields NA, so cells without an assigned type are simply excluded
ec_vsmc_bool <- colData(down_stdycds)$assigned_cell_type %in% celltypes6[2:3]
hm3c_cds <- down_stdycds[fData(down_stdycds)@rownames %in% short2geneid[pca3cgenes], ec_vsmc_bool]

## condtype is "<vsmc|ec>-<hea|dis>"; a logical + 1 picks element 1 (FALSE) or 2 (TRUE)
colData(hm3c_cds)$condtype <- paste(
    c('vsmc', 'ec')[(colData(hm3c_cds)$assigned_cell_type == celltypes6[2]) + 1],
    c('hea', 'dis')[(colData(hm3c_cds)$condition == 'diseased') + 1],
    sep = '-')

## Heatmap side-bar colour for each group
group_colors <- c('ec-dis' = 'red', 'ec-hea' = 'blue',
                  'vsmc-dis' = 'orange', 'vsmc-hea' = 'cyan')
colData(hm3c_cds)$sidecolor <- unname(group_colors[colData(hm3c_cds)$condtype])

## Draw the same number of cells (size of the smallest group) from every group.
## Positions are drawn with sample.int() and then mapped back to cell indices:
## sample(x, n) would sample from 1:x if a group held a single cell index.
## For groups with more than one cell this draws exactly what sample() draws.
cttable <- table(colData(hm3c_cds)$sidecolor)
smallset <- min(cttable)
set.seed(101)
cells2keep <- unlist(lapply(names(cttable), function(cellset_col) {
    cellsetrow <- which(colData(hm3c_cds)$sidecolor == cellset_col)
    cellsetrow[sample.int(length(cellsetrow), smallset)]
}), use.names = FALSE)
hm3c_cds <- hm3c_cds[, cells2keep]

## Show the group sizes, then build a dense expression matrix with gene short
## names as row names
table(colData(hm3c_cds)$sidecolor)
expdat3c <- as.matrix(exprs(hm3c_cds))
rownames(expdat3c) <- geneid2short[rownames(expdat3c)]


  blue   cyan orange    red 
   449    449    449    449 

In [61]:
## New round of heatmaps: Vsmc.m.11.30 / Ec.m.35 genes on the downsampled cell types
pf1 <- 'outputdat/Hallmark.Heatmap.comp.bin.comp.bin.down.Vsmc.m.11.30.Ec.m.35.genes.OnOff2Plus.preclustered.Rev5.pdf'
pf2 <- 'outputdat/Hallmark.Heatmap.comp.bin.comp.bin.down.Vsmc.m.11.30.Ec.m.35.genes.OnOff2Plus.fullyclustered.Rev5.pdf'
pf3 <- 'outputdat/Hallmark.Dot.down.Vsmc.m.11.30.Ec.m.35.genes.Rev5.pdf'

## Clustering settings, two-colour palette and legend shared by every heatmap
distmeth <- rep('binary', 2)
clustmeth <- rep('complete', 2)
my_palettebol <- colorRampPalette(c('#660066', 'yellow'))(n = 2)
legendlstbol <- list(legend = c('EC DIS', 'EC HEA', 'VSMC DIS', 'VSMC HEA'),
                     col = c('red', 'blue', 'orange', 'cyan'),
                     title = 'Cell Types', cex = .7)

## Binarize expression: 1 where the expression value is greater than 1, else 0
dims3c <- dim(expdat3c)
expdatbo3c <- (expdat3c > 1) * 1

## Column positions of each cell-type/condition group
vsmc_dw3c <- which(colData(hm3c_cds)$condtype == 'vsmc-dis')
vsmc_hw3c <- which(colData(hm3c_cds)$condtype == 'vsmc-hea')
ec_dw3c <- which(colData(hm3c_cds)$condtype == 'ec-dis')
ec_hw3c <- which(colData(hm3c_cds)$condtype == 'ec-hea')

## Cluster the columns of a single group; the plot goes to a throw-away pdf
colclust3c <- function(cellidx) {
    cim(expdatbo3c[, cellidx], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = 'col', save = 'pdf', name.save = 'temp.ready.to.delete',
        col.sideColors = colData(hm3c_cds)$sidecolor[cellidx], symkey = FALSE)
}
vsmc_dcim3c <- colclust3c(vsmc_dw3c)
vsmc_hcim3c <- colclust3c(vsmc_hw3c)
ec_dcim3c <- colclust3c(ec_dw3c)
ec_hcim3c <- colclust3c(ec_hw3c)

## Combined cell types: concatenate the per-group clustered column orders
parthclust <- c(colnames(vsmc_dcim3c$mat), colnames(vsmc_hcim3c$mat),
                colnames(ec_dcim3c$mat), colnames(ec_hcim3c$mat))
colornames <- setNames(colData(hm3c_cds)$sidecolor, colnames(expdatbo3c))

## Heatmap of all groups in pre-clustered column order, sent to a throw-away pdf
heat3c_tmp <- function(how) {
    cim(expdatbo3c[, parthclust], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = how, save = 'pdf', name.save = 'temp.ready.to.delete',
        col.sideColors = colornames[parthclust], symkey = FALSE)
}
## Same heatmap, drawn on the currently open graphics device
heat3c_dev <- function(how) {
    cim(expdatbo3c[, parthclust], dist.method = distmeth, clust.method = clustmeth,
        scale = "FALSE", center = "FALSE", col.names = FALSE, legend = legendlstbol,
        color = my_palettebol, cluster = how,
        col.sideColors = colornames[parthclust], symkey = FALSE)
}

heat3c_tmp('none')

## Repeated from above, this time to files: rows clustered, columns pre-clustered
pdf(pf1, width = 8, height = 13)
heat3c_dev('row')
dev.off()

heat3c_tmp('row')

## Rows and columns both clustered; the result's row order drives the dot plot
pdf(pf2, width = 8, height = 13)
cim_data <- heat3c_dev('both')
dev.off()

## Dot plot of the same genes, in the row order of the fully clustered heatmap
specialglist <- short2geneid[cim_data$row.names]
pdf(pf3, width = 3.25, height = 13)
plot_genes_by_group2(hm3c_cds,
                     axis_order = "group_marker",
                     specialglist,
                     group_cells_by = "condtype",
                     ordering_type = "as_ordered", # alternative: "cluster_row_col"
                     max.size = 3,
                     norm_method = 'size_only',
                     scale_max = 3, scale_min = -3)
dev.off()