In [2]:
## load useful libraries
suppressWarnings(suppressPackageStartupMessages(library(rhdf5)))
suppressWarnings(suppressPackageStartupMessages(library(qvalue)))
suppressWarnings(suppressPackageStartupMessages(library(dplyr)))
suppressWarnings(suppressPackageStartupMessages(library(cowplot)))
suppressWarnings(suppressPackageStartupMessages(library(tidyverse)))
suppressWarnings(suppressPackageStartupMessages(library(ggbeeswarm)))
suppressWarnings(suppressPackageStartupMessages(library(ggthemes)))
suppressWarnings(suppressPackageStartupMessages(library(scater)))
suppressWarnings(suppressPackageStartupMessages(library(SingleCellExperiment)))
suppressWarnings(suppressPackageStartupMessages(library(edgeR)))
suppressWarnings(suppressPackageStartupMessages(library(limma)))
suppressWarnings(suppressPackageStartupMessages(library(ggplot2)))
suppressWarnings(suppressPackageStartupMessages(library(ISLR)))
suppressWarnings(suppressPackageStartupMessages(library(GGally)))
suppressWarnings(suppressPackageStartupMessages(library(scran)))
suppressWarnings(suppressPackageStartupMessages(library(ComplexHeatmap)))
suppressWarnings(suppressPackageStartupMessages(library(circlize)))
suppressWarnings(suppressPackageStartupMessages(library(colorspace)))

In [3]:
sce = readRDS("/hps/nobackup/hipsci/scratch/singlecell_endodiff/data_processed/merged/20180618/sce_merged_afterqc_filt_allexpts.rds")

In [5]:
sce

class: SingleCellExperiment 
dim: 11231 36044 
metadata(4): log.exprs.offset log.exprs.offset log.exprs.offset
  log.exprs.offset
assays(3): tpm counts logcounts
rownames(11231): ENSG00000000003_TSPAN6 ENSG00000000419_DPM1 ...
  ENSG00000272325_NUDT3 ENSG00000272398_CD24
rowData names(17): exprs_collapsed_to ensembl_transcript_id ...
  is_intop2000hvg is_hvg
colnames(36044): 21843_1#10 21843_1#100 ... 24539_8#97 24539_8#98
colData names(123): adj_x adj_y ... princ_curve princ_curve_scaled01
reducedDimNames(1): PCA
spikeNames(1): ERCC

In [3]:
## Highly variable genes (HVGs)
## Fit a variance trend with `experiment` in the design matrix and decompose
## each gene's variance; `alt.decomp$bio` is used downstream to rank genes.
library(scran)
sce <- normalize(sce)
design <- model.matrix(~ experiment, data = colData(sce))
# use.spikes = FALSE: the spike-in set is not used to fit the trend
alt.fit <- trendVar(sce, design = design, use.spikes = FALSE)
alt.decomp <- decomposeVar(sce, alt.fit)

“spike-in transcripts in 'ERCC' should have their own size factors”

In [4]:
# Names of the 500 genes with the largest `bio` value in the decomposition.
top.500hvgs <- rownames(alt.decomp)[order(alt.decomp$bio, decreasing = TRUE)][1:500]
# Flag those genes in the row metadata.
rowData(sce)$is_intop500hvg <- is.element(rownames(sce), top.500hvgs)
# PCA on cells (rows after transposing) using only the flagged genes;
# keep the matrix of per-cell principal-component scores.
pca.500hvgs <- prcomp(
    t(logcounts(sce[rowData(sce)$is_intop500hvg, ]))
)$x

In [5]:
sce$PC1_top500hvgs = pca.500hvgs[,1]

In [6]:
# Pseudotime is taken directly from PC1 of the top-500-HVG PCA (no rescaling
# is applied in this cell); pair it with each cell's `day` for inspection.
s <- sce$PC1_top500hvgs
p1 <- data.frame(pseudotime = s, day = sce$day)
head(p1)

Unnamed: 0,pseudotime,day
21843_1#10,-11.7695261,day1
21843_1#100,5.1315308,day1
21843_1#101,-0.6430211,day1
21843_1#102,-14.5080208,day1
21843_1#103,-5.8029849,day1
21843_1#105,-2.3442226,day1


In [7]:
# some cells will not be assigned to any stage
sce$stage = "not assigned"

In [8]:
# Assign differentiation stages in place. Cells that match none of the rules
# below keep whatever `stage` value they already have.
#
# The labels are written into the `stage` column directly
# (`sce$stage[mask] <- ...`) instead of extracting, modifying and re-inserting
# a whole subset of the object; this avoids copying the assays and also
# tolerates masks that are empty or contain NA (NA entries are left untouched).

# Every day0 cell is labelled "ips".
sce$stage[sce$day == "day0"] <- "ips"

# The thresholds are applied to the unscaled PC1, because those are the
# criteria that were used for the eQTL analysis.
sce$stage[
    sce$PC1_top500hvgs > -23 & sce$PC1_top500hvgs < 4 &
        sce$day %in% c("day1", "day2")
] <- "mesendo"
sce$stage[
    sce$PC1_top500hvgs > 22 & sce$day %in% c("day2", "day3")
] <- "defendo"

In [9]:
# One object per assigned stage; cells left as "not assigned" are in none.
sce_ips     <- sce[, sce$stage == "ips"]
sce_mesendo <- sce[, sce$stage == "mesendo"]
sce_defendo <- sce[, sce$stage == "defendo"]

In [10]:
# Build one "<donor>-<experiment>-day0" label for every donor/experiment
# combination that has at least one cell in the iPS subset.
mysce <- sce_ips
# Cell counts per donor (rows) and experiment (columns).
mat <- table(mysce$donor_short_id, mysce$experiment)
# Row/column indices of the non-empty entries (column-major order).
ord <- which(mat > 0, arr.ind = TRUE)
# sprintf() is vectorised and yields character(0) when there are no
# combinations, so no index loop (and no 1:0 pitfall) is needed.
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l.ips <- sprintf("%s-day0", l)

In [11]:
length(l.ips)

In [12]:
# Build "<donor>-<experiment>-<day>" labels for the mesendo subset, separately
# for day1 and day2 cells, keeping only donor/experiment combinations that
# have at least one cell on that day.
# sprintf() is vectorised and yields character(0) for empty input, so no index
# loop (and no 1:0 pitfall) is needed.
sceset <- sce_mesendo

# day1
mysce <- sceset[, sceset$day == "day1"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l1 <- sprintf("%s-day1", l)

# day2
mysce <- sceset[, sceset$day == "day2"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l2 <- sprintf("%s-day2", l)

l.mesendo <- c(l1, l2)

In [13]:
length(l.mesendo)

In [14]:
# Build "<donor>-<experiment>-<day>" labels for the defendo subset, separately
# for day2 and day3 cells, keeping only donor/experiment combinations that
# have at least one cell on that day.
# sprintf() is vectorised and yields character(0) for empty input, so no index
# loop (and no 1:0 pitfall) is needed.
sceset <- sce_defendo

# day2
mysce <- sceset[, sceset$day == "day2"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l2 <- sprintf("%s-day2", l)

# day3
mysce <- sceset[, sceset$day == "day3"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l3 <- sprintf("%s-day3", l)

l.defendo <- c(l2, l3)

In [15]:
length(l.defendo)

In [6]:
# Compute, for every "<donor>-<experiment>-<day>" label, the per-gene residual
# of log2(variance + 1) around a loess trend fitted against log2(mean + 1).
#
# Arguments:
#   sceset  object providing logcounts() and the per-cell columns
#           `donor_short_id`, `experiment` and `day`.
#   ngenes  number of rows of the result; must equal nrow(sceset), otherwise
#           assigning the row names fails.
#   l       character vector of labels of the form "<donor>-<experiment>-<day>".
#
# Returns a numeric matrix with one row per gene (row names taken from
# `sceset`) and one column per label (column names are `l`).
#
# Mean and variance are computed on `2^logcounts - 1`.
# NOTE(review): this presumes logcounts are log2(x + 1) values - confirm.
get_alphas_byexpt <- function(sceset, ngenes, l){
    n <- length(l)
    alphas <- matrix(0, nrow = ngenes, ncol = n)

    # Split the labels once, outside the loop. Donor is everything before the
    # first hyphen, day everything after the last one, experiment is what is
    # left in between (no assumption about how experiment names start).
    don  <- sub("-.*$", "", l)
    day  <- sub("^.*-", "", l)
    expt <- sub("^[^-]*-", "", sub("-[^-]*$", "", l))

    # Extract the expression matrix once instead of once per label.
    lc <- logcounts(sceset)

    for (i in seq_len(n)) {
        # which() drops NA comparisons, so cells with missing metadata are
        # never selected.
        cells <- which(sceset$donor_short_id == don[i] &
                           sceset$experiment == expt[i] &
                           sceset$day == day[i])
        if (length(cells) == 0L) {
            stop("no cells found for label '", l[i], "'", call. = FALSE)
        }
        # drop = FALSE keeps a matrix even for a single cell, so the
        # single-cell case can be detected explicitly.
        cnt <- 2^lc[, cells, drop = FALSE] - 1
        if (length(cells) == 1L) {
            # A variance cannot be computed from one cell; as before, the
            # cell's values are used for both mean and variance, which makes
            # log2(v + 1) equal log2(m + 1) for every gene.
            m <- cnt[, 1]
            v <- m
        } else {
            m <- rowMeans(cnt)
            v <- apply(cnt, 1, var)
        }
        # Fit and predict on the same, explicitly named columns so the model
        # does not depend on variables looked up in the calling environment.
        trend <- data.frame(x = log2(m + 1), y = log2(v + 1))
        fit <- loess(y ~ x, data = trend, span = 0.75)
        alphas[, i] <- trend$y - predict(fit, newdata = trend)
    }
    rownames(alphas) <- rownames(sceset)
    colnames(alphas) <- l
    alphas
}

In [None]:
#### phenotype files

In [15]:
alphas_ips = get_alphas_byexpt(sce_ips, nrow(sce_ips), l.ips)

In [16]:
head(alphas_ips)

Unnamed: 0,heth_1-expt_12-day0,jogf_2-expt_12-day0,pelm_3-expt_12-day0,vass_1-expt_12-day0,wibj_2-expt_12-day0,zapk_3-expt_12-day0,oaqd_3-expt_18-day0,paab_4-expt_18-day0,sita_1-expt_18-day0,toss_3-expt_18-day0,⋯,pipw_5-expt_44-day0,qehq_3-expt_44-day0,toco_5-expt_44-day0,uilk_3-expt_44-day0,ciwj_2-expt_45-day0,hajc_1-expt_45-day0,hecn_3-expt_45-day0,kuco_1-expt_45-day0,liqa_1-expt_45-day0,tert_1-expt_45-day0
ENSG00000000003_TSPAN6,-0.3442969,-0.2561692,-0.9548249,-1.34176662,0.2525518,-1.0164727,-0.10464142,-0.58032824,0.11891761,-0.76777287,⋯,-1.243250458,-0.4655523,-0.78188955,-0.2679966,-0.7180335,-0.5296899,-0.90405456,-0.87438009,-0.149317667,-0.7779745
ENSG00000000419_DPM1,-1.0514935,-0.2041174,-0.3276868,-0.22923586,0.18026564,-0.7068602,0.04927355,0.15825603,0.0282685,-0.95107324,⋯,-0.381577345,-0.4724743,-0.97828097,-0.255328,-0.3323666,-0.2908796,-0.36691404,-0.42940012,-0.397520347,-0.6672841
ENSG00000000457_SCYL3,-0.4788146,-0.7450249,-0.4537243,-0.2758395,-0.05931901,-0.2502215,-0.04891225,0.89742076,-0.12963603,0.09044089,⋯,-0.699585975,-0.4153946,-0.3240657,-0.3377796,-0.4829387,-0.4466199,-0.53362068,-0.24613035,0.059721763,-0.5994797
ENSG00000000460_C1orf112,-0.7312819,-0.2904255,1.0291748,0.03240118,-0.22764528,0.1665379,-0.6733283,0.5597013,-0.48053932,-0.12481105,⋯,0.000717624,-0.5245578,-0.16286372,-0.3100873,-0.5190415,-0.637908,-0.09177905,-0.42302292,-0.635804274,0.1562609
ENSG00000001036_FUCA2,-1.0857484,-0.6982946,-0.5535402,-1.05484372,-0.68306766,-1.1752613,-0.5052655,-0.7121891,0.29154267,-1.13094919,⋯,-0.281993882,-0.7629634,-0.45924095,-0.6157865,-0.5787906,-1.1960592,-0.90652529,-1.04799669,-0.44025304,-0.7941735
ENSG00000001084_GCLC,-0.2265324,-0.4693774,-0.1098633,-0.21712199,0.6978416,-0.7496548,-0.15165364,0.07095915,0.03665637,0.35903799,⋯,0.14714564,-0.1036783,-0.08049818,0.4100631,1.2386603,-0.1628617,-0.23172592,0.03571678,0.009957389,-0.1319196


In [17]:
alphas_mesendo = get_alphas_byexpt(sce_mesendo, nrow(sce_mesendo), l.mesendo)

In [19]:
head(alphas_mesendo)

Unnamed: 0,fafq_1-expt_09-day1,hiaf_2-expt_09-day1,iisa_3-expt_09-day1,joxm_1-expt_09-day1,lexy_1-expt_09-day1,wuye_2-expt_09-day1,fafq_1-expt_10-day1,garx_2-expt_10-day1,hayt_1-expt_10-day1,sebz_1-expt_10-day1,⋯,juuy_2-expt_44-day2,pipw_5-expt_44-day2,qehq_3-expt_44-day2,toco_5-expt_44-day2,uilk_3-expt_44-day2,ciwj_2-expt_45-day2,hajc_1-expt_45-day2,hecn_3-expt_45-day2,kuco_1-expt_45-day2,tert_1-expt_45-day2
ENSG00000000003_TSPAN6,-0.55734606,-0.3162606,0.03850426,-0.7320376,0.2879092,-0.6098239,-0.45141521,0.16447579,-0.36799622,0.59604233,⋯,-1.05769708,-1.24971867,-0.917179,-1.59486068,-1.413196,-4.440892e-16,0.0,-3.552714e-15,1.0683266,-0.9008921
ENSG00000000419_DPM1,-0.31067303,-1.2839812,-0.21963664,-0.3815413,0.9354593,-0.2770131,-0.04680137,-0.30281695,-0.7071789,0.49039454,⋯,-1.18791932,-0.08808045,-0.2306048,0.74516438,-2.4388375,8.881784e-16,-1.776357e-15,-4.440892e-15,-1.8936715,-1.9680701
ENSG00000000457_SCYL3,0.42376145,0.380856,-0.25682336,-0.5104772,-0.1852945,-0.0880798,0.03348179,-0.46820793,-0.81726338,0.88800626,⋯,-0.07310759,-0.49462151,0.3097475,1.1043945,-0.2283763,-7.112366e-16,2.220446e-16,-3.330669e-16,1.1761277,1.2617997
ENSG00000000460_C1orf112,0.32265502,-1.9569472,0.11362341,-0.1409322,1.1560959,-0.4756322,0.68899453,-0.11803455,0.73447802,-0.19054012,⋯,0.10346273,-0.59473649,-0.4545636,0.3356388,-0.2215318,-1.332268e-15,0.0,-1.776357e-15,-1.3573661,0.4012611
ENSG00000001036_FUCA2,0.02555808,-2.2694557,0.19786468,-0.4068388,-0.9688175,-0.1777112,-0.97352425,-0.86305456,-0.81963973,-0.03854668,⋯,1.05233149,-1.99944424,0.2330888,-1.05168538,-0.5934998,-8.881784e-16,0.0,-2.220446e-15,0.2166333,-0.643766
ENSG00000001084_GCLC,1.07173934,1.1620981,0.09476054,-0.5222253,-0.9060032,-0.9209471,0.67568851,-0.06611093,0.01245633,0.44820327,⋯,0.21033542,-0.12364973,0.1253449,-0.07465519,0.5052315,-7.21645e-16,1.110223e-16,-5.551115e-16,0.1984637,-2.6767779


In [18]:
alphas_defendo = get_alphas_byexpt(sce_defendo, nrow(sce_defendo), l.defendo)

In [9]:
input_files_dir = "/hps/nobackup/hipsci/scratch/singlecell_endodiff/data_processed/scQTLs/variance/alpha_20191119/"

In [21]:
# Write the per-stage alpha matrices as tab-separated text. With row names
# written and col.names = NA, the header line starts with an empty field for
# the row-name column.
write.table(
    x = alphas_ips, file = paste0(input_files_dir, "ips_pheno.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alphas_mesendo, file = paste0(input_files_dir, "mesendo_pheno.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alphas_defendo, file = paste0(input_files_dir, "defendo_pheno.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)

In [None]:
#### covariate files

In [22]:
# PCA across samples (the columns of each alpha matrix become rows after
# transposing); the scores on the first 10 components are kept per stage.
alpha.ips.pcs     <- prcomp(t(alphas_ips))[["x"]][, seq_len(10)]
alpha.mesendo.pcs <- prcomp(t(alphas_mesendo))[["x"]][, seq_len(10)]
alpha.defendo.pcs <- prcomp(t(alphas_defendo))[["x"]][, seq_len(10)]

In [23]:
head(alpha.ips.pcs)

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10
heth_1-expt_12-day0,-8.392677,22.268965,-10.5083829,33.93315,-5.730639,-3.793529,13.2626776,-12.636835,45.951996,-2.170596
jogf_2-expt_12-day0,-4.373702,18.156382,1.7178113,35.56105,-4.562206,-1.828212,-0.9857813,-12.28836,17.479169,-16.275829
pelm_3-expt_12-day0,-7.217079,24.380799,-19.9656471,53.39299,-9.138919,-14.178775,-14.9422531,-4.759319,9.223569,16.54621
vass_1-expt_12-day0,-2.133099,15.673494,-42.1912479,42.89776,-10.726423,12.852814,0.4111664,-27.801692,-1.955153,18.822598
wibj_2-expt_12-day0,-2.528749,6.981219,0.3921856,12.30112,4.669768,-7.400069,11.3086811,-6.744121,2.687188,-1.312227
zapk_3-expt_12-day0,-2.793033,14.000756,-0.9128197,27.83672,-4.161931,-1.827889,-5.5991821,-3.743577,3.564278,-1.29244


In [24]:
# Write the per-stage PC score matrices as tab-separated text (row names
# included; col.names = NA adds an empty header field for that column).
write.table(
    x = alpha.ips.pcs, file = paste0(input_files_dir, "ips_covs.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alpha.mesendo.pcs, file = paste0(input_files_dir, "mesendo_covs.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alpha.defendo.pcs, file = paste0(input_files_dir, "defendo_covs.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)

In [None]:
#### sample mapping files

In [10]:
# Map every phenotype sample (a column of `pheno`) to the long id of its donor.
#
# Arguments:
#   pheno  matrix-like object whose column names start with the donor short
#          id followed by a hyphen (e.g. "<donor>-<experiment>-<day>").
#   sce    object exposing per-cell `donor_long_id` and `donor_short_id`
#          via `$`.
#
# Returns a two-column data frame without column names, one row per column
# of `pheno`, in the same order: donor long id (genotype sample), then the
# phenotype sample label. Donors absent from `sce` get NA as long id.
#
# The short -> long lookup is built from the ids as they occur together on
# each cell, so it does not depend on the two id vectors having their unique
# values in the same order. Only the donor part of the label is parsed; the
# experiment/day parts are not needed for the mapping.
make_sample_mapping <- function(pheno, sce){
    sample_ids <- as.character(colnames(pheno))
    # Donor short id = everything before the first hyphen.
    donor <- sub("-.*$", "", sample_ids)

    lookup <- unique(data.frame(
        donor_long_id = as.character(sce$donor_long_id),
        donor = as.character(sce$donor_short_id),
        stringsAsFactors = FALSE
    ))
    ambiguous <- unique(lookup$donor[duplicated(lookup$donor)])
    if (length(ambiguous) > 0L) {
        stop(
            "donor_short_id maps to more than one donor_long_id: ",
            paste(ambiguous, collapse = ", "),
            call. = FALSE
        )
    }

    # genotype_samples phenotype_samples
    samples <- data.frame(
        donor_long_id = lookup$donor_long_id[match(donor, lookup$donor)],
        donor.expt.day = sample_ids,
        stringsAsFactors = FALSE
    )
    colnames(samples) <- NULL
    samples
}

In [26]:
# Genotype-sample / phenotype-sample mapping for each stage.
alpha.ips.samples     <- make_sample_mapping(pheno = alphas_ips, sce = sce_ips)
alpha.mesendo.samples <- make_sample_mapping(pheno = alphas_mesendo, sce = sce_mesendo)
alpha.defendo.samples <- make_sample_mapping(pheno = alphas_defendo, sce = sce_defendo)

In [27]:
head(alpha.ips.samples)

0,1
HPSI0214i-heth_1,heth_1-expt_12-day0
HPSI1013i-jogf_2,jogf_2-expt_12-day0
HPSI0214i-pelm_3,pelm_3-expt_12-day0
HPSI0114i-vass_1,vass_1-expt_12-day0
HPSI0214i-wibj_2,wibj_2-expt_12-day0
HPSI0114i-zapk_3,zapk_3-expt_12-day0


In [28]:
# Write the per-stage sample mapping tables as tab-separated text.
write.table(
    x = alpha.ips.samples, file = paste0(input_files_dir, "ips_samples.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alpha.mesendo.samples, file = paste0(input_files_dir, "mesendo_samples.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alpha.defendo.samples, file = paste0(input_files_dir, "defendo_samples.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)

In [None]:
#### feature variant filter files (at each stage, mean eQTLs)

In [11]:
# Lead-variant tables of the mean eQTL analyses, one per stage; the first
# CSV column is used as row names.
leads.mean.ips <- read.csv(
    file = "/nfs/leia/research/stegle/acuomo/mean/day0/all_expts/leads.csv",
    row.names = 1
)
leads.mean.mesendo <- read.csv(
    file = "/nfs/leia/research/stegle/acuomo/mean/mesendo_est_June20/leads.csv",
    row.names = 1
)
leads.mean.defendo <- read.csv(
    file = "/nfs/leia/research/stegle/acuomo/mean/defendo_est_June20/leads.csv",
    row.names = 1
)

In [31]:
# Feature-variant filter for iPS: the (snp_id, feature) pairs of the leads.
alpha.ips.fvf <- leads.mean.ips[, c("snp_id", "feature")]
head(alpha.ips.fvf)

snp_id,feature
5_149826526_C_T,ENSG00000164587_RPS14
11_57283988_C_T,ENSG00000134809_TIMM10
12_56401085_G_A,ENSG00000197728_RPS26
17_79634162_T_G,ENSG00000214087_ARL16
6_31486901_T_C,ENSG00000198563_DDX39B
4_39446549_G_A,ENSG00000163682_RPL9


In [32]:
# Same (snp_id, feature) selection for the mesendo and defendo leads.
alpha.mesendo.fvf <- leads.mean.mesendo[, c("snp_id", "feature")]
alpha.defendo.fvf <- leads.mean.defendo[, c("snp_id", "feature")]

In [33]:
# Write the per-stage feature-variant filter tables as tab-separated text.
write.table(
    x = alpha.ips.fvf, file = paste0(input_files_dir, "ips_fvf.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alpha.mesendo.fvf, file = paste0(input_files_dir, "mesendo_fvf.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)
write.table(
    x = alpha.defendo.fvf, file = paste0(input_files_dir, "defendo_fvf.txt"),
    sep = "\t", quote = FALSE, col.names = NA
)

In [16]:
# "<donor>-<experiment>-day1" labels for every donor/experiment combination
# with at least one day1 cell (all cells, regardless of assigned stage).
mysce <- sce[, sce$day == "day1"]
mat <- table(mysce$donor_short_id, mysce$experiment)
# Row/column indices of the non-empty entries of the count table.
ord <- which(mat > 0, arr.ind = TRUE)
# Vectorised; sprintf() yields character(0) when there is nothing to label.
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l.day1 <- sprintf("%s-day1", l)
length(l.day1)

In [17]:
# "<donor>-<experiment>-day3" labels for every donor/experiment combination
# with at least one day3 cell (all cells, regardless of assigned stage).
mysce <- sce[, sce$day == "day3"]
mat <- table(mysce$donor_short_id, mysce$experiment)
# Row/column indices of the non-empty entries of the count table.
ord <- which(mat > 0, arr.ind = TRUE)
# Vectorised; sprintf() yields character(0) when there is nothing to label.
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l.day3 <- sprintf("%s-day3", l)
length(l.day3)

In [None]:
#### all alphas

In [7]:
# Build "<donor>-<experiment>-<day>" labels over all cells, one day at a time,
# keeping only donor/experiment combinations with at least one cell that day.
# sprintf() is vectorised and yields character(0) for empty input, so no index
# loop (and no 1:0 pitfall) is needed.
sceset <- sce

# day0
mysce <- sceset[, sceset$day == "day0"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l0 <- sprintf("%s-day0", l)

# day1
mysce <- sceset[, sceset$day == "day1"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l1 <- sprintf("%s-day1", l)

# day2
mysce <- sceset[, sceset$day == "day2"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l2 <- sprintf("%s-day2", l)

# day3
mysce <- sceset[, sceset$day == "day3"]
mat <- table(mysce$donor_short_id, mysce$experiment)
ord <- which(mat > 0, arr.ind = TRUE)
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])
l3 <- sprintf("%s-day3", l)

l.all <- c(l0, l1, l2, l3)

In [8]:
alphas_all = get_alphas_byexpt(sce, nrow(sce), l.all)

In [None]:
head(alphas_all)

In [12]:
write.table(alphas_all, paste0(input_files_dir,"alphas_pheno.txt"), sep = "\t", quote = F, col.names = NA)

In [13]:
alphas.all.pcs = prcomp(t(alphas_all))$x[,1:10]

In [14]:
write.table(alphas.all.pcs, paste0(input_files_dir,"alphas_covs.txt"), sep = "\t", quote = F, col.names = NA)

In [17]:
alphas.all.samples = make_sample_mapping(alphas_all, sce)

In [18]:
write.table(alphas.all.samples, paste0(input_files_dir,"alphas_samples.txt"), sep = "\t", quote = F, col.names = NA)

In [37]:
# Stack the lead tables of the three stages, then sort rows by increasing
# global_corr_p_value.
all.leads <- do.call(
    rbind,
    list(leads.mean.ips, leads.mean.mesendo, leads.mean.defendo)
)
all.leads <- all.leads[order(all.leads$global_corr_p_value), ]
# Disabled: keep only the first (best-ranked) row per feature.
# Note that the `-which(...)` form would drop every row when nothing is
# duplicated; `all.leads[!duplicated(all.leads$feature), ]` avoids that.
# all.leads = all.leads[-which(duplicated(all.leads$feature)),]

In [38]:
# Feature-variant filter across all stages: (snp_id, feature) pairs of the
# combined, p-value-sorted lead table.
alphas.all.fvf <- all.leads[, c("snp_id", "feature")]
head(alphas.all.fvf)

Unnamed: 0,snp_id,feature
21765,19_7710920_G_A,ENSG00000076924_XAB2
10841,19_8387207_G_A,ENSG00000233927_RPS28
21766,7_55802063_T_C,ENSG00000226278_PSPHP1
1,5_149826526_C_T,ENSG00000164587_RPS14
10842,4_39446549_G_A,ENSG00000163682_RPL9
21767,17_7207964_A_C,ENSG00000132507_EIF5A


In [39]:
write.table(alphas.all.fvf, paste0(input_files_dir,"alphas_fvf_all.txt"), sep = "\t", quote = F, col.names = NA)

In [25]:
# Compute, for every "<donor>-<experiment>" label (all days pooled), the
# per-gene residual of log2(variance + 1) around a loess trend fitted against
# log2(mean + 1).
#
# Arguments:
#   sceset  object providing logcounts() and the per-cell columns
#           `donor_short_id` and `experiment`.
#   ngenes  number of rows of the result; must equal nrow(sceset), otherwise
#           assigning the row names fails.
#   l       character vector of labels of the form "<donor>-<experiment>".
#
# Returns a numeric matrix with one row per gene (row names taken from
# `sceset`) and one column per label (column names are `l`).
#
# Mean and variance are computed on `2^logcounts - 1`.
# NOTE(review): this presumes logcounts are log2(x + 1) values - confirm.
get_alphas_noday <- function(sceset, ngenes, l){
    n <- length(l)
    alphas <- matrix(0, nrow = ngenes, ncol = n)

    # Split the labels once, outside the loop: donor is everything before the
    # first hyphen, experiment everything after the last one.
    don  <- sub("-.*$", "", l)
    expt <- sub("^.*-", "", l)

    # Extract the expression matrix once instead of once per label.
    lc <- logcounts(sceset)

    for (i in seq_len(n)) {
        # which() drops NA comparisons, so cells with missing metadata are
        # never selected.
        cells <- which(sceset$donor_short_id == don[i] &
                           sceset$experiment == expt[i])
        if (length(cells) == 0L) {
            stop("no cells found for label '", l[i], "'", call. = FALSE)
        }
        # drop = FALSE keeps a matrix even for a single cell, so the
        # single-cell case can be detected explicitly.
        cnt <- 2^lc[, cells, drop = FALSE] - 1
        if (length(cells) == 1L) {
            # A variance cannot be computed from one cell; as before, the
            # cell's values are used for both mean and variance, which makes
            # log2(v + 1) equal log2(m + 1) for every gene.
            m <- cnt[, 1]
            v <- m
        } else {
            m <- rowMeans(cnt)
            v <- apply(cnt, 1, var)
        }
        # Fit and predict on the same, explicitly named columns so the model
        # does not depend on variables looked up in the calling environment.
        trend <- data.frame(x = log2(m + 1), y = log2(v + 1))
        fit <- loess(y ~ x, data = trend, span = 0.75)
        alphas[, i] <- trend$y - predict(fit, newdata = trend)
    }
    rownames(alphas) <- rownames(sceset)
    colnames(alphas) <- l
    alphas
}

In [26]:
# "<donor>-<experiment>" labels (no day component) for every donor/experiment
# combination that has at least one cell in the full data set.
mysce <- sce
mat <- table(mysce$donor_short_id, mysce$experiment)
# Row/column indices of the non-empty entries of the count table.
ord <- which(mat > 0, arr.ind = TRUE)
# Vectorised; sprintf() yields character(0) when there is nothing to label.
l <- sprintf("%s-%s", rownames(mat)[ord[, 1]], colnames(mat)[ord[, 2]])

In [27]:
l

In [28]:
alphas_noday = get_alphas_noday(sce, nrow(sce), l)

In [29]:
write.table(alphas_noday, paste0(input_files_dir,"noday_pheno.txt"), sep = "\t", quote = F, col.names = NA)

In [30]:
alphas.noday.pcs = prcomp(t(alphas_noday))$x[,1:10]

In [31]:
write.table(alphas.noday.pcs, paste0(input_files_dir,"noday_covs.txt"), sep = "\t", quote = F, col.names = NA)

In [32]:
alphas.noday.samples = make_sample_mapping(alphas_noday, sce)

In [34]:
head(alphas.noday.samples)

0,1
HPSI0314i-fafq_1,fafq_1-expt_09
HPSI1013i-hiaf_2,hiaf_2-expt_09
HPSI0114i-iisa_3,iisa_3-expt_09
HPSI0114i-joxm_1,joxm_1-expt_09
HPSI0114i-lexy_1,lexy_1-expt_09
HPSI1013i-wuye_2,wuye_2-expt_09


In [35]:
write.table(alphas.noday.samples, paste0(input_files_dir,"noday_samples.txt"), sep = "\t", quote = F, col.names = NA)

In [40]:
write.table(alphas.all.fvf, paste0(input_files_dir,"noday_fvf_all.txt"), sep = "\t", quote = F, col.names = NA)