<u>**ABCA7 protein levels by genotype**</u>
<a name="protein_levels"></a>

In [1]:
# initial check: report which tissue(s) the TMT proteomics specimens come from
meta = read.csv(file = './processed_data/rosmap_proteomics//0.Traits-AGE_CENSORED.csv')
biospecimen = read.csv(file = './raw_data/metadata/ROSMAP_biospecimen_metadata.csv')

# keep only the TMT assay records and key them by specimen ID
is_tmt = biospecimen$assay == 'TMT quantitation'
biospecimen_subset = biospecimen[is_tmt, ]
row.names(biospecimen_subset) = biospecimen_subset$specimenID

# look up the specimens listed in the traits table, then drop rows whose
# individualID is 'GISpool'
biospecimen_subset = biospecimen_subset[meta$SpecimenID, ]
not_pool = biospecimen_subset$individualID != 'GISpool'
biospecimen_subset = biospecimen_subset[not_pool, ]

# one message per distinct tissue value (auto-printed by the notebook)
tissues = unique(biospecimen_subset$tissue)
paste0('tissue used for TMT proteomics = ', tissues)

In [34]:
# load data
# per-sample traits for the proteomics cohort
meta = read.csv(file = './processed_data/rosmap_proteomics//0.Traits-AGE_CENSORED.csv')
# abundance table; the 'X' column supplies the row names
data = read.csv(file = './processed_data/rosmap_proteomics//3.cleanDat.csv', row.names = 'X')
# LoF summary table, also keyed by its 'X' column
all_samples_lof_summary = read.csv(
  file = './processed_data/rosmap_proteomics/all_samples_lof_summary.csv',
  row.names = 'X'
)
# genotype table; check.names = FALSE keeps the column headers exactly as
# written in the file (they are matched against projid values later)
all_samples_lof_genotypes = read.csv(
  file = './processed_data/rosmap_proteomics/all_samples_lof_genotypes.csv',
  check.names = FALSE
)

In [26]:
# attach the LoF summary columns to the proteomics traits; merge() defaults to
# an inner join, so only projids present in both tables are kept
all_meta = merge(x = meta, y = all_samples_lof_summary, by = 'projid')

In [51]:
# Build var_info: the ABCA7 variant rows for which at least one LoF carrier in
# the proteomics cohort has genotype string '0/1', with the variant annotation
# columns followed by the carriers' genotype columns.

# which() drops NA comparisons instead of turning them into all-NA rows
temp = all_samples_lof_genotypes[which(all_samples_lof_genotypes$GENE == 'ABCA7'), ]

# genotype columns are named by projid; keep those belonging to LoF carriers
carrier_ids = all_meta[which(all_meta$ABCA7LoF == 1), 'projid']
# drop = FALSE keeps a data frame even when only one carrier column matches,
# otherwise rowSums() below fails on a plain vector
temp2 = temp[, colnames(temp) %in% carrier_ids, drop = FALSE]

# na.rm = TRUE: a missing genotype must not turn the whole row's count into NA
index = rowSums(temp2 == '0/1', na.rm = TRUE) > 0

# columns 1:18 are taken to be the variant annotation columns -- TODO confirm
# against the header of all_samples_lof_genotypes.csv
var_info = cbind(temp[index, 1:18], temp2[index, , drop = FALSE])

In [89]:
# single-cell summary table; its row names are compared against projid below
all_data = readRDS(file = './processed_data/single_cell/stats_input_data.rds')
summary = all_data$summary

# plot genes of interest
# key the metadata by SampleID and keep only samples present in the proteomics table
rownames(all_meta) = all_meta$SampleID
in_proteomics = rownames(all_meta) %in% colnames(data)
all_meta = all_meta[in_proteomics, ]
# grp flags individuals that also appear in the single-cell summary
all_meta$grp = all_meta$projid %in% rownames(summary)

# pull the rows whose names start with each gene symbol, samples as rows
sample_ids = all_meta$SampleID
abca7_rows = startsWith(rownames(data), 'ABCA7')
rbfox3_rows = startsWith(rownames(data), 'RBFOX3')
df = as.data.frame(t(data[abca7_rows, sample_ids]))
df$RBFOX3 = t(data[rbfox3_rows, sample_ids])

# annotate every sample with LoF status and the single-cell overlap flag
df$LOF = all_meta[rownames(df), 'ABCA7LoF']
df$grp = all_meta[rownames(df), 'grp']

# the renaming assumes exactly four columns exist at this point, i.e. a single
# row matched the 'ABCA7' prefix
colnames(df) = c('ABCA7', 'RBFOX3', 'LOF', 'grp')
df$projid = all_meta[rownames(df), 'projid']

# drop samples with a missing value in any column
df = na.omit(df)

In [100]:
library(reshape2)
# Label each proteomics sample with the HGVS_C notation of the ABCA7 variant(s)
# carried by its donor; samples from non-carriers get an empty string.
carrier_ids = c('84653463', '20201891', '20201927', '50403446', '71648351', '50105301')

# long format: one row per (variant, carrier) pair; `variable` holds the projid
temp = melt(var_info[, c('HGVS_C', carrier_ids)], id.vars = 'HGVS_C')
# which() also discards missing genotypes
temp = temp[which(temp$value == '0/1'), ]

# One label per carrier, looked up by projid. The previous
# ifelse(test, temp$HGVS_C, '') recycled temp$HGVS_C by *position*, so the
# label attached to a sample did not depend on which variant its donor carries.
# Several variants in one carrier are joined with ';' rather than failing on
# duplicate row names.
hgvs_by_carrier = vapply(
  split(as.character(temp$HGVS_C), as.character(temp$variable)),
  paste,
  character(1),
  collapse = ';'
)
var_labels = unname(hgvs_by_carrier[as.character(df$projid)])
var_labels[is.na(var_labels)] = ''
df$var = var_labels

In [101]:
# save the per-sample ABCA7 / RBFOX3 table for the plotting step
write.csv(x = df, file = './processed_data//for_plotting/ABCA7_proteomics.csv')

<u>**Marker genes**</u>
<a name="marker genes"></a>

In [119]:
library(reshape2)
library(SingleCellExperiment)
library(tidyr)


Attaching package: 'tidyr'


The following object is masked from 'package:S4Vectors':

    expand


The following objects are masked from 'package:Matrix':

    expand, pack, unpack


The following object is masked from 'package:reshape2':

    smiths




In [103]:
# path to the serialized single-cell object (a file, despite the `_dir` name)
ace_dir = './processed_data/single_cell/sce.rds'
ace = readRDS(file = ace_dir)

In [116]:
# genes whose per-cell log-counts are exported for the marker plots
marker_genes = c('SYT1', 'NRGN', 'GAD1', 'AQP4', 'CSF1R', 'MBP', 'PLP1', 'VCAN',  'PDGFRB', 'FLT1')

print('getting marker genes')
marker_logcounts = logcounts(ace)[marker_genes, ]

print('melting')
# long format: Var1 = gene, Var2 = cell, value = log-count
marker_logcounts_melted = melt(as.matrix(marker_logcounts))

# attach each cell's annotation, looked up through the melted cell column
cell_ids = marker_logcounts_melted$Var2
marker_logcounts_melted$celltype = colData(ace)[cell_ids, 'annotations']

# per-cell table of individual, annotation and LoF status; going through
# cbind() yields a matrix first, so all three columns share one type
df = as.data.frame(cbind(
  projid = ace$projid,
  celltype = ace$annotations,
  LOF = ace$ABCA7LoF
))

# order the cell-type levels by sorting the distinct labels
cells = unique(df$celltype)
df$celltype = factor(df$celltype, levels = cells[order(cells)])

[1] "getting marker genes"
[1] "melting"


In [117]:
# save the per-cell annotation table and the melted marker log-counts
write.csv(x = df, file = './processed_data/for_plotting/celltype_annos_qc.csv')
write.csv(
  x = marker_logcounts_melted,
  file = './processed_data/for_plotting/marker_logcounts_melted.csv'
)

In [121]:
# show individual-level correlation plot
all_data = readRDS('./processed_data/single_cell/stats_input_data.rds')

# column names are split on '.' below into a cell-type part and a projid part
logcounts_ind = all_data$av_logcounts_by_ind_full_matrix
# first '.'-separated field of every column name (vectorised; no 1:length() loop)
anno_names = vapply(
  strsplit(colnames(logcounts_ind), '[.]'),
  function(parts) parts[[1]],
  character(1)
)
ind_cor = cor(logcounts_ind)

# show cross correlations
# melt() lists the matrix column by column, the same order as.vector() uses,
# so the strict lower triangle can be selected without melting the mask too
is_lower = as.vector(lower.tri(ind_cor, diag = FALSE))
df = melt(ind_cor)[is_lower, ]
df = df %>%
  separate(., 'Var1', c('celltype1', 'projid1'), sep = '[.]') %>%
  separate(., 'Var2', c('celltype2', 'projid2'), sep = '[.]')

# keep pairs of different individuals within the same cell type
same_type_other_ind = (df$projid1 != df$projid2) & (df$celltype1 == df$celltype2)
df_subset = df[same_type_other_ind, ]
df_subset$value = as.numeric(df_subset$value)

# order cell types by their mean between-individual correlation, highest first
means = aggregate(df_subset$value, list(df_subset$celltype1), 'mean')
# NOTE: this variable shadows base::order; calls such as order(...) still
# resolve to the function because R skips non-function bindings in call position
order = means[order(means$x, decreasing = TRUE), 'Group.1']
df_subset$celltype1 = factor(df_subset$celltype1, levels = order)
                                               
# show median number of cells per subject detected
# x: individuals (rows) by cell types (columns), entries are cell counts
x = as.matrix(table(ace$projid, ace$annotations))
median_per_celltype = apply(x, 2, median)
df = as.data.frame(median_per_celltype)
colnames(df) = 'median'
# cell-type factor with levels sorted from highest to lowest median
df$celltype = factor(
  rownames(df),
  levels = rownames(df)[order(df$median, decreasing = TRUE)]
)

# show number of individuals with >20 cells per cell type
# x: individuals (rows) by cell types (columns), entries are cell counts
x = as.matrix(table(ace$projid, ace$annotations))
n_over_20 = colSums(x > 20)
df1 = as.data.frame(n_over_20)
colnames(df1) = 'N'
# cell-type factor with levels sorted from most to fewest individuals
df1$celltype = factor(
  rownames(df1),
  levels = rownames(df1)[order(df1$N, decreasing = TRUE)]
)

# show N subjects with no cells detected
# x: individuals (rows) by cell types (columns), entries are cell counts
x = as.matrix(table(ace$projid, ace$annotations))
# per cell type, count the individuals whose cell count is zero
df2 = as.data.frame(colSums(x == 0))
colnames(df2) = 'N'
# take the labels from df2 itself; the earlier rownames(df) only gave the right
# answer while `df` still held a table built from the same columns of x
df2$celltype = rownames(df2)
df2$celltype = factor(df2$celltype, levels = df2$celltype[order(df2$N, decreasing = TRUE)])


In [122]:
# bundle the correlation matrix and per-cell-type summaries for plotting
# NOTE: this rebinds `data`, which earlier in the notebook held the proteomics table
data = list(
  ind_cor = ind_cor,
  cross_cors = df_subset,
  median_cells = df,
  N_cells = df1,
  no_cells = df2
)

In [123]:
# persist the bundled plotting inputs
saveRDS(object = data, file = './processed_data/for_plotting/celltype_anno_counts.rds')