In [2]:
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(anndata)
library(ggExtra)
library(gridExtra)

“package ‘ggplot2’ was built under R version 4.3.2”
“package ‘tidyr’ was built under R version 4.3.2”
“package ‘readr’ was built under R version 4.3.2”
“package ‘dplyr’ was built under R version 4.3.2”
“package ‘stringr’ was built under R version 4.3.2”
── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.0     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[

## Creating an anndata object for benchmarking

Step 1: creating anndata object
* Load miRNA inferred activity, expression count and annotations
* Match and subset inferred activity to expression count
* Store activity and expression as layers in anndata object

Step 2: computing correlations coefficients 
* For each miRNA compute Spearman and Pearson correlation betweeen activity and expression 
    * Compute p-value 
* add to `var` variable in anndata object

Step 3: Add means
* compute mean expression and add to `var` variable
* compute mean mean pearson and spearman correlation and add to `uns` variable 



## Step 1

#### loading data

In [16]:
sample_anno <- readRDS("/faststorage/project/jsp_student_projects/shared_data/TCGA_counts/TCGA_sample_anno_match.rds")
head(sample_anno)

Unnamed: 0_level_0,sample_id,cancer_type,sample_submitter_id,cancer_name,primary_site,sample_type,tissue_type,tumor_descriptor,age_at_index,gender,vital_status,tissue_or_organ_of_origin,color
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,TCGA-EW-A6SA-01A-21R-A32P-07,BRCA,TCGA-EW-A6SA-01A,Breast Invasive Carcinoma,Breast,Primary Tumor,Tumor,Primary,59,male,Alive,"Breast, NOS",#ED1E91
2,TCGA-E2-A14W-01A-11R-A12D-07,BRCA,TCGA-E2-A14W-01A,Breast Invasive Carcinoma,Breast,Primary Tumor,Tumor,Primary,78,male,Alive,"Breast, NOS",#ED1E91
3,TCGA-EW-A1PD-01A-11R-A144-07,BRCA,TCGA-EW-A1PD-01A,Breast Invasive Carcinoma,Breast,Primary Tumor,Tumor,Primary,61,male,Alive,"Breast, NOS",#ED1E91
4,TCGA-55-1594-01A-01R-0946-07,LUAD,TCGA-55-1594-01A,Lung Adenocarcinoma,Lung,Primary Tumor,Tumor,Primary,68,male,Alive,"Lower lobe, lung",#D2C3DF
5,TCGA-49-6742-11A-01R-1858-07,LUAD,TCGA-49-6742-11A,Lung Adenocarcinoma,Lung,Solid Tissue Normal,Normal,Not Applicable,70,male,Dead,"Upper lobe, lung",#D2C3DF
6,TCGA-50-5932-11A-01R-1755-07,LUAD,TCGA-50-5932-11A,Lung Adenocarcinoma,Lung,Solid Tissue Normal,Normal,Not Applicable,75,male,Dead,"Upper lobe, lung",#D2C3DF


In [17]:
tSNE <- readRDS("/faststorage/project/jsp_student_projects/miRNA_DGD_DE_F2024/processed_data/mRna_tSNE.rds")
dim(tSNE)
head(tSNE)

Unnamed: 0_level_0,sample_id,tSNE_1,tSNE_2,cancer_type,sample_submitter_id,cancer_name,primary_site,sample_type,tissue_type,tumor_descriptor,age_at_index,gender,vital_status,tissue_or_organ_of_origin,color
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,TCGA-SR-A6MR-01A-11R-A35L-07,-33.888853,-13.92731,PCPG,TCGA-SR-A6MR-01A,Pheochromocytoma and Paraganglioma,Adrenal Gland,Primary Tumor,Tumor,Primary,41,male,Alive,Retroperitoneum,#E7C41D
2,TCGA-ZF-A9RN-01A-11R-A42T-07,-49.316755,20.83248,BLCA,TCGA-ZF-A9RN-01A,Bladder Urothelial Carcinoma,Bladder,Primary Tumor,Tumor,Primary,67,female,Dead,Posterior wall of bladder,#F9D2DB
3,TCGA-K4-A5RH-01A-11R-A30C-07,-17.742814,27.42942,BLCA,TCGA-K4-A5RH-01A,Bladder Urothelial Carcinoma,Bladder,Primary Tumor,Tumor,Primary,69,male,Alive,Trigone of bladder,#F9D2DB
4,TCGA-AB-2992-03A-01T-0735-13,-71.126769,17.87096,LAML,TCGA-AB-2992-03A,Acute Myeloid Leukemia,Bone Marrow,Primary Blood Derived Cancer - Peripheral Blood,Tumor,Primary,32,female,Dead,Bone marrow,#744C27
5,TCGA-S9-A6U0-01A-12R-A32Q-07,67.920451,-43.85719,LGG,TCGA-S9-A6U0-01A,Brain Lower Grade Glioma,Brain,Primary Tumor,Tumor,Primary,46,male,Dead,Cerebrum,#D49DC6
6,TCGA-DH-A66F-01A-11R-A29R-07,4.754665,15.83679,LGG,TCGA-DH-A66F-01A,Brain Lower Grade Glioma,Brain,Primary Tumor,Tumor,Primary,49,male,Alive,Cerebrum,#D49DC6


In [18]:
extended_anno <- readRDS("~/jsp_student_projects/shared_data/microRNA/hg19_humir.rds")
head(extended_anno)

Unnamed: 0_level_0,seq,name,species,seed7,seed7target,tcgamatcher,tcga1match,chartr,inOncomir,sum_effect,oncoTissue,generalMatcher,expMatch,expidx,memicor,totalTargets,geneTargets,pexpidx,htmed
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>
113,TGAGGTAGTAGGTTGTATAGTT,hsa-let-7a-5p,Homo sapiens,GAGGTAG,CTACCTC,hsa-let-7a,0,UGAGGUAGUAGGUUGUAUAGUU,True,notOncomir,notOncomir,7a,1;2;3,1;2;3,-0.24365084,1810,1634,2,48994.36
114,CTATACAATCTACTGTCTTTC,hsa-let-7a-3p,Homo sapiens,TATACAA,TTGTATA,hsa-let-7a,0,CUAUACAAUCUACUGUCUUUC,True,notOncomir,notOncomir,7a,1;3,1;2;3,0.04408362,4090,3103,2,48994.36
115,CTGTACAGCCTCCTAGCTTTCC,hsa-let-7a-2-3p,Homo sapiens,TGTACAG,CTGTACA,hsa-let-7a-2,2,CUGUACAGCCUCCUAGCUUUCC,True,notOncomir,notOncomir,7a,2,2,0.18436039,2434,2119,2,48994.36
116,TGAGGTAGTAGGTTGTGTGGTT,hsa-let-7b-5p,Homo sapiens,GAGGTAG,CTACCTC,hsa-let-7b,4,UGAGGUAGUAGGUUGUGUGGUU,True,notOncomir,notOncomir,7b,4,4,-0.09012501,1810,1634,4,73792.76
117,CTATACAACCTACTGCCTTCCC,hsa-let-7b-3p,Homo sapiens,TATACAA,TTGTATA,hsa-let-7b,4,CUAUACAACCUACUGCCUUCCC,True,notOncomir,notOncomir,7b,4,4,0.08668485,4090,3103,4,73792.76
118,TGAGGTAGTAGGTTGTATGGTT,hsa-let-7c-5p,Homo sapiens,GAGGTAG,CTACCTC,hsa-let-7c,5,UGAGGUAGUAGGUUGUAUGGUU,True,notOncomir,notOncomir,7c,5,5,-0.25704022,1810,1634,5,21967.36


In [19]:
anno_match <- readRDS("/faststorage/project/jsp_student_projects/shared_data/TCGA_counts/TCGA_isoform_counts/TCGA_mirna_anno_iso_match.rds")
head(anno_match)

Unnamed: 0_level_0,mature_iso_id,mirna_type,mature_iso_name_miRBase,mature_iso_validation_miRBase
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
1,MIMAT0000062,mature,hsa-let-7a-5p,experimental
2,MIMAT0010195,mature,hsa-let-7a-2-3p,experimental
3,MIMAT0000063,mature,hsa-let-7b-5p,experimental
4,MIMAT0004482,mature,hsa-let-7b-3p,experimental
5,MIMAT0000064,mature,hsa-let-7c-5p,experimental
6,MIMAT0026472,mature,hsa-let-7c-3p,experimental


In [20]:
anno <- readRDS("~/jsp_student_projects/shared_data/microRNA/hg38_humir.rds")
head(anno)

dim(anno)

Unnamed: 0_level_0,mature_iso_id,mirna_type,mature_iso_name_miRBase,mature_iso_validation_miRBase,seqs,seed_site,target_site
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<fct>,<chr>,<chr>
1,MIMAT0000062,mature,hsa-let-7a-5p,experimental,UGAGGUAGUAGGUUGUAUAGUU,GAGGTAG,CTACCTC
2,MIMAT0010195,mature,hsa-let-7a-2-3p,experimental,CUGUACAGCCUCCUAGCUUUCC,TGTACAG,CTGTACA
3,MIMAT0000063,mature,hsa-let-7b-5p,experimental,UGAGGUAGUAGGUUGUGUGGUU,GAGGTAG,CTACCTC
4,MIMAT0004482,mature,hsa-let-7b-3p,experimental,CUAUACAACCUACUGCCUUCCC,TATACAA,TTGTATA
5,MIMAT0000064,mature,hsa-let-7c-5p,experimental,UGAGGUAGUAGGUUGUAUGGUU,GAGGTAG,CTACCTC
6,MIMAT0026472,mature,hsa-let-7c-3p,experimental,CUGUACAACCUUCUAGCUUUCC,TGTACAA,TTGTACA


In [21]:
expression <- readRDS("~/jsp_student_projects/shared_data/TCGA_counts/TCGA_isoform_counts/TCGA_mirna_TPMs_iso_match.rds")

head(expression[, 1:10])
dim(expression)

Unnamed: 0_level_0,TCGA-EW-A6SA-01A-21R-A32P-07,TCGA-E2-A14W-01A-11R-A12D-07,TCGA-EW-A1PD-01A-11R-A144-07,TCGA-55-1594-01A-01R-0946-07,TCGA-49-6742-11A-01R-1858-07,TCGA-50-5932-11A-01R-1755-07,TCGA-95-7947-01A-11R-2187-07,TCGA-CG-4476-01A-01R-1157-13,TCGA-05-4410-01A-21R-1858-07,TCGA-CG-5717-01A-11R-1602-13
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MIMAT0000062,63867.181623,23471.88352,23744.03148,10814.656043,63666.796272,81765.707515,54512.365094,14946.069282,26675.567266,15070.158161
MIMAT0010195,1.87344,10.82235,19.88285,7.312141,1.181277,0.333778,8.221291,2.594279,2.622967,4.754704
MIMAT0000063,24488.104472,12387.26714,14280.77384,7513.746829,17680.159749,23417.299966,22533.736065,11602.044637,13533.358556,10516.403355
MIMAT0004482,29.975034,32.82781,38.13595,14.624282,23.182545,31.20816,38.091981,33.293238,39.508431,24.774509
MIMAT0000064,416.902773,2011.87573,3591.62456,873.278516,3916.520991,2962.772343,1217.025087,2883.972636,1242.466348,2425.899825
MIMAT0026472,2.123232,30.66334,81.81303,18.802648,5.020424,2.670218,17.538753,31.131339,2.950837,26.526243


In [22]:
cohort_activity <- readRDS("~/jsp_student_projects/miRNA_DGD_DE_F2024/data/filtered_by_paper/miReact_activity/miReact_Morten.rds")# pseudocount = 1



head(cohort_activity[,1:10])
dim(cohort_activity)

Unnamed: 0,TCGA-AA-3542-01A-02R-1873-07,TCGA-CZ-5989-01A-11R-1672-07,TCGA-B4-5832-01A-11R-1672-07,TCGA-AK-3447-01A-01R-1766-07,TCGA-G9-6369-01A-21R-1965-07,TCGA-FC-A5OB-01A-11R-A29R-07,TCGA-FC-A6HD-01A-11R-A31N-07,TCGA-VN-A943-01A-11R-A41O-07,TCGA-YL-A8S9-01A-11R-A37L-07,TCGA-HC-7077-01A-11R-1965-07
AAAAAAA,-69.6128061,236.99684,223.479596,16.892340858,-0.3683421,15.2251979,-7.0000718,0.17071711,3.1377901,-1.8125606
AAAAAAC,-3.8744484,28.255148,23.512691,4.846572279,-0.2468102,3.1742971,-0.7293878,0.29545067,1.3230686,-0.5203684
AAAAAAG,-9.6387189,63.063615,62.095177,7.82058767,-0.697624,5.0367522,-1.2055121,0.7144055,1.3650877,-0.564229
AAAAAAT,-55.8028992,148.241785,135.020662,11.782410837,0.0623209,9.5567902,-7.2140514,0.04812828,2.4919745,-0.7527386
AAAAACA,-3.105498,31.470225,28.880339,8.772925404,-0.1606767,2.3579668,-0.4002306,0.30850937,0.7902424,-0.5223739
AAAAACC,-0.6776415,1.639903,1.698441,0.001012206,-0.1849276,-0.5608395,-0.9399934,-0.51734823,-0.237138,-0.6009776


In [23]:
DGD_activity <- readRDS("~/jsp_student_projects/miRNA_DGD_DE_F2024/data/filtered_by_paper/miReact_activity/miReact_DGD_filtered_normalized.rds") # pseudocount = 1


head(DGD_activity[,1:10])
dim(DGD_activity)

Unnamed: 0,TCGA-AA-3542-01A-02R-1873-07,TCGA-CZ-5989-01A-11R-1672-07,TCGA-B4-5832-01A-11R-1672-07,TCGA-AK-3447-01A-01R-1766-07,TCGA-G9-6369-01A-21R-1965-07,TCGA-FC-A5OB-01A-11R-A29R-07,TCGA-FC-A6HD-01A-11R-A31N-07,TCGA-VN-A943-01A-11R-A41O-07,TCGA-YL-A8S9-01A-11R-A37L-07,TCGA-HC-7077-01A-11R-1965-07
AAAAAAA,-72.495705,136.9306281,157.9655299,3.269184,-2.2562716,-0.825986,-8.2006992,1.8126922,-0.43425386,-27.765742
AAAAAAC,-5.299972,16.9813709,16.2753185,1.027344,-1.1867228,-0.143781,-1.6846272,0.3392916,0.09206614,-5.420072
AAAAAAG,-9.792265,35.1061265,47.200624,2.166519,-2.3948839,-0.1982465,-2.1911568,1.3879482,0.35480771,-7.933001
AAAAAAT,-54.908296,91.0232858,94.97664,3.977719,-0.3139552,-0.6288222,-6.5352034,0.9740909,-1.19291848,-15.9198
AAAAACA,-4.884646,16.6735579,20.0185477,2.957535,-0.5658411,-0.2010953,-0.7705596,0.6314137,0.09035464,-4.273717
AAAAACC,-1.193459,0.9041498,0.3830344,-1.003574,-0.650903,-2.1680818,-2.4189324,-0.9033329,-1.4729587,-2.732738


#### Create expression matrix and activity matrix

We select the miRNA's present in both the annotations and expression count, and select the matching inferred activity for the present miRNAs.
Moreover we select only the samples present in both expression count and the inferred activity. 

Both expression count matrix and activity matrix is renames to match the same naming convention. 

NB! 138 miRNA were not available in the anno_match dataframe 

In [24]:
# Finding samples present in both activity and expression counts
intersect_samples = intersect(colnames(cohort_activity), colnames(expression))

sample_an <- sample_anno[(sample_anno$sample_id %in% intersect_samples),]
# sample_an <- tSNE[(tSNE$sample_id %in% intersect_samples), ]
dim(sample_an)

# Finding miRNA present in both samples
interset_miRNAs = anno_match %>% inner_join(extended_anno, by = join_by( mature_iso_name_miRBase == name))

In [25]:
missing = anno[!(anno$target_site %in% rownames(cohort_activity)),]
head(missing)

Unnamed: 0_level_0,mature_iso_id,mirna_type,mature_iso_name_miRBase,mature_iso_validation_miRBase,seqs,seed_site,target_site
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<fct>,<chr>,<chr>
86,MIMAT0017984,mature,hsa-miR-3607-5p,experimental,,,
87,MIMAT0017985,mature,hsa-miR-3607-3p,experimental,,,
91,MIMAT0018073,mature,hsa-miR-3653-3p,experimental,,,
92,MIMAT0032110,mature,hsa-miR-3653-5p,not_experimental,,,
115,MIMAT0025855,mature,hsa-miR-6723-5p,experimental,,,
156,MIMAT0005905,mature,hsa-miR-1254,experimental,,,


In [26]:
cohort_act <- t(cohort_activity[interset_miRNAs$seed7target, intersect_samples])

colnames(cohort_act) <- interset_miRNAs$mature_iso_name_miRBase[match(colnames(cohort_act), interset_miRNAs$seed7target)]
rownames(cohort_act) <- colnames(cohort_activity[interset_miRNAs$seed7target, intersect_samples])

In [27]:
DGD_act <- t(DGD_activity[interset_miRNAs$seed7target, intersect_samples])

colnames(DGD_act) <- interset_miRNAs$mature_iso_name_miRBase[match(colnames(DGD_act), interset_miRNAs$seed7target)]
rownames(DGD_act) <- colnames(DGD_activity[interset_miRNAs$seed7target, intersect_samples])

In [28]:
exp <- t(expression[interset_miRNAs$mature_iso_id, intersect_samples])
colnames(exp) <- interset_miRNAs$mature_iso_name_miRBase[match(colnames(exp), interset_miRNAs$mature_iso_id)]
rownames(exp) <- colnames(expression[interset_miRNAs$mature_iso_id, intersect_samples])

In [29]:
rownames(interset_miRNAs) <- interset_miRNAs$mature_iso_name_miRBase
rownames(sample_an) <- sample_an$sample_id

The values are stored in an anndata object:

In [30]:
ad <- AnnData(
  X = exp,
  var = interset_miRNAs,
  obs = sample_an,
  layers = list(
    cohort_activity = cohort_act,
    DGD_activity = DGD_act
  )
)

# 

In [31]:
ad

AnnData object with n_obs × n_vars = 5800 × 2450
    obs: 'sample_id', 'cancer_type', 'sample_submitter_id', 'cancer_name', 'primary_site', 'sample_type', 'tissue_type', 'tumor_descriptor', 'age_at_index', 'gender', 'vital_status', 'tissue_or_organ_of_origin', 'color'
    var: 'mature_iso_id', 'mirna_type', 'mature_iso_name_miRBase', 'mature_iso_validation_miRBase', 'seq', 'species', 'seed7', 'seed7target', 'tcgamatcher', 'tcga1match', 'chartr', 'inOncomir', 'sum_effect', 'oncoTissue', 'generalMatcher', 'expMatch', 'expidx', 'memicor', 'totalTargets', 'geneTargets', 'pexpidx', 'htmed'
    layers: 'cohort_activity', 'DGD_activity'

In [32]:
ad$var$name <- interset_miRNAs$mature_iso_name_miRBase
ad$var$mature_id <- interset_miRNAs$mature_iso_id
ad$var$seed_target <- interset_miRNAs$seed7target
ad$var$seed <- interset_miRNAs$seed7
ad$var$sum_effect <- interset_miRNAs$sum_effect

## Step 2: Computing correlations

In [33]:
# Note, run for all activity types

pb = txtProgressBar(min = 1, max = nrow(ad$var), initial = 1, style = 3)

for (type in c("cohort", "DGD")){
  R_pearson = list()
  R_spearman = list()
  pval_pearson = list()
  pval_spearman = list()

  activity_type = paste(type, "_activity", sep = "")

  for (i in 1:nrow(ad$var)){
    pearson <- cor.test(ad[,i]$X, ad[,i]$layers[activity_type], method = "pearson")
    R_pearson[i] <- pearson$estimate
    pval_pearson[i] <- pearson$p.value
    
    spearman <- cor.test(ad[,i]$X, ad[,i]$layers[activity_type], method = "spearman", exact=FALSE)
    R_spearman[i]  <- spearman$estimate
    pval_spearman[i] <- spearman$p.value
    
    setTxtProgressBar(pb,i)}
  close(pb)

  ad$var[[paste0(type, "_R_pearson")]] <- c(R_pearson)
  ad$var[[paste0(type, "_pval_pearson")]] <- c(pval_pearson)
  ad$var[[paste0(type, "_R_spearman")]] <- c(R_spearman)
  ad$var[[paste0(type, "_pval_spearman")]] <- c(pval_spearman)
}

  |                                                                      |   0%



“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”




“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”


## Step 3: Compute means

In [34]:
ad$var$mean_expression <- colMeans(ad$X)
ad$var$cohort_mean_activity <- colMeans(ad$layers['cohort_activity'])
ad$var$DGD_mean_activity <- colMeans(ad$layers['DGD_activity'])


ad$uns$DGD_mean_R_pearson <- mean(ad$var$DGD_R_pearson, na.rm = TRUE)
ad$uns$DGD_mean_R_spearman <- mean(ad$var$DGD_R_spearman, na.rm = TRUE)

ad$uns$cohort_mean_R_pearson <- mean(ad$var$cohort_R_pearson, na.rm = TRUE)
ad$uns$cohort_mean_R_spearman <-mean(ad$var$cohort_R_spearman, na.rm = TRUE)

In [10]:
ad$uns$cohort_mean_R_pearson

ad$uns$DGD_mean_R_pearson

In [8]:
setwd("~/jsp_student_projects/miRNA_DGD_DE_F2024/data")
write_h5ad(ad, "all_samples_pc1.h5ad")

In [24]:
setwd("~/jsp_student_projects/miRNA_DGD_DE_F2024/data")
component_anno <- readRDS(file = "annotation_with_component_info.rds")

ad$obs <- ad$obs %>% left_join(component_anno)

# Step 4: Create dataframe

In [3]:
setwd("~/jsp_student_projects/miRNA_DGD_DE_F2024/data")
ad <- read_h5ad("all_samples_pc1.h5ad")

In [4]:
exp = gather(as.data.frame(ad$X) %>% rownames_to_column("sample_id"), "miRNA", "expression", -sample_id)
DGD_act = gather(as.data.frame(ad$layers["DGD_activity"]) %>% rownames_to_column("sample_id"), "miRNA", "DGD_activity", -sample_id) 
GM_act = gather(as.data.frame(ad$layers["cohort_activity"]) %>% rownames_to_column("sample_id"), "miRNA", "GM_activity", -sample_id) 

df <- left_join(exp, ad$obs) %>%
    left_join(DGD_act) %>%
    left_join(GM_act)

[1m[22mJoining with `by = join_by(sample_id)`
[1m[22mJoining with `by = join_by(sample_id, miRNA)`
[1m[22mJoining with `by = join_by(sample_id, miRNA)`


In [15]:
setwd("~/jsp_student_projects/miRNA_DGD_DE_F2024/data")
saveRDS(df, file="all_samples_df_pc1.Rda")

#df <- readRDS(file = "all_samples_df_pc1.Rda")

#head(df)

In [14]:
head(df)

[1m[22mJoining with `by = join_by(sample_id, cancer_type, sample_submitter_id,
cancer_name, primary_site, sample_type, tissue_type, tumor_descriptor,
age_at_index, gender, vital_status, tissue_or_organ_of_origin, color)`


Unnamed: 0_level_0,sample_id,miRNA,expression,cancer_type,sample_submitter_id,cancer_name,primary_site,sample_type,tissue_type,tumor_descriptor,⋯,vital_status,tissue_or_organ_of_origin,color,DGD_activity,GM_activity,max_prob_density_comp,comp_name_ours,comp_name,max_association_percent,max_prob_density
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<fct>,<chr>,<fct>,<fct>,<fct>,<fct>,<fct>,⋯,<fct>,<fct>,<fct>,<dbl>,<dbl>,<dbl>,<fct>,<fct>,<dbl>,<dbl>
1,TCGA-EW-A6SA-01A-21R-A32P-07,hsa-let-7a-5p,63867.18,BRCA,TCGA-EW-A6SA-01A,Breast Invasive Carcinoma,Breast,Primary Tumor,Tumor,Primary,⋯,Alive,"Breast, NOS",#ED1E91,-1.1463613,-0.82465086,28,Breast,Breast,100.0,9.844606e-20
2,TCGA-E2-A14W-01A-11R-A12D-07,hsa-let-7a-5p,23471.88,BRCA,TCGA-E2-A14W-01A,Breast Invasive Carcinoma,Breast,Primary Tumor,Tumor,Primary,⋯,Alive,"Breast, NOS",#ED1E91,-1.4388003,-0.99281544,28,Breast,Breast,100.0,1.050095e-09
3,TCGA-EW-A1PD-01A-11R-A144-07,hsa-let-7a-5p,23744.03,BRCA,TCGA-EW-A1PD-01A,Breast Invasive Carcinoma,Breast,Primary Tumor,Tumor,Primary,⋯,Alive,"Breast, NOS",#ED1E91,-1.1172353,-0.08194884,28,Breast,Breast,100.0,6.007054e-15
4,TCGA-55-1594-01A-01R-0946-07,hsa-let-7a-5p,10814.66,LUAD,TCGA-55-1594-01A,Lung Adenocarcinoma,Lung,Primary Tumor,Tumor,Primary,⋯,Alive,"Lower lobe, lung",#D2C3DF,-2.237455,-3.47406174,13,Lung,Cells,93.9835,2.944543e-20
5,TCGA-49-6742-11A-01R-1858-07,hsa-let-7a-5p,63666.8,LUAD,TCGA-49-6742-11A,Lung Adenocarcinoma,Lung,Solid Tissue Normal,Normal,Not Applicable,⋯,Dead,"Upper lobe, lung",#D2C3DF,0.5045059,1.64744323,43,Lung,Lung,99.99504,3.1624200000000003e-28
6,TCGA-50-5932-11A-01R-1755-07,hsa-let-7a-5p,81765.71,LUAD,TCGA-50-5932-11A,Lung Adenocarcinoma,Lung,Solid Tissue Normal,Normal,Not Applicable,⋯,Dead,"Upper lobe, lung",#D2C3DF,1.088441,2.59366978,43,Lung,Lung,99.99994,3.068785e-13
