In [177]:
library(tidyverse)

# Custom package
library(rutils)

In [178]:
# Resolve project directories from the shared dev-paths file.
# NOTE(review): assumes the returned object exposes `data_dir` (read below) --
# confirm against rutils::get_dev_directories().
dirs <- rutils::get_dev_directories(dev_paths_file = "../dev_paths.txt")

# TCGA project identifiers.
projects <- c("TCGA-CESC", "TCGA-UCS", "TCGA-UCEC", "TCGA-OV")

# File-name prefixes of the per-dataset analysis result tables.
# NOTE(review): `projects` has four entries but only three datasets are listed
# here -- confirm the omission is intentional.
unified_dsets <- c(
  "unified_cervical_data",
  "unified_uterine_data",
  "unified_uterine_endometrial_data"
)

# Build the path with file.path() instead of pasting "/" separators by hand.
matrisome_path <- file.path(
  dirs$data_dir, "matrisome", "matrisome_hs_masterlist.tsv"
)

In [179]:
# Position in `unified_dsets` of the dataset whose results are inspected below.
dset_idx <- 2
# Fail fast on an out-of-range index: `unified_dsets[dset_idx]` would otherwise
# be NA and every result path would silently become "NA_<suffix>.tsv".
stopifnot(dset_idx %in% seq_along(unified_dsets))

In [180]:
# Cut-offs used when filtering result tables.
# Upper bound on adjusted p-values.
p_thresh <- 0.05
# Lower bound on |log2 fold change|; log2(2) == 1, i.e. a two-fold change.
lfc_thresh <- log2(2)
# Cox PH coefficient cut-off.
coxph_coeff_thresh <- 0.0

In [181]:
# Load the matrisome master list and keep only the annotation columns.
# The gene symbol is exposed as `geneID` so it matches the key that the result
# tables are joined on.
matrisome_df <- rutils::load_matrisome_df(matrisome_path) %>%
  dplyr::select(
    geneID = gene_symbol,
    division,
    category,
    gene_name,
    synonyms
  )

Parsed with column specification:
cols(
  Division = col_character(),
  Category = col_character(),
  `Gene Symbol` = col_character(),
  `Gene Name` = col_character(),
  Synonyms = col_character(),
  HGNC_IDs = col_double(),
  `HGNC_IDs Links` = col_double(),
  UniProt_IDs = col_character(),
  Refseq_IDs = col_character(),
  Orthology = col_character(),
  Notes = col_character()
)


# DEG quick glance

In [182]:
# Read the `_DESeq_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
deg_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_DESeq_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  baseMean = col_double(),
  log2FoldChange = col_double(),
  lfcSE = col_double(),
  stat = col_double(),
  pvalue = col_double(),
  padj = col_double()
)


In [183]:
# Matrisome genes passing both the adjusted p-value and fold-change cut-offs.
# Shows the 20 with the smallest `padj` (ties kept), most significant first.
deg_results_df %>%
  dplyr::filter(padj < p_thresh & abs(log2FoldChange) > lfc_thresh) %>%
  dplyr::select(geneID, baseMean, log2FoldChange, padj) %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = -20, wt = padj) %>%
  dplyr::arrange(padj)
    

geneID,baseMean,log2FoldChange,padj,division,category,gene_name,synonyms
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
SPARCL1,62771.82341,-4.119015,2.1844469999999996e-168,Core matrisome,ECM Glycoproteins,SPARC-like 1 (hevin),PIG33|SC1
CCL14,2978.16055,-5.91407,2.333933e-139,Matrisome-associated,Secreted Factors,chemokine (C-C motif) ligand 14,CC-1|CC-3|CKb1|FLJ16015|HCC-1|HCC-3|MCIF|NCC-2|NCC
COL9A3,1681.07192,5.298154,6.174342e-137,Core matrisome,Collagens,"collagen, type IX, alpha 3",DJ885L7.4.1|EDM3|FLJ90759|IDD|MED
CILP2,603.48165,6.537858,5.460069e-123,Core matrisome,ECM Glycoproteins,cartilage intermediate layer protein 2,CLIP-2|MGC45771
COL11A1,1412.78328,6.774685,1.2574759999999999e-111,Core matrisome,Collagens,"collagen, type XI, alpha 1",CO11A1|COLL6|STL2
WISP2,1000.90723,-5.671508,3.104336e-110,Core matrisome,ECM Glycoproteins,WNT1 inducible signaling pathway protein 2,CCN5|CT58|CTGF-L
FBN2,3298.1379,4.731756,4.812649e-107,Core matrisome,ECM Glycoproteins,fibrillin 2,CCA|DA9
S100A2,340.40472,4.787866,2.2011449999999998e-100,Matrisome-associated,Secreted Factors,S100 calcium binding protein A2,CAN19|MGC111539|S100L
TNXB,14927.39722,-4.23877,3.445513e-98,Core matrisome,ECM Glycoproteins,tenascin XB,HXBL|TENX|TNX|TNXB1|TNXB2|TNXBS|XB|XBS
PODN,5804.23838,-3.553996,7.399959e-96,Core matrisome,Proteoglycans,podocan,MGC24995|PCAN|SLRR5A


# Cox PH quick glance

In [184]:
# Read the `_coxph_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
coxph_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_coxph_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  gene_pval = col_double(),
  gene_coeff = col_double()
)


In [185]:
# Annotate the Cox PH results with matrisome metadata and show the 20 genes
# with the smallest `gene_pval` (ties kept), in ascending p-value order.
coxph_results_df %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = -20, wt = gene_pval) %>%
  dplyr::arrange(gene_pval)

geneID,gene_pval,gene_coeff,division,category,gene_name,synonyms
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
COL7A1,0.0002336249,-0.7225286,Core matrisome,Collagens,"collagen, type VII, alpha 1",EBD1|EBDCT|EBR1
TGM3,0.0004093898,1.1245609,Matrisome-associated,ECM Regulators,"transglutaminase 3 (E polypeptide, protein-glutamine-gamma-glutamyltransferase)",MGC126249|MGC126250|TGE
CST6,0.0005719445,-1.0079816,Matrisome-associated,ECM Regulators,cystatin E/M,-
SDC1,0.0010491945,-0.7943372,Matrisome-associated,ECM-affiliated Proteins,syndecan 1,CD138|SDC|SYND1|syndecan
MUC12,0.0011265731,1.2784272,Matrisome-associated,ECM-affiliated Proteins,"mucin 12, cell surface associated",MUC11
COL5A3,0.0015920889,0.8329635,Core matrisome,Collagens,"collagen, type V, alpha 3",-
WNT10A,0.0017444436,-0.572024,Matrisome-associated,Secreted Factors,"wingless-type MMTV integration site family, member 10A",FLJ14301|SSPS
IFNA1,0.0020149675,-6.510222,Matrisome-associated,Secreted Factors,"interferon, alpha 1",IFL|IFN|IFN-ALPHA|IFNA13|IFNA@|MGC138207|MGC138505
WNT7B,0.0025813922,-0.4752282,Matrisome-associated,Secreted Factors,"wingless-type MMTV integration site family, member 7B",-
TIMP4,0.0027979185,0.4775624,Matrisome-associated,ECM Regulators,TIMP metallopeptidase inhibitor 4,-


# Correlation quick glance

In [186]:
# Read the `_cor_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
cor_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_cor_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  cor = col_double(),
  pval = col_double(),
  n = col_double()
)


In [187]:
# Keep the correlation estimate and its p-value, annotate with matrisome
# metadata, and show the 20 genes with the smallest `pval` (ties kept),
# in ascending p-value order.
cor_results_df %>%
  dplyr::select(geneID, cor, pval) %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = -20, wt = pval) %>%
  dplyr::arrange(pval)

geneID,cor,pval,division,category,gene_name,synonyms
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
FGF16,0.7061038,3.866132e-05,Matrisome-associated,Secreted Factors,fibroblast growth factor 16,-
IL9,0.5942475,0.001081511,Matrisome-associated,Secreted Factors,interleukin 9,HP40|IL-9|P40
IFNA2,0.5942475,0.001081511,Matrisome-associated,Secreted Factors,"interferon, alpha 2",IFNA|INFA2|MGC125764|MGC125765
TSKU,0.5878516,0.001261943,Core matrisome,ECM Glycoproteins,tsukushi small leucine rich proteoglycan homolog (Xenopus laevis),E2IG4|LRRC54|TSK
TECTB,0.5407042,0.003592,Core matrisome,ECM Glycoproteins,tectorin beta,KIAA1560|MGC142057|MGC142059
ANGPTL6,0.5320337,0.004284821,Matrisome-associated,Secreted Factors,angiopoietin-like 6,AGF|ARP5
OGN,0.5116528,0.006372819,Core matrisome,Proteoglycans,osteoglycin,DKFZp586P2421|OG|OIF|SLRR3A
FGL1,-0.4973148,0.008308743,Core matrisome,ECM Glycoproteins,fibrinogen-like 1,HFREP1|HP-041|LFIRE1|MGC12455
FGF6,0.486533,0.01007027,Matrisome-associated,Secreted Factors,fibroblast growth factor 6,HBGF-6|HST2
TGM6,0.4859619,0.01017166,Matrisome-associated,ECM Regulators,transglutaminase 6,TGM3L|TGY|dJ734P14.3


# MI survival quick glance

In [188]:
# Read the `_MI_survival_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
mi_survival_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_MI_survival_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  MI_est_median = col_double()
)


In [189]:
# Annotate the MI survival results with matrisome metadata and show the 20
# genes with the largest `MI_est_median` (ties kept), highest first.
mi_survival_results_df %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = 20, wt = MI_est_median) %>%
  dplyr::arrange(dplyr::desc(MI_est_median))

geneID,MI_est_median,division,category,gene_name,synonyms
<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>
COL9A2,0.3096317,Core matrisome,Collagens,"collagen, type IX, alpha 2",DJ39G22.4|EDM2|MED
CTSG,0.28912,Matrisome-associated,ECM Regulators,cathepsin G,CG|MGC23078
SEMA3D,0.2537188,Matrisome-associated,ECM-affiliated Proteins,"sema domain, immunoglobulin domain (Ig), short basic domain, secreted, (semaphorin) 3D",MGC39708|Sema-Z2|coll-2
IL17C,0.2498483,Matrisome-associated,Secreted Factors,interleukin 17C,CX2|IL-17C|IL-21|MGC126884|MGC138401
S100A3,0.2403626,Matrisome-associated,Secreted Factors,S100 calcium binding protein A3,S100E
ADAM33,0.2396814,Matrisome-associated,ECM Regulators,ADAM metallopeptidase domain 33,C20orf153|DJ964F7.1|DKFZp434K0521|FLJ35308|FLJ3675
TGFA,0.2301018,Matrisome-associated,Secreted Factors,"transforming growth factor, alpha",TFGA
SPP1,0.2249892,Core matrisome,ECM Glycoproteins,secreted phosphoprotein 1,BNSP|BSPI|ETA-1|MGC110940|OPN
COL27A1,0.2236791,Core matrisome,Collagens,"collagen, type XXVII, alpha 1",FLJ11895|KIAA1870|MGC11337
MMP13,0.2233744,Matrisome-associated,ECM Regulators,matrix metallopeptidase 13 (collagenase 3),CLG3|MANDP1


# MAE GBR quick glance

In [190]:
# Read the `_mae_gbr_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
mae_gbr_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_mae_gbr_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  mean_imp_0 = col_double(),
  score_pct_improvement_0 = col_double(),
  mean_imp_1 = col_double(),
  score_pct_improvement_1 = col_double(),
  mean_imp_2 = col_double(),
  score_pct_improvement_2 = col_double(),
  mean_imp_3 = col_double(),
  score_pct_improvement_3 = col_double(),
  mean_imp_4 = col_double(),
  score_pct_improvement_4 = col_double(),
  consensus_imp_mean = col_double(),
  consensus_imp_std = col_double(),
  consensus_imp_cv = col_double(),
  consensus_vote = col_logical()
)


In [191]:
# Average the `score_pct_improvement_*` columns per gene (the only columns
# whose names contain "pct"), annotate with matrisome metadata, and show the
# 20 genes with the largest mean (ties kept), highest first.
mae_gbr_results_df %>%
  dplyr::mutate(
    consensus_mean_pct_imp = rowMeans(
      dplyr::select(., dplyr::contains("pct"))
    )
  ) %>%
  dplyr::select(geneID, consensus_vote, consensus_mean_pct_imp) %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = 20, wt = consensus_mean_pct_imp) %>%
  dplyr::arrange(dplyr::desc(consensus_mean_pct_imp))

geneID,consensus_vote,consensus_mean_pct_imp,division,category,gene_name,synonyms
<chr>,<lgl>,<dbl>,<chr>,<chr>,<chr>,<chr>
CTSV,True,1.03170675,Matrisome-associated,ECM Regulators,cathepsin L2,CATL2|CTSU|CTSV|MGC125957|CTSL2
INHBC,True,0.9924938,Matrisome-associated,Secreted Factors,"inhibin, beta C",IHBC
CCL16,True,0.08939644,Matrisome-associated,Secreted Factors,chemokine (C-C motif) ligand 16,CKb12|HCC-4|ILINCK|LCC-1|LEC|LMC|MGC117051|Mtn-1|N
ANGPTL7,True,0.07644699,Matrisome-associated,Secreted Factors,angiopoietin-like 7,AngX|CDT6|RP4-647M16.2|dJ647M16.1
TMPRSS15,False,0.0714004,Matrisome-associated,ECM Regulators,"protease, serine, 7 (enterokinase)",ENTK|MGC133046|TMPRSS15|PRSS7
SFRP5,True,0.04796304,Matrisome-associated,Secreted Factors,secreted frizzled-related protein 5,SARP3
FN1,True,0.04042421,Core matrisome,ECM Glycoproteins,fibronectin 1,CIG|DKFZp686F10164|DKFZp686H0342|DKFZp686I1370|DKF
WISP1,True,0.03666048,Core matrisome,ECM Glycoproteins,WNT1 inducible signaling pathway protein 1,CCN4|WISP1c|WISP1i|WISP1tc
COL25A1,True,0.03610745,Core matrisome,Collagens,"collagen, type XXV, alpha 1",CLAC|CLACP
CLEC12A,True,0.03374429,Matrisome-associated,ECM-affiliated Proteins,"C-type lectin domain family 12, member A",CLL-1|CLL1|DCAL-2|MGC70602|MICL


# EV GBR quick glance

In [192]:
# Read the `_ev_gbr_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
ev_gbr_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_ev_gbr_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  mean_imp_0 = col_double(),
  score_pct_improvement_0 = col_double(),
  mean_imp_1 = col_double(),
  score_pct_improvement_1 = col_double(),
  mean_imp_2 = col_double(),
  score_pct_improvement_2 = col_double(),
  mean_imp_3 = col_double(),
  score_pct_improvement_3 = col_double(),
  mean_imp_4 = col_double(),
  score_pct_improvement_4 = col_double(),
  consensus_imp_mean = col_double(),
  consensus_imp_std = col_double(),
  consensus_imp_cv = col_double(),
  consensus_vote = col_logical()
)


In [193]:
# Average the `score_pct_improvement_*` columns per gene (the only columns
# whose names contain "pct"), annotate with matrisome metadata, and show the
# 20 genes with the largest mean (ties kept), highest first.
ev_gbr_results_df %>%
  dplyr::mutate(
    consensus_mean_pct_imp = rowMeans(
      dplyr::select(., dplyr::contains("pct"))
    )
  ) %>%
  dplyr::select(geneID, consensus_vote, consensus_mean_pct_imp) %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = 20, wt = consensus_mean_pct_imp) %>%
  dplyr::arrange(dplyr::desc(consensus_mean_pct_imp))

geneID,consensus_vote,consensus_mean_pct_imp,division,category,gene_name,synonyms
<chr>,<lgl>,<dbl>,<chr>,<chr>,<chr>,<chr>
FGF1,True,32.1966959,Matrisome-associated,Secreted Factors,fibroblast growth factor 1 (acidic),AFGF|ECGF|ECGF-beta|ECGFA|ECGFB|FGF-alpha|FGFA|GLI
INHBC,True,30.7086815,Matrisome-associated,Secreted Factors,"inhibin, beta C",IHBC
LGALS9B,True,28.983941,Matrisome-associated,ECM-affiliated Proteins,"lectin, galactoside-binding, soluble, 9B",FLJ58146|FLJ75081
SLIT2,True,23.8823489,Core matrisome,ECM Glycoproteins,slit homolog 2 (Drosophila),FLJ14420|SLIL3|Slit-2
CTSG,True,15.2848516,Matrisome-associated,ECM Regulators,cathepsin G,CG|MGC23078
SFRP2,True,11.8558355,Matrisome-associated,Secreted Factors,secreted frizzled-related protein 2,FRP-2|SARP1|SDF-5
CTSV,True,7.6207291,Matrisome-associated,ECM Regulators,cathepsin L2,CATL2|CTSU|CTSV|MGC125957|CTSL2
WNT7B,True,7.5323356,Matrisome-associated,Secreted Factors,"wingless-type MMTV integration site family, member 7B",-
TMPRSS15,True,5.6932101,Matrisome-associated,ECM Regulators,"protease, serine, 7 (enterokinase)",ENTK|MGC133046|TMPRSS15|PRSS7
OMD,True,5.4181083,Core matrisome,Proteoglycans,osteomodulin,OSAD|SLRR2C


# MI FIGO quick glance

In [194]:
# Read the `_MI_figo_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
mi_figo_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_MI_figo_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  MI_est_median = col_double()
)


In [195]:
# Annotate the MI FIGO results with matrisome metadata and show the 20 genes
# with the largest `MI_est_median` (ties kept), highest first.
mi_figo_results_df %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = 20, wt = MI_est_median) %>%
  dplyr::arrange(dplyr::desc(MI_est_median))

geneID,MI_est_median,division,category,gene_name,synonyms
<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>
LOXL3,0.9755391,Matrisome-associated,ECM Regulators,lysyl oxidase-like 3,LOXL
ADAM8,0.9283197,Matrisome-associated,ECM Regulators,ADAM metallopeptidase domain 8,CD156|MGC134985|MS2
ANXA8L1,0.8728699,Matrisome-associated,ECM-affiliated Proteins,annexin A8-like 1,bA301J7.3
CCL3L3,0.8561879,Matrisome-associated,Secreted Factors,chemokine (C-C motif) ligand 3-like 3,464.2|CCL3L1|D17S1718|LD78|LD78BETA|MGC12815|SCYA3
PPBP,0.8160963,Matrisome-associated,Secreted Factors,pro-platelet basic protein (chemokine (C-X-C motif) ligand 7),B-TG1|Beta-TG|CTAP-III|CTAP3|CTAPIII|CXCL7|LA-PF4|
NTNG1,0.8131801,Core matrisome,ECM Glycoproteins,netrin G1,KIAA0976|Lmnt1
IFNA5,0.7771863,Matrisome-associated,Secreted Factors,"interferon, alpha 5",INFA5
PLXNA2,0.758161,Matrisome-associated,ECM-affiliated Proteins,plexin A2,FLJ11751|FLJ30634|KIAA0463|OCT|PLXN2
SBSPON,0.7567825,Core matrisome,ECM Glycoproteins,chromosome 8 open reading frame 84,FLJ40021|RPESP|C8orf84
CX3CL1,0.7385588,Matrisome-associated,Secreted Factors,chemokine (C-X3-C motif) ligand 1,ABCD-3|C3Xkine|CXC3|CXC3C|NTN|NTT|SCYD1|fractalkin


# F1 GBC quick glance

In [196]:
# Read the `_gbc_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
f1_gbc_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_gbc_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  mean_imp_0 = col_double(),
  score_pct_improvement_0 = col_double(),
  mean_imp_1 = col_double(),
  score_pct_improvement_1 = col_double(),
  mean_imp_2 = col_double(),
  score_pct_improvement_2 = col_double(),
  mean_imp_3 = col_double(),
  score_pct_improvement_3 = col_double(),
  mean_imp_4 = col_double(),
  score_pct_improvement_4 = col_double(),
  consensus_imp_mean = col_double(),
  consensus_imp_std = col_double(),
  consensus_imp_cv = col_double(),
  consensus_vote = col_logical()
)


In [197]:
# Average the `score_pct_improvement_*` columns per gene (the only columns
# whose names contain "pct"), annotate with matrisome metadata, and show the
# 20 genes with the largest mean (ties kept), highest first.
f1_gbc_results_df %>%
  dplyr::mutate(
    consensus_mean_pct_imp = rowMeans(
      dplyr::select(., dplyr::contains("pct"))
    )
  ) %>%
  dplyr::select(geneID, consensus_vote, consensus_mean_pct_imp) %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = 20, wt = consensus_mean_pct_imp) %>%
  dplyr::arrange(dplyr::desc(consensus_mean_pct_imp))

geneID,consensus_vote,consensus_mean_pct_imp,division,category,gene_name,synonyms
<chr>,<lgl>,<dbl>,<chr>,<chr>,<chr>,<chr>
AMELX,False,2.200979,Core matrisome,ECM Glycoproteins,"amelogenin (amelogenesis imperfecta 1, X-linked)",AIH1|ALGN|AMG|AMGL|AMGX
NCAN,True,1.832294,Core matrisome,Proteoglycans,neurocan,CSPG3|FLJ44681
CSF3,False,1.675147,Matrisome-associated,Secreted Factors,colony stimulating factor 3 (granulocyte),C17orf33|CSF3OS|G-CSF|GCSF|MGC45931
XCL1,False,1.2901529,Matrisome-associated,Secreted Factors,chemokine (C motif) ligand 1,ATAC|LPTN|LTN|SCM-1|SCM-1a|SCM1|SCYC1
LPA,False,0.9904496,Matrisome-associated,ECM Regulators,"lipoprotein, Lp(a)",AK38|APOA|LP
TGM3,False,0.8955166,Matrisome-associated,ECM Regulators,"transglutaminase 3 (E polypeptide, protein-glutamine-gamma-glutamyltransferase)",MGC126249|MGC126250|TGE
PF4V1,False,0.7594193,Matrisome-associated,Secreted Factors,platelet factor 4 variant 1,CXCL4L1|CXCL4V1|PF4-ALT|PF4A|SCYB4V1
CHRDL2,False,0.7342582,Matrisome-associated,Secreted Factors,chordin-like 2,BNF1|CHL2|DKFZp586N2124|FKSG37
OSM,False,0.6853571,Matrisome-associated,Secreted Factors,oncostatin M,MGC20461
SERPINA4,False,0.6424801,Matrisome-associated,ECM Regulators,"serpin peptidase inhibitor, clade A (alpha-1 antiproteinase, antitrypsin), member 4",KAL|KLST|KST|PI4|kallistatin


# F1 RFC quick glance

In [198]:
# Read the `_rfc_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
f1_rfc_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_rfc_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  mean_imp_0 = col_double(),
  score_pct_improvement_0 = col_double(),
  mean_imp_1 = col_double(),
  score_pct_improvement_1 = col_double(),
  mean_imp_2 = col_double(),
  score_pct_improvement_2 = col_double(),
  mean_imp_3 = col_double(),
  score_pct_improvement_3 = col_double(),
  mean_imp_4 = col_double(),
  score_pct_improvement_4 = col_double(),
  consensus_imp_mean = col_double(),
  consensus_imp_std = col_double(),
  consensus_imp_cv = col_double(),
  consensus_vote = col_logical()
)


In [199]:
# Average the `score_pct_improvement_*` columns per gene (the only columns
# whose names contain "pct"), annotate with matrisome metadata, and show the
# 20 genes with the largest mean (ties kept), highest first.
f1_rfc_results_df %>%
  dplyr::mutate(
    consensus_mean_pct_imp = rowMeans(
      dplyr::select(., dplyr::contains("pct"))
    )
  ) %>%
  dplyr::select(geneID, consensus_vote, consensus_mean_pct_imp) %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = 20, wt = consensus_mean_pct_imp) %>%
  dplyr::arrange(dplyr::desc(consensus_mean_pct_imp))

geneID,consensus_vote,consensus_mean_pct_imp,division,category,gene_name,synonyms
<chr>,<lgl>,<dbl>,<chr>,<chr>,<chr>,<chr>
NCAN,False,2.2305067,Core matrisome,Proteoglycans,neurocan,CSPG3|FLJ44681
TCHH,False,2.2244728,Matrisome-associated,Secreted Factors,trichohyalin,MGC157889|MGC157890|THH|THL|TRHY
CHRDL2,False,1.8886852,Matrisome-associated,Secreted Factors,chordin-like 2,BNF1|CHL2|DKFZp586N2124|FKSG37
XCL1,False,1.8300103,Matrisome-associated,Secreted Factors,chemokine (C motif) ligand 1,ATAC|LPTN|LTN|SCM-1|SCM-1a|SCM1|SCYC1
COL11A1,False,1.6658268,Core matrisome,Collagens,"collagen, type XI, alpha 1",CO11A1|COLL6|STL2
GDF6,False,1.4732691,Matrisome-associated,Secreted Factors,growth differentiation factor 6,BMP13|CDMP2|KFS|KFSL|MCOP4|MGC158100|MGC158101|SCD
TGM3,False,1.4680128,Matrisome-associated,ECM Regulators,"transglutaminase 3 (E polypeptide, protein-glutamine-gamma-glutamyltransferase)",MGC126249|MGC126250|TGE
LPA,False,1.2138383,Matrisome-associated,ECM Regulators,"lipoprotein, Lp(a)",AK38|APOA|LP
TIMP2,False,1.1756255,Matrisome-associated,ECM Regulators,TIMP metallopeptidase inhibitor 2,CSC-21K
ZP3,False,1.1405991,Core matrisome,ECM Glycoproteins,zona pellucida glycoprotein 3 (sperm receptor),ZP3A|ZP3B|ZPC


# L1 LR quick glance

In [200]:
# Read the `_l1_lr_results.tsv` table of the selected dataset.
# file.path() supplies the directory separator instead of a pasted "/".
l1_lr_results_df <- readr::read_tsv(
  file.path(
    dirs$analysis_dir,
    paste0(unified_dsets[dset_idx], "_l1_lr_results.tsv")
  )
)

Parsed with column specification:
cols(
  geneID = col_character(),
  mean_imp_0 = col_double(),
  score_pct_improvement_0 = col_double(),
  mean_imp_1 = col_double(),
  score_pct_improvement_1 = col_double(),
  mean_imp_2 = col_double(),
  score_pct_improvement_2 = col_double(),
  mean_imp_3 = col_double(),
  score_pct_improvement_3 = col_double(),
  mean_imp_4 = col_double(),
  score_pct_improvement_4 = col_double(),
  consensus_imp_mean = col_double(),
  consensus_imp_std = col_double(),
  consensus_imp_cv = col_double(),
  consensus_vote = col_logical()
)


In [201]:
# Average the `score_pct_improvement_*` columns per gene (the only columns
# whose names contain "pct"), annotate with matrisome metadata, and show the
# 20 genes with the largest mean (ties kept), highest first.
l1_lr_results_df %>%
  dplyr::mutate(
    consensus_mean_pct_imp = rowMeans(
      dplyr::select(., dplyr::contains("pct"))
    )
  ) %>%
  dplyr::select(geneID, consensus_vote, consensus_mean_pct_imp) %>%
  dplyr::inner_join(y = matrisome_df, by = "geneID") %>%
  dplyr::top_n(n = 20, wt = consensus_mean_pct_imp) %>%
  dplyr::arrange(dplyr::desc(consensus_mean_pct_imp))

geneID,consensus_vote,consensus_mean_pct_imp,division,category,gene_name,synonyms
<chr>,<lgl>,<dbl>,<chr>,<chr>,<chr>,<chr>
FGF1,True,9.549765,Matrisome-associated,Secreted Factors,fibroblast growth factor 1 (acidic),AFGF|ECGF|ECGF-beta|ECGFA|ECGFB|FGF-alpha|FGFA|GLI
CELA1,True,8.284395,Matrisome-associated,ECM Regulators,"chymotrypsin-like elastase family, member 1",ELA1
PDGFC,True,7.715524,Matrisome-associated,Secreted Factors,platelet derived growth factor C,FALLOTEIN|SCDGF
CCL16,True,6.328994,Matrisome-associated,Secreted Factors,chemokine (C-C motif) ligand 16,CKb12|HCC-4|ILINCK|LCC-1|LEC|LMC|MGC117051|Mtn-1|N
NCAN,True,6.214198,Core matrisome,Proteoglycans,neurocan,CSPG3|FLJ44681
XCL1,True,5.835793,Matrisome-associated,Secreted Factors,chemokine (C motif) ligand 1,ATAC|LPTN|LTN|SCM-1|SCM-1a|SCM1|SCYC1
MEGF11,True,5.443356,Matrisome-associated,Secreted Factors,multiple EGF-like-domains 11,DKFZp434L121|KIAA1781
BMPER,True,5.424592,Core matrisome,ECM Glycoproteins,BMP binding endothelial regulator,CRIM3|CV-2|CV2
WNT1,True,4.79032,Matrisome-associated,Secreted Factors,"wingless-type MMTV integration site family, member 1",INT1
SERPINB9,True,4.666706,Matrisome-associated,ECM Regulators,"serpin peptidase inhibitor, clade B (ovalbumin), member 9",CAP-3|CAP3|PI9
