# Eigengene Single-Variant Association Exploration

**Created**: 25 November 2021

## Environment

In [1]:
# Install the Bioconductor package manager when it is not already available.
if (isFALSE(requireNamespace("BiocManager", quietly = TRUE))) {
    install.packages("BiocManager")
}

# ComplexHeatmap is installed through BiocManager when it is missing.
if (isFALSE(requireNamespace("ComplexHeatmap", quietly = TRUE))) {
    BiocManager::install("ComplexHeatmap")
}

In [2]:
# Packages used throughout the notebook.
library(tidyverse)
library(RColorBrewer)
library(ComplexHeatmap)
library(data.table)

# Relative paths used below (the sourced theme script and the saved SVG) are
# resolved against this working directory.
setwd("~/eQTL_pQTL_Characterization/")

# Run the ggplot theme script. NOTE(review): its contents are not visible here.
source("04_Expression/scripts/utils/ggplot_theme.R")

── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.6     ✔ dplyr   1.0.7
✔ tidyr   1.1.4     ✔ stringr 1.4.0
✔ readr   2.1.1     ✔ forcats 0.5.1

── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Loading required package: grid

ComplexHeatmap version 2.6.2
Bioconductor page: 

## Load Data

In [3]:
# Clinical demographics (first sheet); sex and diagnosis are coerced to numeric.
demographics <- dplyr::mutate(
    readxl::read_xls("/nfs/team282/data/gains_team282/ClinicalData/DEMO_12jun2019.xls", sheet=1),
    sex=as.numeric(sex),
    diagnosis=as.numeric(diagnosis)
)

# Strip a leading "GA" from subject barcodes so they match the other tables.
ga.prefix.index <- grepl("GA", demographics$SubjectBarCode)
demographics$SubjectBarCode[ga.prefix.index] <- sub(
    "^GA", "", demographics$SubjectBarCode[ga.prefix.index]
)

In [4]:
# Load the SRS prediction table. `header=TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
srs.info <- read.table("/nfs/team282/data/gains_team282/full-gains-SRS-predictions_mNN-RF.tsv", header=TRUE)

# Strip a leading "GA" from the sample IDs.
ga.prefix.index <- grepl("GA", srs.info$Sample_id)
srs.info$Sample_id[ga.prefix.index] <- gsub("^GA", "", srs.info$Sample_id[ga.prefix.index])

# Keep only the RNA-seq rows and index the table by sample ID.
# row.names<- will error if Sample_id is not unique after filtering.
srs.info <- srs.info %>%
  dplyr::filter(Assay=="RNA-seq")
row.names(srs.info) <- srs.info$Sample_id

In [5]:
# Load covariates and PEER factors. The row names are the sample IDs; the text
# before the first underscore is taken as the GAinS (patient) ID.
# vapply() pins the result to one character value per sample, so an unexpected
# split result fails loudly instead of silently changing the column type.
covs <- read.table("~/gains_team282/eqtl/data/covs_and_peer_factors.txt") %>%
    dplyr::mutate(Sample.ID=rownames(.)) %>%
    dplyr::mutate(GAinS.ID=vapply(strsplit(Sample.ID, "_"), function(x) { x[1] }, character(1)))

# Strip a leading "GA" from both identifier columns. The index is computed on
# GAinS.ID and reused for Sample.ID, which starts with the same text.
ga.prefix.index <- grepl("GA", covs$GAinS.ID)
covs$GAinS.ID[ga.prefix.index] <- gsub("^GA", "", covs$GAinS.ID[ga.prefix.index])
covs$Sample.ID[ga.prefix.index] <- gsub("^GA", "", covs$Sample.ID[ga.prefix.index])

In [6]:
# Covariate groups: "held out" clinical/genotype covariates and the PEER factors.
held.out <- c("Neutrophils", "Lymphocytes", "Monocytes", paste0("PC", 1:7), "Diagnosis", "SRSq", "sex")
peer <- paste0("PEER_", 1:30)

# Attach demographics (by patient ID) and SRS information (by sample ID,
# matched against the row names of srs.info).
covs <- merge(covs, demographics, by.x="GAinS.ID", by.y="SubjectBarCode")
covs <- merge(covs, srs.info, by.x="Sample.ID", by.y=0)

# NOTE(review): this select only moves `diagnosis` to the front; it does not
# rename it. held.out asks for "Diagnosis" (capital D), which any_of() will
# pick up only if such a column already exists -- confirm which is intended.
covs <- dplyr::select(covs, diagnosis=diagnosis, everything())
covs <- dplyr::select(covs, Sample.ID, any_of(held.out), any_of(peer))
covs <- as.data.frame(covs)

# Index by sample ID and drop the now-redundant ID column.
rownames(covs) <- covs$Sample.ID
covs$Sample.ID <- NULL

head(covs)

Unnamed: 0_level_0,Neutrophils,Lymphocytes,Monocytes,PC1,PC2,PC3,PC4,PC5,PC6,PC7,⋯,PEER_21,PEER_22,PEER_23,PEER_24,PEER_25,PEER_26,PEER_27,PEER_28,PEER_29,PEER_30
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
UK01050129_5,-1.7381318,1.8281826,1.2829455,0.177279994,0.051694602,0.0248603,-0.035102502,-0.00455149,-0.0603707,0.0396488,⋯,-0.001172263,-0.05533835,0.049742281,0.03725176,0.04495762,0.12696229,0.08175036,0.05295999,0.164950684,-0.10379386
UK01060123_3,-0.384537,0.3878375,0.4548023,-0.000388133,-0.000674099,0.00810357,0.000212791,0.0339615,0.0266317,-0.0188528,⋯,-0.012723563,0.02226736,-0.027520355,-0.05782387,0.03653939,0.05959492,-0.02611259,-0.01790827,0.012047987,0.05554044
UK01070117_3,-0.7738039,0.5846032,0.9325467,0.0164323,-0.048464298,0.00813966,-0.0476996,-0.0792683,0.0263629,-0.0822474,⋯,0.040162142,0.06840651,6.2529e-05,0.05292596,0.01705806,0.05931079,0.0896273,0.03772458,0.008254956,-0.01570964
UK01080111_1,0.1305646,0.2888638,-0.6232047,0.161789,0.036224801,0.00467903,-0.048611499,0.00180036,-0.0352357,0.0501251,⋯,-0.002928361,-0.03274319,0.088269033,0.07396927,-0.01906336,0.02454052,0.04243371,0.03055887,0.12338376,-0.02276623
UK01110093_1,1.0909779,-1.248816,-0.4429439,-0.00629672,0.00282151,0.00699239,0.00435165,0.00268802,0.0166922,-0.00733983,⋯,-0.108193189,0.01590979,-0.093242317,-0.03037383,-0.07385907,-0.05968965,0.0969543,-0.03975484,0.130198911,-0.05011296
UK01110093_5,0.2302416,0.0,-0.3306479,-0.00629672,0.00282151,0.00699239,0.00435165,0.00268802,0.0166922,-0.00733983,⋯,-0.018054215,0.02094322,-0.008623131,0.09560122,-0.07802416,0.02365689,0.08040638,-0.02349218,0.189585552,-0.07635343


In [7]:
# Module eigengenes; the first CSV column is used as row names
# (rows are samples and columns are ME_* modules in the output below).
eigengenes <- read.csv("~/gains_team282/nikhil/expression/gene_expression/eigengenes.clr.csv", row.names=1)

# Preview the first rows.
head(eigengenes)

Unnamed: 0_level_0,ME_1,ME_2,ME_3,ME_4,ME_5,ME_6,ME_7,ME_8,ME_9,ME_10,⋯,ME_17,ME_18,ME_19,ME_20,ME_21,ME_22,ME_23,ME_24,ME_25,ME_26
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
UK02270173_3,-0.016115,-0.00873998,0.017559222,-0.027088697,-0.014542344,-0.051399999,0.046803451,-0.034436746,-0.039087399,0.007150941,⋯,-0.028049775,-0.008129458,-0.0366723556,-0.02587513,-0.028752001,-0.029350268,-0.033465625,0.010493801,0.00122623,-0.024552444
UK15130120_3,0.02461525,6.068727e-05,0.019840461,-0.001849399,0.015906557,-0.042843776,0.027117922,0.008308366,-0.052541896,0.002858297,⋯,-0.05142435,0.032422585,-0.0460940914,-0.020925083,-0.045620906,0.001037124,-0.046257297,-0.006227393,0.03094529,-0.009247218
UK58000006_3,-0.02437838,-0.02023844,0.011494096,-0.016979651,-0.012391572,-0.013535154,0.028149929,-0.036083414,0.006142253,-0.012787746,⋯,0.003129053,-0.042368235,-0.0007423982,-0.016100324,-0.004058335,-0.004009045,-0.033120185,0.022609639,-0.02542599,-0.015154949
UK47010004_3,-0.01080795,0.00869839,0.007577865,0.013627892,-0.004619325,0.002682967,-0.009939209,0.006247252,0.002732679,0.019114337,⋯,0.011108371,-0.017369738,-0.0004493176,-0.008345582,0.019301199,-0.016442041,0.005647093,0.014716956,-0.01134338,0.010130191
UK42020088_5,-0.01290457,0.03118465,0.015750574,-0.011878781,-0.017614265,0.003339252,0.002623253,-0.001437406,0.001814986,0.002952335,⋯,-0.003885891,0.029755732,-0.0015589206,0.031183587,-0.011606689,-0.00741297,0.003816931,-0.025490423,0.02710027,-0.01525664
UK47490007_3,0.01069874,0.03416032,0.013941807,0.025513984,0.009282998,0.008292475,-0.022579807,0.027908019,-0.012258133,-0.053561098,⋯,-0.018626363,0.027655567,-0.0135092661,-0.027328549,-0.005307802,-0.020544032,-0.016863817,0.000202337,0.02977359,-0.00388478


In [8]:
# Gene-to-module assignments (columns Gene and Module in the output below).
modules <- read.csv("~/gains_team282/nikhil/expression/gene_expression/modules.csv")

In [9]:
# Preview the gene-to-module table.
head(modules)

Unnamed: 0_level_0,Gene,Module
Unnamed: 0_level_1,<chr>,<chr>
1,ENSG00000005206,Module_1
2,ENSG00000005243,Module_1
3,ENSG00000005448,Module_1
4,ENSG00000005882,Module_1
5,ENSG00000006831,Module_1
6,ENSG00000007168,Module_1


In [10]:
# Gene expression matrix (rows are genes, columns are samples in the output below).
# NOTE(review): the file name suggests log-CPM values -- confirm.
gene.exp <- read.table("/lustre/scratch119/humgen/projects/gains_team282/eqtl/data/logcpm_864_20412_hla.txt")

In [11]:
# Preview the expression matrix.
head(gene.exp)

Unnamed: 0_level_0,UK02270173_3,UK15130120_3,UK58000006_3,UK47010004_3,UK42020088_5,UK47490007_3,UK02770164_3,UK02770164_5,UK02630151_3,UK42150107_1,⋯,UK59070043_3,UK59070043_5,UK02510223_3,UK02XX0336_5,UK29090086_3,UK02XX0335_1,UK02XX0334_3,UK01210130_3,UK01210130_5,UK01380125_1
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ENSG00000230521,0.6644339,0.58271604,0.4046756,0.20905157,0.2211571,0.4288519,0.5133516,0.57768877,0.6990766,0.3812037,⋯,0.3823184,0.5423684,0.8676263,0.73833461,0.6461583,0.1227063,0.9621386,0.65838459,1.03561474,0.4457301
ENSG00000225864,0.9817872,0.43879936,0.6022344,0.92340489,0.2211571,1.1450686,0.1921362,0.57768877,0.1974851,1.2290495,⋯,0.6842527,0.8539227,0.91886777,0.1258162,0.5511865,0.4384007,0.5465139,0.40803066,0.46899151,0.3544115
ENSG00000227766,1.2117156,0.30682669,0.662501,0.79963286,0.6445686,0.313029,0.1183231,0.1675045,0.15059,1.0533199,⋯,0.4901384,0.4340781,0.37504164,0.04316943,0.6151905,0.3406488,0.3147798,0.43511377,0.17382742,0.1157514
ENSG00000237669,0.6420763,0.6925683,0.9792012,0.69929584,0.3765156,1.0756817,0.891205,0.60129641,0.8534668,1.0533199,⋯,1.0790156,0.6102946,1.03953331,0.97965846,0.7061683,0.7489857,0.7919934,0.85168813,0.69120049,1.0798771
ENSG00000271581,2.4744895,1.3315314,1.8310464,2.47531701,1.8812042,2.0531366,0.5972544,1.00686213,1.2811361,2.4244938,⋯,2.4728831,1.5338898,1.59080138,0.62923925,1.2804253,1.4727511,1.218765,1.46471386,1.44139443,1.189625
ENSG00000285647,0.0,0.03379072,0.0,0.05517067,0.674827,0.0,0.0,0.03509293,0.6990766,1.6556266,⋯,3.4117627,2.4012963,0.04682116,0.0,0.0,0.4695662,0.3762952,0.07078527,0.07206309,0.4159282


## Gene Expression PCs

The WGCNA procedure discards ~8000 genes. I will calculate PCs from these genes to control for technical factors not accounted for in other covariates. I am hypothesizing that the modules formed from the rest of the genes represent true biological signals. I'll include the first 30 PCs.

In [12]:
# Genes that were not placed in any module.
unassigned.genes <- with(modules, Gene[Module == "Unassigned"])

# Samples-by-genes matrix of the unassigned genes, centred and scaled per gene
# before the singular value decomposition.
unassigned.gene.exp <- t(gene.exp[unassigned.genes,])
unassigned.gene.exp.svd <- svd(scale(unassigned.gene.exp, center=TRUE, scale=TRUE))

In [13]:
# Names for the first 30 gene expression PCs.
pcs <- paste0("Gene_Exp_PC", 1:30)

# Take the first 30 left singular vectors and label them: rows by sample ID
# (with any leading "GA" removed), columns by PC name.
gene.exp.pcs <- unassigned.gene.exp.svd$u[, seq_along(pcs)]
dimnames(gene.exp.pcs) <- list(
    sub("^GA", "", rownames(unassigned.gene.exp)),
    pcs
)

# Reorder/subset the rows to match the covariate table.
gene.exp.pcs <- gene.exp.pcs[rownames(covs),]

In [14]:
# Preview the gene expression PCs.
head(gene.exp.pcs)

Unnamed: 0,Gene_Exp_PC1,Gene_Exp_PC2,Gene_Exp_PC3,Gene_Exp_PC4,Gene_Exp_PC5,Gene_Exp_PC6,Gene_Exp_PC7,Gene_Exp_PC8,Gene_Exp_PC9,Gene_Exp_PC10,⋯,Gene_Exp_PC21,Gene_Exp_PC22,Gene_Exp_PC23,Gene_Exp_PC24,Gene_Exp_PC25,Gene_Exp_PC26,Gene_Exp_PC27,Gene_Exp_PC28,Gene_Exp_PC29,Gene_Exp_PC30
UK01050129_5,-0.0098921471,-0.09457859,0.019619028,-0.006053968,-0.010014475,0.001390533,-0.01882312,0.045401167,0.006883346,0.006755238,⋯,0.02999848,0.0011984274,0.01692297,-0.008912423,-0.02361353,0.005014524,0.015967724,-0.0186314749,0.00339096,-0.010916913
UK01060123_3,0.0049384522,-0.01108749,0.049383085,-0.012490938,-0.01054288,-0.016914468,-0.02053907,-0.032108852,-0.013645268,0.00908,⋯,-0.012985351,-0.0018916478,0.007902303,0.053626065,-0.04564304,0.015595516,0.018625615,0.0439581234,-0.009012608,0.004920957
UK01070117_3,-0.0110037418,-0.02078242,0.012070564,0.014861359,0.008699888,0.045287156,-0.02482057,-0.027452999,-0.027263803,0.001115378,⋯,-0.017683965,-0.0004909425,-0.000824064,-0.015669003,-0.03225682,0.004759373,0.009953229,0.0006137304,0.016741733,-0.050861668
UK01080111_1,0.0048234237,-0.01423497,-0.008435125,-0.011477852,-0.078491748,0.003516397,0.04455594,0.004470054,-0.019130497,-0.067836283,⋯,0.004055798,0.0026828586,-0.011826754,-0.026489872,-0.0107378,0.005128157,0.02581314,-0.031698368,-0.004046843,0.024874972
UK01110093_1,-0.0005996589,0.09544154,-0.02381844,0.093871806,-0.020261902,-0.048015885,-0.01906093,-0.028518358,0.003076185,0.002225801,⋯,-0.054920139,0.0207910453,0.03847084,0.005280355,0.01412896,-0.069819205,-0.01425291,-0.0625275028,-0.034057754,0.025129987
UK01110093_5,-0.0361828459,0.07710132,0.009659849,0.098195882,0.033509565,-0.041692562,-0.03817891,0.058626027,0.041034816,-0.058515295,⋯,-0.052643654,0.0541278245,-0.03659409,0.040833731,-0.04799052,-0.048906855,-0.051968698,-0.0303978809,0.011576313,-0.047137065


In [15]:
# Append the gene expression PCs as extra columns. cbind() pairs rows by
# position; gene.exp.pcs was indexed by rownames(covs) beforehand, so the rows line up.
covs <- cbind(covs, gene.exp.pcs)

In [16]:
# Preview the combined covariate table.
head(covs)

Unnamed: 0_level_0,Neutrophils,Lymphocytes,Monocytes,PC1,PC2,PC3,PC4,PC5,PC6,PC7,⋯,Gene_Exp_PC21,Gene_Exp_PC22,Gene_Exp_PC23,Gene_Exp_PC24,Gene_Exp_PC25,Gene_Exp_PC26,Gene_Exp_PC27,Gene_Exp_PC28,Gene_Exp_PC29,Gene_Exp_PC30
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
UK01050129_5,-1.7381318,1.8281826,1.2829455,0.177279994,0.051694602,0.0248603,-0.035102502,-0.00455149,-0.0603707,0.0396488,⋯,0.02999848,0.0011984274,0.01692297,-0.008912423,-0.02361353,0.005014524,0.015967724,-0.0186314749,0.00339096,-0.010916913
UK01060123_3,-0.384537,0.3878375,0.4548023,-0.000388133,-0.000674099,0.00810357,0.000212791,0.0339615,0.0266317,-0.0188528,⋯,-0.012985351,-0.0018916478,0.007902303,0.053626065,-0.04564304,0.015595516,0.018625615,0.0439581234,-0.009012608,0.004920957
UK01070117_3,-0.7738039,0.5846032,0.9325467,0.0164323,-0.048464298,0.00813966,-0.0476996,-0.0792683,0.0263629,-0.0822474,⋯,-0.017683965,-0.0004909425,-0.000824064,-0.015669003,-0.03225682,0.004759373,0.009953229,0.0006137304,0.016741733,-0.050861668
UK01080111_1,0.1305646,0.2888638,-0.6232047,0.161789,0.036224801,0.00467903,-0.048611499,0.00180036,-0.0352357,0.0501251,⋯,0.004055798,0.0026828586,-0.011826754,-0.026489872,-0.0107378,0.005128157,0.02581314,-0.031698368,-0.004046843,0.024874972
UK01110093_1,1.0909779,-1.248816,-0.4429439,-0.00629672,0.00282151,0.00699239,0.00435165,0.00268802,0.0166922,-0.00733983,⋯,-0.054920139,0.0207910453,0.03847084,0.005280355,0.01412896,-0.069819205,-0.01425291,-0.0625275028,-0.034057754,0.025129987
UK01110093_5,0.2302416,0.0,-0.3306479,-0.00629672,0.00282151,0.00699239,0.00435165,0.00268802,0.0166922,-0.00733983,⋯,-0.052643654,0.0541278245,-0.03659409,0.040833731,-0.04799052,-0.048906855,-0.051968698,-0.0303978809,0.011576313,-0.047137065


## Heatmap

Before performing single-variant association analysis, we want to check if any covariates are strongly associated with the module eigengenes.

In [17]:
# Join covariates and eigengenes on their row names (sample IDs), then drop
# the key column that merge() adds so that only numeric variables remain.
all.vars <- merge(covs, eigengenes, by="row.names")
all.vars$Row.names <- NULL

In [18]:
# Diverging palette for the correlation heatmaps.
# NOTE(review): a plain colour vector is presumably interpolated over each
# matrix's own value range rather than a fixed [-1, 1] scale, so zero may not
# sit at the palette midpoint and the three panels may not share a scale.
# Confirm against the ComplexHeatmap documentation; a circlize::colorRamp2()
# mapping over c(-1, 0, 1) would pin the scale if that is what is wanted.
colors <- colorRampPalette(brewer.pal(11, "RdBu"))(101)

# Pairwise correlations between every covariate and every eigengene, split
# into one block per covariate group.
cor.mtx <- cor(all.vars)
cor.mtx.peer <- cor.mtx[peer, colnames(eigengenes)]
cor.mtx.pcs <- cor.mtx[pcs, colnames(eigengenes)]
cor.mtx.held.out <- cor.mtx[held.out, colnames(eigengenes)]

# Legend settings shared by all three heatmaps.
correlation.legend <- list(
    title="Correlation",
    at=c(-1, 0, 1)
)

h1 <- Heatmap(cor.mtx.pcs, col=colors, heatmap_legend_param=correlation.legend)

h2 <- Heatmap(cor.mtx.peer, col=colors, heatmap_legend_param=correlation.legend)

# Rows are left in the order given by held.out (no row clustering).
h3 <- Heatmap(cor.mtx.held.out, col=colors, cluster_rows=FALSE, heatmap_legend_param=correlation.legend)

# Stack the heatmaps vertically and render them into the SVG device.
# draw() is called explicitly so the figure is still produced when this code
# is not auto-printed at top level (e.g. when run through source()).
svg("04_Expression/results/eigengene_peer_factor_correlation.svg", width=8, height=12)
draw(h1 %v% h2 %v% h3)
dev.off()

The PEER factors are built using gene expression data. It's not surprising that many of the eigengenes are associated with the PEER factors. WGCNA discarded around 8000 genes. The residual gene expression variation from these genes is likely captured by some of the PEER factors. We model this specifically using 30 gene expression PCs from the discarded genes.

The eigengenes are not associated with genotyping PCs (which is good).

![](../results/eigengene_peer_factor_correlation.svg)

Ideally, I will include only sex and genotyping PCs as covariates in this mapping, since the other covariates (cell counts, SRS group) are important signatures that may be correlated with module eigengenes. I will also include the 30 PCs from the unassigned gene expression data to control for technical confounders.

## Save Covariates

Save list of patients (family ID in the first column and individual ID in the second column) to subset the genotyping data using PLINK. There are 638 unique patients with genotyping information.

In [19]:
# Patient ID of every eigengene sample: the text before the first underscore.
# vapply() guarantees a character vector even for empty or unexpected input.
eigengene.patients <- vapply(strsplit(rownames(eigengenes), "_"), function(x) { x[1] }, character(1))

# Read the first two columns of the PLINK .fam file (family and individual ID),
# derive the un-prefixed patient ID and keep the patients that have eigengene
# samples. unique() removes duplicated rows.
geno.fam <- fread("~/gains_team282/Genotyping/All_genotyping_merged_filtered_b38_refiltered_rsID.fam") %>%
    dplyr::select(Family.ID=1, Individual.ID=2) %>%
    dplyr::mutate(GAinS.ID=gsub("^GA", "", Individual.ID)) %>%
    dplyr::filter(GAinS.ID %in% eigengene.patients) %>%
    unique()

In [20]:
# Number of genotyped patients (rows) retained and number of ID columns.
dim(geno.fam)

In [21]:
# Write the family and individual IDs as a headerless, unquoted,
# tab-separated file. TRUE/FALSE are spelled out because T/F can be reassigned.
write.table(
    geno.fam %>% dplyr::select(Family.ID, Individual.ID),
    "~/gains_team282/nikhil/expression/eigengene_sva/mapping_patients.txt",
    row.names=FALSE, quote=FALSE, col.names=FALSE, sep="\t"
)

Save the list of module eigengenes as a text file. This will be used by NextFlow to parallelize the association mapping.

In [22]:
# One eigengene name per line, without header or quoting.
# TRUE/FALSE are spelled out because T/F can be reassigned.
write.table(
    colnames(eigengenes),
    "~/gains_team282/nikhil/expression/eigengene_sva/mapping_eigengenes.txt",
    row.names=FALSE, quote=FALSE, col.names=FALSE, sep="\t"
)

### Initial Pass

I will be using a linear mixed model (LMM) to test for association between genotypes and eigengene expression. In the past, the lab has used a likelihood test (F-Test) to compare a null model of the covariates against an alternative model where the genotype is included. I will be building the following model:

1. Let $\mathbf{E}_i\in\mathbb{R}^n$ be a vector representing the values of the $i$-th eigengene.
2. Let $\mathbf{Y}\in\mathbb{R}^{n\times c}$ be a matrix of covariates. These covariates include Sex, 7 Genotyping PCs, and the 30 gene expression PCs computed from the unassigned genes.
3. Let $\mathbf{Z}\in\{0,1\}^{n\times p}$ be the design matrix of the random effects, where $p$ is the number of patients. The only random effect in this model is the Patient ID (a random intercept per patient), so $\mathbf{Z}_{jk}=1$ when sample $j$ comes from patient $k$ and $0$ otherwise.
4. Let $\mathbf{X}\in\mathbb{R}^n$ be a vector representing the genotypes of the patients.
5. Let $\beta\in\mathbb{R}$ be a scalar value representing the genotypic effect on eigengene expression.
6. Let $\mathbf{\alpha}\in\mathbb{R}^c$ be a vector of covariate effects on eigengene expression.
7. Let $\gamma\in\mathbb{R}^p$ be a vector of per-patient random effects on eigengene expression.

The null model is:

$$\mathbf{E}_i \sim \mathbf{Y}\alpha + \mathbf{Z}\gamma$$

The alternative model is:

$$\mathbf{E}_i \sim \mathbf{X}\beta + \mathbf{Y}\alpha + \mathbf{Z}\gamma$$

There are 823 samples from the RNA-Seq data that have genotypes as well.

In [23]:
# Keep the eigengene samples whose patient ID (text before the first
# underscore) appears in the genotype table. vapply() always yields a logical
# vector, so the subscript stays valid even when there are no samples
# (sapply() would return an empty list, which cannot be used as an index).
samples.with.genotypes <- rownames(eigengenes)[
    vapply(
        strsplit(rownames(eigengenes), "_"),
        function(x) { x[1] %in% geno.fam$GAinS.ID },
        logical(1)
    )
]

# Number of RNA-seq samples that also have genotypes.
length(samples.with.genotypes)

In [24]:
# Gene expression PC names (defined earlier in the notebook).
pcs

# Response variables: one per module eigengene.
eigen.names <- names(eigengenes)
eigen.names

# Fixed-effect covariates: sex and the genotyping PCs.
cov.names <- c("sex", sprintf("PC%d", 1:7))
cov.names

# Random-effect grouping variable.
rand.effect.names <- "GAinS.ID"
rand.effect.names

In [25]:
# Build the design table for the mixed model:
#   1. join eigengenes and covariates for the genotyped samples on sample ID;
#   2. derive the un-prefixed patient ID (text before the first underscore),
#      using vapply() so the result is always a character vector;
#   3. join to the genotype table to recover the ID used in the .fam file;
#   4. expose that ID as GAinS.ID and keep only the sample ID, random effect,
#      eigengenes, covariates and gene expression PCs, in that order.
mapping.data <- merge(eigengenes[samples.with.genotypes,], covs[samples.with.genotypes,], by.x=0, by.y=0) %>%
    dplyr::mutate(GAinS.ID.NonPrefix=vapply(strsplit(Row.names, "_"), function(x) { x[1] }, character(1))) %>%
    merge(., geno.fam, by.x="GAinS.ID.NonPrefix", by.y="GAinS.ID") %>%
    dplyr::select(GAinS.ID=Individual.ID, everything()) %>%
    dplyr::select(Sample.ID=Row.names, any_of(rand.effect.names), any_of(eigen.names), any_of(cov.names), any_of(pcs))

Save the mapping data design matrix for linear mixed modeling.

In [26]:
# Save the design table without row names. FALSE is spelled out because F can
# be reassigned.
write.csv(mapping.data, "~/gains_team282/nikhil/expression/eigengene_sva/mapping_data.csv", row.names=FALSE)