The purpose of this notebook is to perform GWAS for all of the ancestry-associated genes.

The question we are asking here is whether there are any germline variants that are associated with these dependencies. 

# Set up the environment

In [1]:
#Arguments/Parameters shared by every cell in this notebook

#Root directory that holds the software, the inputs and the outputs
working_dir <- "/home/jupyter/notebooks/Ancestry"
#Workspace bucket, read from the WORKSPACE_BUCKET environment variable
workspace_bucket <- Sys.getenv("WORKSPACE_BUCKET")

In [2]:
#load packages
library(tidyverse)
library(reshape2)

#Define functions

#' Print a value and flush the console right away.
#'
#' @param x Object to print with `print()`.
#' @return The value of `flush.console()`, returned invisibly.
show_msg <- function(x) {
  print(x)
  #Flush so the message is displayed before the next long-running step starts
  flush.console()
}

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.4     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.0.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths




In [17]:
#Create directory structure
#
#`mkdir -p` creates missing parent directories and does nothing when the
#directory already exists, so no explicit existence test is needed.
#The previous tests checked paths that contained the literal word 'mkdir'
#(so they were always true) and the LiftOver directories were created relative
#to the current directory instead of under working_dir.
system(glue::glue("
#Directory for the GWAS inputs and intermediate files
mkdir -p {working_dir}/gwas

#Directory for the GWAS results
mkdir -p {working_dir}/gwas_output

#Create the directory to store the liftover files
mkdir -p {working_dir}/LiftOver/hg19_to_hg38
"))

In [5]:
#Download and unpack PLINK
#
#`mkdir -p` also creates {working_dir}/software when it does not exist yet, and
#`cd ... || exit 1` stops the script instead of downloading into the wrong
#directory when the target directory cannot be entered.

#PLINK 2
system(glue::glue("
mkdir -p {working_dir}/software/plink2
cd {working_dir}/software/plink2 || exit 1
wget https://s3.amazonaws.com/plink2-assets/plink2_linux_avx2_20210826.zip
unzip plink2_linux_avx2_20210826.zip
"))


#PLINK 1.9
system(glue::glue("
mkdir -p {working_dir}/software/plink
cd {working_dir}/software/plink || exit 1
wget https://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20210606.zip
unzip plink_linux_x86_64_20210606.zip
"))

In [35]:
#Download and install bcftools
#Only build bcftools when the binary is not already present
step_install_bcftools <- !file.exists(glue::glue("{working_dir}/software/bcftools/bcftools"))

if (step_install_bcftools) {
  #Clone htslib next to bcftools and build bcftools from source.
  #The clones use https:// because GitHub no longer serves git:// URLs.
  system(glue::glue("
mkdir -p {working_dir}/software
cd {working_dir}/software || exit 1
git clone --recurse-submodules https://github.com/samtools/htslib.git
git clone https://github.com/samtools/bcftools.git
cd bcftools
autoheader && autoconf && ./configure --enable-libgsl --enable-perl-filters
make
"))
} else {
  print("bcftools is already installed")
}

#Set the bcftools plugin path and add bcftools to PATH.
#An `export` inside system() only lives for that one shell, so the variables
#are set on the R process itself; every later system() call inherits them.
bcftools_dir <- file.path(working_dir, "software", "bcftools")
path_entries <- strsplit(Sys.getenv("PATH"), ":", fixed = TRUE)[[1]]
if (!(bcftools_dir %in% path_entries)) {
  #Append only once so re-running the cell does not keep growing PATH
  Sys.setenv(PATH = paste(Sys.getenv("PATH"), bcftools_dir, sep = ":"))
}
Sys.setenv(BCFTOOLS_PLUGINS = file.path(bcftools_dir, "plugins"))

[1] "bcftools is already installed"


In [14]:
#Install Picard if it needs to be installed
#
#The check is on picard.jar itself (not just on the directory), so a download
#that failed part-way is retried the next time the cell is run.
system(glue::glue("
if [ ! -f '{working_dir}/software/picard/picard.jar' ]
then
mkdir -p {working_dir}/software/picard
cd {working_dir}/software/picard || exit 1
wget https://github.com/broadinstitute/picard/releases/download/2.25.7/picard.jar
fi
"))

In [7]:
#Install Tabix

# Format the data

Much of this code is borrowed from the Genotype Phasing and RFMix notebook, which is part of this project.

In [19]:
#Download the CCLE data, compress it and index it
local({
  #Copy the recoded CCLE vcf from the bucket and bgzip it
  download_cmd <- glue::glue("
cd {working_dir}/gwas
gsutil cp gs://fc-45c0e148-0b1c-4244-9bfc-feb559bbc514/recoded.ccle.all.called.vcf .
bgzip recoded.ccle.all.called.vcf
")
  system(download_cmd)

  #Index the compressed vcf with tabix
  index_cmd <- glue::glue("
cd {working_dir}/gwas
{working_dir}/software/tabix-0.2.6/tabix -p vcf recoded.ccle.all.called.vcf.gz
")
  system(index_cmd)
})

The sample names in the VCF header are unusual cell line identifiers rather than CDS IDs, which is not what we want. The CDS IDs are also stored in the VCF header, so we can extract them and assign them as the sample names in the VCF.

In [20]:
#Fix the header in ccle.all.called.vcf to convert the sample ID to the CDS ID

#Step 1: dump the vcf header and keep only its 118th line (with every '#'
#removed); that line is what the CDS IDs are parsed from in R below.
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/bcftools/bcftools view -h recoded.ccle.all.called.vcf.gz -o sample.header.txt
head -n 118 sample.header.txt | tail -n 1 | tr -d '#' > sample.header.for.r.txt
rm sample.header.txt
"))


#Step 2: build a one-column table with one CDS ID per row, then write it.
setwd(paste(working_dir, "/gwas", sep = ""))
sample.header <- as.vector(read.table('sample.header.for.r.txt', sep = "\t")[,1])
split.row <- as.vector(data.frame(strsplit(sample.header, " "))[,1]) #One token per space-separated field
split.row <- split.row[9:length(split.row)] #The first 8 tokens are not sample names
split.row <- gsub('_cnn_filtered.vcf.gz', '', gsub(".*/", "", split.row)) #Drop everything up to the last '/', then the file suffix
split.row <- gsub(";", "", head(split.row, -5)) #The last 5 tokens are not sample names; also strip any ';'
split.row <- data.frame(split.row)
dim(split.row)
write.table(split.row, "cds.name.list.txt", sep = "\t", col.names = F, row.names = F, quote = F)

#Step 3: replace the sample names in the vcf with the CDS IDs
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/bcftools/bcftools reheader --samples cds.name.list.txt -o cdsnames.ccle.all.called.vcf.gz recoded.ccle.all.called.vcf.gz
mv cdsnames.ccle.all.called.vcf.gz ccle.all.called.vcf.gz
"))

#Step 4: clean up the temporary files and any index left from a previous run
system(glue::glue("
cd {working_dir}/gwas
rm cds.name.list.txt
rm sample.header.for.r.txt
rm ccle.all.called.vcf.gz.tbi
"))

#Step 5: re-index the renamed vcf file
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/tabix-0.2.6/tabix -p vcf ccle.all.called.vcf.gz
"))

The variant calls are (unfortunately) hg19. Let's use LiftOver to convert them to hg38.

In [15]:
#Download the LiftOver files and build the sequence dictionary
local({
  #hg19 -> hg38 chain file and the hg38 reference fasta, both from UCSC
  fetch_cmd <- glue::glue("
cd {working_dir}/LiftOver/hg19_to_hg38
wget https://hgdownload.soe.ucsc.edu/goldenPath/hg19/liftOver/hg19ToHg38.over.chain.gz
wget https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
")
  system(fetch_cmd)

  #Construct the GATK dict file for the hg38 reference with Picard
  dict_cmd <- glue::glue("
cd {working_dir}/LiftOver/hg19_to_hg38
java -jar {working_dir}/software/picard/picard.jar CreateSequenceDictionary R=hg38.fa.gz O=hg38.dict
")
  system(dict_cmd)
})

In [21]:
#LiftOver the variant calls to hg38 because they are currently hg19
#Variants that cannot be lifted over are written to liftover_rejected_variants.vcf
local({
  liftover_cmd <- glue::glue("
java -jar {working_dir}/software/picard/picard.jar LiftoverVcf I={working_dir}/gwas/ccle.all.called.vcf.gz O={working_dir}/gwas/hg38.ccle.all.called.vcf MAX_RECORDS_IN_RAM=10000 CHAIN={working_dir}/LiftOver/hg19_to_hg38/hg19ToHg38.over.chain.gz REJECT={working_dir}/gwas/liftover_rejected_variants.vcf R={working_dir}/LiftOver/hg19_to_hg38/hg38.fa.gz
")
  system(liftover_cmd)
})

Let's try to recode the file as a .bcf file instead of a .vcf.gz file. This should hopefully make things run much faster.

In [23]:
#Re-code the vcf file so that it is a bcf file. This should hopefully make things run much faster.
#The output is written with -Ob (BGZF-compressed BCF) rather than -Ou
#(uncompressed BCF): `bcftools index` only indexes BGZF-compressed files, and
#the index is needed by the region-based filtering later on.
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/bcftools/bcftools view hg38.ccle.all.called.vcf -o hg38.ccle.all.called.bcf -Ob
{working_dir}/software/bcftools/bcftools index -c -f hg38.ccle.all.called.bcf
"))

Alternate alleles at the same position are currently grouped into a single multiallelic record, so we should split them into one record per alternate allele.

For example:

Current format: 
chr1 // pos123456789 // ref=A // alt=G/T

New format: 
chr1 // pos123456789 // ref=A // alt=G
chr1 // pos123456789 // ref=A // alt=T


In [24]:
#split the genotype calls
#`bcftools norm -m -` splits multiallelic records into one record per
#alternate allele. The output uses -Ob (BGZF-compressed BCF) instead of -Ou
#because `bcftools index` cannot index an uncompressed BCF.
show_msg("Splitting Genotype Calls")
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/bcftools/bcftools norm -m - hg38.ccle.all.called.bcf -Ob -o split.hg38.ccle.all.called.bcf
{working_dir}/software/bcftools/bcftools index -c -f split.hg38.ccle.all.called.bcf
"))

[1] "Splitting Genotype Calls"


Some of the samples are WGS and other samples are WES. We should filter it down to only include exonic regions so that we are not introducing bias into the analysis.

In [31]:
#Download a bed file that has the exon positions
system(glue::glue("
cd {working_dir}/gwas
gsutil cp gs://fc-7211fd5c-103b-4f7b-831a-1f2def618f62/exon_positions .
"))

#Filter the file so that it only includes exons
#-R restricts the output to the regions listed in exon_positions and needs an
#indexed input file. The output uses -Ob (BGZF-compressed BCF) instead of -Ou
#so that the `bcftools index` call below can index it.
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/bcftools/bcftools view --force-samples -R exon_positions split.hg38.ccle.all.called.bcf -Ob -o exon.hg38.ccle.all.called.bcf
{working_dir}/software/bcftools/bcftools index -c -f exon.hg38.ccle.all.called.bcf
"))


In [43]:
#Also, we should filter the data so that it only includes the samples with WES and WGS data

#Download the CCLE sample tracker
system(glue::glue("
cd {working_dir}/gwas
gsutil cp gs://fc-45c0e148-0b1c-4244-9bfc-feb559bbc514/ccle_sample_tracker.csv .
"))


#Construct a list of samples that we want to keep
paste(working_dir, "/gwas", sep = "") %>% setwd()
cds.to.ach <- read.table('ccle_sample_tracker.csv', sep = ",", header = TRUE)
cds.to.ach <- cds.to.ach[cds.to.ach$datatype %in% c("wgs", "wes"),] #Keep only the WES and WGS rows
cds.to.ach <- cds.to.ach[,1] #First column of the tracker; presumably the CDS ID - TODO confirm
cds.to.ach <- cds.to.ach %>% unique()
write.table(cds.to.ach, "samples.to.keep.txt", sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)


#Now subset the vcf file so that it only includes samples that we have WES/WGS for
#--force-samples lets bcftools continue when a listed sample is not in the file.
#The output uses -Ob (BGZF-compressed BCF) instead of -Ou so that it can be indexed.
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/bcftools/bcftools view -S samples.to.keep.txt --force-samples -Ob -o weswgs.hg38.ccle.all.called.bcf exon.hg38.ccle.all.called.bcf
{working_dir}/software/bcftools/bcftools index -c -f weswgs.hg38.ccle.all.called.bcf
"))

#Extract the sample names from the vcf file and write them to a file
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/bcftools/bcftools query -l weswgs.hg38.ccle.all.called.bcf > weswgs.hg38.ccle.all.called.sample.names
"))

#Create a file to bridge the CDS IDs to ACH IDs
#Work in the gwas directory: the sample-name file and the sample tracker were
#both written there, and the reheader step below reads ach_sample_header.txt
#from there as well.
paste(working_dir, "/gwas", sep = "") %>% setwd()
sample.header <- read.table('weswgs.hg38.ccle.all.called.sample.names', sep = "\t") #Load in the sample header that contains all of the samples in the vcf file
sample.header <- sample.header[,1] %>% as.vector() #Convert it to a vector
sample.tracker <- read.table('ccle_sample_tracker.csv', sep = ",", header = TRUE) #Load in the ccle.sample.tracker that bridges the cds ID to the ACH ID
sample.tracker <- sample.tracker[sample.tracker$datatype %in% c("wgs", "wes"),] #Filter the sample tracker so that it only includes WES and WGS data
sample.header <- plyr::mapvalues(sample.header, from = sample.tracker$cds_id, to = sample.tracker$arxspan_id, warn_missing = FALSE) #Swap each CDS ID for its arxspan (ACH) ID
write.table(sample.header %>% unique(), "ach_samples_to_keep.txt", sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
#make.names(unique = TRUE) makes the names syntactic and gives repeated names a
#numeric suffix; sub() then turns the first '.' of each name into '_'.
sample.header <- sub('[.]', '_', make.names(sample.header, unique=TRUE))
write.table(sample.header, "ach_sample_header.txt", sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)


#Replace the sample names in the vcf file with the de-duplicated ACH IDs
#NOTE(review): ccle.hg38.splice.wgswes.sample.names is not created anywhere in
#this notebook; `rm -f` keeps the cleanup quiet when the file does not exist.
system(glue::glue("
cd {working_dir}/gwas
rm -f ccle.hg38.splice.wgswes.sample.names
{working_dir}/software/bcftools/bcftools reheader --samples ach_sample_header.txt -o achid.hg38.ccle.all.called.bcf weswgs.hg38.ccle.all.called.bcf
{working_dir}/software/bcftools/bcftools index -c -f achid.hg38.ccle.all.called.bcf
"))

ERROR: Error in read.table("weswgs.hg38.ccle.all.called.sample.names", sep = "\t"): no lines available in input


In [48]:
#recode the ./. variants to 0/0
#The setGT plugin is called with `-t . -n 0`, i.e. missing genotypes are
#replaced by the reference allele.
#The plugin path is derived from working_dir instead of being hard-coded, and
#the output uses -Ob (BGZF-compressed BCF) instead of -Ou so that it can be indexed.
system(glue::glue("
cd {working_dir}/gwas
export PATH=$PATH:{working_dir}/software/bcftools
export BCFTOOLS_PLUGINS='{working_dir}/software/bcftools/plugins'
{working_dir}/software/bcftools/bcftools +setGT achid.hg38.ccle.all.called.bcf -Ob -o complete.hg38.ccle.all.called.bcf -- -t . -n 0
{working_dir}/software/bcftools/bcftools index -c -f complete.hg38.ccle.all.called.bcf
"))


In [49]:
#Convert the dataset so that it is in the plink format
#The input is complete.hg38.ccle.all.called.bcf, the file written by the
#missing-genotype recoding step; no step in this notebook produces a
#depmap.hg38.ccle.all.called.bcf file.
system(glue::glue("
cd {working_dir}/gwas
{working_dir}/software/plink2/plink2 --bcf complete.hg38.ccle.all.called.bcf --allow-extra-chr --make-bed --out complete.hg38.ccle.all.called
"))

# Create the GWAS Phenotype and Covariate files

First get all of the covariate data together.

In [50]:
#Perform PCA for GWAS covariate
#Computes the first 10 principal components from the plink fileset
local({
  pca_cmd <- glue::glue("
cd {working_dir}/gwas
{working_dir}/software/plink2/plink2 --bfile complete.hg38.ccle.all.called --allow-extra-chr --pca 10 --out complete.hg38.ccle.all.called.pca10
")
  system(pca_cmd)
})

In [4]:
#Format the cell covariates

#Download the DepMap sample annotation file
#(a stray second '.' argument was removed from the gsutil command)
system(glue::glue("
cd {working_dir}/gwas
gsutil cp gs://fc-45c0e148-0b1c-4244-9bfc-feb559bbc514/internal-21q3_v15-sample-info.csv .
"))

#Load in the sample annotation file
paste(working_dir, "/gwas", sep = "") %>% setwd()
cell.annotation <- read.table('internal-21q3_v15-sample-info.csv', sep = ",", fill = TRUE, header = TRUE) #One row per cell line in the annotation file

#Format the covariates file: keep the DepMap ID and the lineage of each cell line
covariates <- cbind(cell.annotation$DepMap_ID, cell.annotation$lineage) %>%
  data.frame() %>%
  rename("ach_id" = 1, "lineage" = 2)
head(covariates)
write.table(covariates, "cell.covariates.txt", sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)

Unnamed: 0_level_0,ach_id,lineage
Unnamed: 0_level_1,<chr>,<chr>
1,ACH-000001,ovary
2,ACH-000002,blood
3,ACH-000003,colorectal
4,ACH-000004,blood
5,ACH-000005,blood
6,ACH-000006,blood


In [31]:
#Load in both of the covariate files and then merge them together into a single .cov file that is in the correct format
setwd(paste(working_dir, "/gwas", sep = "")) #set the working directory

#Covariates from PCA; the first column is dropped because it is not needed
pca.cov <- read.table('complete.hg38.ccle.all.called.pca10.eigenvec', sep = "\t", fill = TRUE)[,-1]
#Covariates from annotation file
lineage.cov <- read.table('cell.covariates.txt', sep = "\t", fill = TRUE, header = TRUE)

#Name the remaining PCA columns: the sample ID followed by pc1 ... pc10
colnames(pca.cov) <- c("ach_id", paste0("pc", 1:10))


#The annotation file writes the cell line IDs with a '-' while the vcf/plink
#files use a '_', so switch the annotation IDs to '_' to make them match.
lineage.cov$ach_id <- gsub("-", "_", lineage.cov$ach_id)
head(lineage.cov)

#Convert the tumor types into numbers so that we can add it to the covariates file
#NOTE(review): the lineage ends up as an arbitrary integer code per tumor type;
#confirm that the GWAS step treats this column as categorical rather than numeric.
unique.lineage <- unique(lineage.cov$lineage)
lineage.numbers <- seq(from = 1, to = length(unique.lineage), by = 1)
lineage.encoding <- data.frame(cbind(unique.lineage, lineage.numbers))
colnames(lineage.encoding) <- c("lineage", "number")
lineage.cov$lineage <- plyr::mapvalues(lineage.cov$lineage, from = lineage.encoding$lineage, to = lineage.encoding$number)
head(lineage.encoding)


#merge the two datasets together; only IDs present in both tables are kept
pca.lineage.cov <- merge(pca.cov, lineage.cov, by = "ach_id")


#format the .cov file: the sample ID column has to be called IID
covariate.file <- pca.lineage.cov
names(covariate.file)[1] <- "IID"


#Look at the head of the merged table to make sure that it is formatted correctly
head(covariate.file)


#write the covariate file
write.table(covariate.file, "depmap.cov", sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)

Unnamed: 0_level_0,ach_id,lineage
Unnamed: 0_level_1,<chr>,<chr>
1,ACH_000001,ovary
2,ACH_000002,blood
3,ACH_000003,colorectal
4,ACH_000004,blood
5,ACH_000005,blood
6,ACH_000006,blood


Unnamed: 0_level_0,lineage,number
Unnamed: 0_level_1,<chr>,<chr>
1,ovary,1
2,blood,2
3,colorectal,3
4,skin,4
5,urinary_tract,5
6,lung,6


Unnamed: 0_level_0,IID,pc1,pc2,pc3,pc4,pc5,pc6,pc7,pc8,pc9,pc10,lineage
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,ACH_000001,-0.000791137,0.000700169,0.000914316,-0.000919319,-0.000335148,0.000578365,-0.00109101,-0.00105638,-0.00106275,0.000123351,1
2,ACH_000002,-0.000746101,0.000703607,0.00100001,-0.000973165,-0.000343036,0.00057675,-0.000895354,-0.00118509,-0.00117146,4.58036e-05,2
3,ACH_000004,-0.000745383,0.000644963,0.00091704,-0.000897439,-0.00047207,0.000553266,-0.00099379,-0.00102955,-0.00105329,0.000253595,2
4,ACH_000005,-0.000762013,0.000694865,0.000943667,-0.000947499,-0.000467193,0.00066525,-0.000985791,-0.00107016,-0.00113868,0.000337099,2
5,ACH_000006,-0.000763688,0.000670431,0.000919129,-0.000924049,-0.000373714,0.000629725,-0.000905337,-0.000985003,-0.00111806,0.000146758,2
6,ACH_000007,-0.00080299,0.00069443,0.00108927,-0.0011516,-0.000645693,0.0048527,0.00431813,-0.00123761,-0.00149745,0.000587248,3


Now get all of the phenotype data

In [32]:
#Get a vector of all of the ancestry-associated genes.
#A gene is kept when its BH-adjusted p-value is below 0.05 for at least one of
#the five ancestry groups (afr, amr, eas, eur, sas).
setwd(paste(working_dir, "/ccle_ancestry_analysis", sep = "")) #set the working directory
ancestry.associated.genes <- read.table('ancestry_chronos_logistic_correlations.txt', sep = "\t", header = TRUE) %>%
  mutate(
    fdr_afr = p.adjust(pval_afr, method = "BH", n = n()),
    fdr_amr = p.adjust(pval_amr, method = "BH", n = n()),
    fdr_eas = p.adjust(pval_eas, method = "BH", n = n()),
    fdr_eur = p.adjust(pval_eur, method = "BH", n = n()),
    fdr_sas = p.adjust(pval_sas, method = "BH", n = n())
  ) %>%
  filter(fdr_afr < 0.05 | fdr_amr < 0.05 | fdr_eas < 0.05 | fdr_eur < 0.05 | fdr_sas < 0.05) %>%
  pull(gene)

In [33]:
#Format the phenotype file

#Download the depmap chronos scores
system(glue::glue("
cd {working_dir}/gwas
gsutil cp gs://fc-45c0e148-0b1c-4244-9bfc-feb559bbc514/CRISPR_gene_effect.csv .
"))

#Load the chronos scores and format it
setwd(paste(working_dir, "/gwas", sep = "")) #set the working directory
chronos.scores <- read.table('CRISPR_gene_effect.csv', sep = ",", header = F)
#The first row holds the column names: keep only the text before the first
#space of each one, and call the first column IID as the .pheno format requires.
colnames(chronos.scores) <- c("IID", sub(" .*", "", chronos.scores[1,])[-1])
chronos.scores <- chronos.scores[-1,] #Drop the row that held the column names
chronos.scores$IID <- gsub("-", "_", chronos.scores$IID) #Replace the - with a _ to match the notation in the plink files
#Replace the missing values with 0. Clearly not the best solution, but it is A
#solution. And these are rare cases, so it shouldn't affect it too much.
chronos.scores[chronos.scores == ""] <- 0


#Now filter it so that it only includes the ancestry-associated genes
genes.to.keep <- intersect(colnames(chronos.scores), ancestry.associated.genes)
chronos.scores <- select(chronos.scores, IID, all_of(genes.to.keep))


dim(chronos.scores)
chronos.scores[1:10, 1:10] #Print the head of the dataset so that we can take a look at it

#write the phenotype.file
write.table(chronos.scores, "chronos_gwas_phenotypes.pheno", sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)

Unnamed: 0_level_0,IID,ACAN,ACSM5,ATAD3B,C22orf31,C2orf80,C7orf57,CBFA2T2,CCDC74B,CEP19
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
2,ACH_000001,0.0138311770360225,0.0679110091625411,0.1146164130920919,0.0483921118962064,-0.0835227488877239,0.1967985985551586,0.0727241219550182,-0.239329152936503,0.0406172972828068
3,ACH_000004,-0.1688282084705893,-0.0524672453106868,0.0865467985320738,0.0575247402183774,0.0467931141753831,-0.0028459223546237,-0.0179349362885437,0.117316919081466,-0.0063397932232908
4,ACH_000005,0.0031318407672419,0.1023121781128291,0.0346798024474173,0.0909168349775628,-0.1092391614740438,0.0354518480603664,0.0167653671159481,0.0341126337768,-0.0454750194130222
5,ACH_000007,-0.0759749334019446,0.0268292497239602,-0.0836165104326996,0.0301576904787381,-0.0868309359585722,0.1556406744369103,-0.0197245684138509,-0.2155225465834067,-0.0204307085975022
6,ACH_000009,-0.0806789333560812,0.1332441797668063,-0.055688579053877,0.0648441616403983,-0.1126610125995181,0.1548850087626561,-0.0314992406529438,-0.2434548682325304,-0.0080817112230735
7,ACH_000011,-0.1752479874187021,-0.1315974312683737,-0.0518226960476464,0.2238278924860508,-0.0689300756771018,0.0632339666118478,-0.145729017449773,-0.2980727246644349,-0.0695785548406427
8,ACH_000012,0.0362920524906266,-0.0230508844397713,-0.1971744392849028,0.0286799325979006,-0.1596492885798107,0.1056070413461159,0.2545374765727802,-0.1782039525307162,0.0706677972822247
9,ACH_000013,0.1621765291914784,0.0620629664164877,0.0416631056766779,0.0502892209933478,0.0458851691883693,0.0746832073169272,0.1592101798528528,-0.12165843633306,-0.1637705955916642
10,ACH_000014,0.06651211217853,-0.0273130405642804,0.0754365663380433,0.0453911184586443,0.0512957486163941,-0.0545540925001247,-0.007534950227477,-0.2932366417842942,0.0569401362667796
11,ACH_000015,-0.0085837525341264,-0.1206994713307842,0.0444397739101214,0.0491633461498805,0.0669012966780442,0.1232012517726198,0.0205538546013211,-0.0833579100210263,-0.035742639152373


In [19]:
#Run GWAS!!!
#Runs plink2 --glm on the plink fileset with depmap.cov as the covariates and
#chronos_gwas_phenotypes.pheno as the phenotypes, keeping variants with
#MAF >= 0.01 and hiding the covariate rows in the output.
#NOTE(review): --out is given {working_dir}/gwas_output, which is also the name
#of the results directory; confirm whether a file prefix inside that directory
#was intended.
#NOTE(review): splice_gwas_out.log is not produced by any command in this cell;
#confirm which log file was meant to be removed.
local({
  gwas_cmd <- glue::glue("
cd {working_dir}/gwas
{working_dir}/software/plink2/plink2 --bfile complete.hg38.ccle.all.called --allow-extra-chr --glm hide-covar --pfilter 1 --maf 0.01 --covar 'iid-only' depmap.cov --pheno chronos_gwas_phenotypes.pheno --out {working_dir}/gwas_output
cd {working_dir}/gwas_output
rm splice_gwas_out.log
")
  system(gwas_cmd)
})