# eMerge Fagerstrom Test For Nicotine Dependence (FTND) GWAS
__Author:__ Jesse Marks

This document logs the steps taken to process the emerge data and perform the FTND GWAS. FTND is a standard instrument for assessing the physical addiction to nicotine. For more information, see [this website](https://cde.drugabuse.gov/instrument/d7c0b0f5-b865-e4de-e040-bb89ad43202b).

The genotype data were imputed on the [Michigan Imputation Server](https://imputationserver.sph.umich.edu/index.html).

* We use the variable `FTNDboth_cat` variable that lumps together the former smokers that have lifetime FTND (N=736) with the current smokers that have current FTND (N=78). This will optimize sample size, especially since the severe category is slim.

## FTNDboth_cat variable description
| cat | Freq |   |
|-----|------|---|
| 0   | 537  |   |
| 1   | 217  |   |
| 2   | 60   |   |

Where FTND conversion is 0=0-3, 1=4-6, and 2=7+

## Convert Phenotype Data from Strata format to .csv file

In [82]:
### R console ###
library(haven)

# convert Stata data into comma separated
setwd('C:/Users/jmarks/Desktop/Projects/Nicotine/eMerge/phenotype/') # local machine
pheno <- read_dta("FTND Phenotype.dta")
write.csv(pheno, file = "FTND_pheno.csv", row.names = F)
pheno.ftnd <- read.table("FTND_pheno.csv", header = T, sep = ",")
pheno.orig <- read.table("phs000170.v2.pht000737.v1.p1.c1.Marshfield_Data_Phenotype.HMB-GSO.txt", header = T)
num.subjects <- length(pheno.ftnd[['emergeid']])

pheno.combined <- data.frame(matrix(ncol = 4, nrow = num.subjects))
names(pheno.combined) <- c("emergeid", "sex", "age", "ftnd")
pheno.combined[1] <- pheno.ftnd[['emergeid']]
pheno.combined[4] <- pheno.ftnd[['FTNDboth_cat']]

for (i in 1:num.subjects) {
index <- match(pheno.ftnd[1,"emergeid"], pheno.orig[["SUBJID"]])
pheno.combined[i,"sex"] <- pheno.orig[index, "sex"]
pheno.combined[i,"age"] <- pheno.orig[index, "Age_First_Cataract_Diagnosis"]
}


In [99]:
head(pheno.combined)

# filter out any subject with NA for FTND
filter_pheno <- pheno.combined[complete.cases(pheno.combined),]
head(filter_pheno)
setwd("C:/Users/jmarks/Desktop/Projects/Nicotine/eMerge/phenotype")
write.table(filter_pheno, "emerge_ftnd_filtered.txt", sep = " ", row.names = F, quote = F)

emergeid,sex,age,ftnd
16214874,2,21,
16214875,2,21,
16214879,2,21,0.0
16214881,2,21,
16214896,2,21,0.0
16214899,2,21,


Unnamed: 0,emergeid,sex,age,ftnd
3,16214879,2,21,0
5,16214896,2,21,0
13,16215196,2,21,0
15,16215314,2,21,0
17,16215384,2,21,1
18,16215393,2,21,1


## Copy phenotype data to EC2

In [None]:
### local machine ###
cd /cygdrive/c/Users/jmarks/Desktop/Projects/Nicotine/eMerge/phenotype
scp -i ~/.ssh/gwas_rsa emerge_ftnd_filtered.txt ec2-user@35.171.207.199:/shared/s3/emerge/data/phenotype

## Inflate imputation results

In [None]:
### EC2 console ###
cd /shared/s3/emerge/data/genotype/imputed

# inflate chr results
for f in {1..22};do
echo '#!/bin/bash' > chr_$f.sh
echo '' >> chr_$f.sh
echo 'unzip -P "ScSu1byrJL49kO" chr_'$f'.zip' >> chr_$f.sh
done

for chr in {1..22}; do
sh /shared/bioinformatics/software/scripts/qsub_job.sh \
--job_name inflate_chr${chr} \
--script_prefix test/chr${chr}_results \
--mem 5 \
--priority 0 \
--program bash chr_${chr}.sh
done

## S3 data transfer

In [None]:
# Copy phenotype data
cd /shared/s3/emerge/data/phenotype
aws s3 cp ./ s3://rti-nd/eMERGE/emerge_ftnd/data/phenotype \
    --recursive --exclude="*" --include="*ped.gz" --quiet &

cd /shared/s3/emerge_ftnd/data/assoc_tests
# Copy association test results
aws s3 cp ./ s3://rti-nd/eMERGE/emerge_ftnd/results/rvtest/ \
    --recursive --exclude="*" --include="*MetaScore*gz*" --quiet &
aws s3 cp ./ s3://rti-nd/eMERGE/emerge_ftnd/results/figures/ \
    --recursive --exclude="*" --include="*.png.gz" --quiet &






ancestry=ea
# copy imputation files
cd /shared/sandbox/emerge_ftnd/genotype/imputed/

# chrX (male) data
aws s3 cp chrX.no.auto_male.dose.vcf.gz.tbi s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/ea/chrX.no.auto_male.dose.vcf.gz.tbi 
aws s3 cp chrX.no.auto_male.dose.vcf.gz s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/ea/chrX.no.auto_male.dose.vcf.gz --quiet &     
aws s3 cp chrX.no.auto_male.info.gz s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/ea/chrX.no.auto_male.info.gz --quiet &     

# chrX (female) data
aws s3 cp chrX.no.auto_female.dose.vcf.gz.tbi s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/ea/chrX.no.auto_female.dose.vcf.gz.tbi 
aws s3 cp chrX.no.auto_female.dose.vcf.gz s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/ea/chrX.no.auto_female.dose.vcf.gz --quiet &     
aws s3 cp chrX.no.auto_female.info.gz s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/ea/chrX.no.auto_female.info.gz --quiet &     


aws s3 cp snp_stats/ s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/${ancestry}/ \
    --recursive --exclude "*" --include "*.txt" --quiet &
aws s3 cp qc_report/ s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/${ancestry}/ \
    --recursive --exclude "*" --include "*.html" --quiet &
aws s3 cp logs/ s3://rti-nd/eMERGE/emerge_ftnd/data/genotype/imputed/${ancestry}/ \
    --recursive --exclude "*" --include "*.log" --quiet &

# submit upload as a job
--------------------------------------------------------------------------------
## copy imputation files
/shared/s3/emerge/data/genotype/imputed
ancestry=ea
for ext in {\"*.info.gz\",\"*dose.vcf.gz\",\"*dose.vcf.gz.tbi\",\"*.txt\"}; do
/shared/bioinformatics/software/scripts/qsub_job.sh \
        --job_name s3_upload_emerge \
        --script_prefix _${ext} \
        --mem 5 \
        --nslots 1 \
        --program "aws s3 cp . s3://rti-nd/data/genotype/imputed/${ancestry}/ \
            --recursive --exclude \"*\" --include $ext "
done