# Procedures for downloading data from dbGaP and moving to MIDAS

Directions for setting up a passwordless SSH connection to MIDAS (key-based login, no 
password prompt) can be found at: http://www.linuxproblem.org/art_9.html

**Note:** The three additional changes at the end of the page do need to be incorporated.


### phs000021_schiz
GAIN: Genome-Wide Association Study of Schizophrenia (phs000021.v3.p2)

https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs000021.v3.p2

In [1]:
## Local machine
#
# Decrypt dbGaP download 58513, then upload its phenotype, genotype, and
# study (meta) files to MIDAS with scp.

# Decrypt (must be executed within project dir)
cd /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556/
../../Desktop/sratoolkit/sratoolkit.2.8.2-1-win64/bin/vdb-decrypt.exe -v 58513


cd /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556/58513/GAIN/Schizophrenia/phs000021v3/p2

# Each scp destination below is written as one unbroken word: a
# backslash-newline followed by indentation would leave whitespace inside the
# path and split it into two separate arguments.

# upload phenotype data to MIDAS
scp phs* jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000021_schiz/phenotype/unprocessed

# upload genotype data to MIDAS
scp -r phg000013v1 jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000021_schiz/genotype/original/unprocessed

# Upload study (meta) files to MIDAS
# The three PDFs are listed as separate arguments, one per continued line.
# NOTE(review): this destination is studies/gwas-schiz/meta, while the other
# uploads in this section go to studies/phs000021_schiz/ -- confirm which
# directory is intended.
scp manifest_phs000021.GAIN_Schizophrenia.v3.p2.c1.GRU.pdf \
    Release_Notes.phs000021.Schizophrenia.v3.p2.MULTI.pdf \
    Study_Report.phs000021.GAIN_Schizophrenia.v3.p2.MULTI.pdf \
    jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/gwas-schiz/meta

    
## MIDAS

# genotype data
# Copy the uploaded genotype archives into processing/ and unpack them there,
# so the copies in unprocessed/ stay untouched.
cd /share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000021_schiz/genotype/original
cp unprocessed/phg000013v1/* processing/
cd processing/

# This file is simply a compressed version of the same file in here
rm genotype-calls.Affymetrix6.v1.p1.MULTI.marker-info.tar.gz

# decompressing
for file in *.tar; do
    # Skip the literal pattern when no .tar file is present.
    [ -e "$file" ] || continue
    tar -xvf "$file"
done
# The marker-info .tar.gz was deleted above, so only extract it if unpacking
# the .tar archives produced a file of that name again; an unconditional
# extraction would fail on a missing file.
if [ -f genotype-calls.Affymetrix6.v1.p1.MULTI.marker-info.tar.gz ]; then
    tar -xvzf genotype-calls.Affymetrix6.v1.p1.MULTI.marker-info.tar.gz
fi
# -f keeps these quiet when nothing matches; -- protects against names
# beginning with a dash.
rm -f -- *.tar
rm -f -- *.gz
# Dry-run sketch for renaming (prints mv commands without executing them);
# the pattern has not been verified against the actual file names.
#for file in *; do echo "mv $file $(printf '%s\n' "$file" | perl -pe 's/phg0.*nia\.//')"; done


# phenotype data
# Copy the uploaded phenotype files into processing/, shorten their names,
# and unpack them there.
cd /share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000021_schiz/phenotype
cp unprocessed/* processing/
cd processing/

# renaming
# Pass 1: remove the text from "phs0" through the last "p2." in each name.
# The pattern is single-quoted so the shell passes it to perl verbatim, and
# it uses regex syntax (".*", "\.") rather than glob syntax.
for i in *; do
    new=$(printf '%s\n' "$i" | perl -pe 's/phs0.*p2\.//')
    # Skip names the pattern leaves unchanged (mv onto itself is an error)
    # and never rename a file to an empty name.
    if [ -n "$new" ] && [ "$new" != "$i" ]; then mv -- "$i" "$new"; fi
done
# Pass 2: remove everything up to and including "GAIN_".
for i in *GAIN*; do
    new=$(printf '%s\n' "$i" | perl -pe 's/.*GAIN_//')
    if [ -n "$new" ] && [ "$new" != "$i" ]; then mv -- "$i" "$new"; fi
done

# unzip then remove tar files
# Each loop skips the literal pattern when no file matches it.
for file in *txt.gz; do [ -e "$file" ] || continue; gunzip "$file"; done
for file in *tar.gz; do [ -e "$file" ] || continue; tar -xvzf "$file"; done
for file in *tar.gz; do [ -e "$file" ] || continue; rm -- "$file"; done

SyntaxError: invalid syntax (<ipython-input-1-4f3d006a8d0c>, line 4)

### phs000101_sclerosis
Genome-wide genotyping in amyotrophic lateral sclerosis and neurologically normal controls: first stage analysis and public release of data (phs000101.v4.p1)

https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs000101.v4.p1

In [None]:
## local machine ##


## MIDAS ##


# renaming phenotype files
# NOTE(review): no cd precedes this loop, so it renames files in whatever
# directory is current -- presumably the study's phenotype processing
# directory; confirm before running.
for i in *; do
    # The pattern is single-quoted so the shell hands it to perl verbatim.
    new=$(printf '%s\n' "$i" | perl -pe 's/phs000101.v4.pht00.+ALS_//g')
    # Skip names the pattern leaves unchanged (mv onto itself is an error)
    # and never rename a file to an empty name.
    if [ -n "$new" ] && [ "$new" != "$i" ]; then mv -- "$i" "$new"; fi
done


# The path is kept on one line: a backslash-newline preceded by a space
# would split it into two arguments to cd.
cd /share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000101_sclerosis/genotype/original

# stage genotype data in processing/ (to be untarred & renamed there)
cp unprocessed/GenotypeFiles/* processing/
cd processing/



### phs000127_irish-amy
Study of Irish Amyotrophic Lateral Sclerosis (SIALS) (phs000127.v2.p1)

https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs000127.v2.p1

In [None]:
## local machine ##

# download from dbGaP to local machine at
# /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556
cd /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556/

# decrypt in this directory
../../Desktop/sratoolkit/sratoolkit.2.8.2-1-win64/bin/vdb-decrypt.exe -v 58527

# Check for successful decryption: the download directory must exist and no
# *ncbi_enc files may remain under it. The pattern is quoted so that find
# receives it verbatim instead of the shell expanding it against the current
# directory.
if [ -d 58527 ] && [ -z "$(find 58527/ -name '*ncbi_enc' -print)" ]; then echo "Success!"; else echo "Failed!"; fi

# Create directory structure on MIDAS
# One mkdir -p builds the whole tree (meta, genotype/{original,imputed},
# genotype/original/{processing,unprocessed,final},
# phenotype/{processing,unprocessed,final}) and is safe to re-run when some
# of the directories already exist. The backslash-newline inside the double
# quotes joins the two lines without inserting whitespace.
ssh jmarks@rtplhpc01.rti.ns "mkdir -p /share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000127_irish-amy/\
{meta,genotype/{imputed,original/{processing,unprocessed,final}},phenotype/{processing,unprocessed,final}}"

# upload files to MIDAS
# NOTE(review): these globs are relative to the current directory
# (dbGaP-2556/); the phs000021_schiz section cd's into the decrypted study
# directory before uploading -- confirm the files are reachable from here.
scp *.pdf jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000127_irish-amy/meta
scp *.gz jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000127_irish-amy/phenotype/unprocessed
scp phg000026v1/* jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000127_irish-amy/genotype/original/unprocessed

### phs000167_moleGen-Schiz
Molecular Genetics of Schizophrenia - nonGAIN Sample (MGS_nonGAIN) (phs000167.v1.p1)

https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs000167.v1.p1

In [None]:
# download from dbGaP to local machine at
# /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556
cd /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556/

# decrypt in this directory
../../Desktop/sratoolkit/sratoolkit.2.8.2-1-win64/bin/vdb-decrypt.exe -v 58528

# Check for successful decryption: the download directory must exist and no
# *ncbi_enc files may remain under it. The pattern is quoted so that find
# receives it verbatim instead of the shell expanding it against the current
# directory.
if [ -d 58528 ] && [ -z "$(find 58528/ -name '*ncbi_enc' -print)" ]; then echo "Success!"; else echo "Failed!"; fi

# Create directory structure on MIDAS
# One mkdir -p builds the whole tree (meta, genotype/{original,imputed},
# genotype/original/{processing,unprocessed,final},
# phenotype/{processing,unprocessed,final}) and is safe to re-run when some
# of the directories already exist. The backslash-newline inside the double
# quotes joins the two lines without inserting whitespace.
ssh jmarks@rtplhpc01.rti.ns "mkdir -p /share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000167_moleGen-Schiz/\
{meta,genotype/{imputed,original/{processing,unprocessed,final}},phenotype/{processing,unprocessed,final}}"

# upload files to MIDAS
# NOTE(review): these globs are relative to the current directory
# (dbGaP-2556/); the phs000021_schiz section cd's into the decrypted study
# directory before uploading -- confirm the files are reachable from here.
scp *.pdf jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000167_moleGen-Schiz/meta
scp *.gz jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000167_moleGen-Schiz/phenotype/unprocessed
scp phg000037v1/* jmarks@rtplhpc01.rti.ns:/share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000167_moleGen-Schiz/genotype/original/unprocessed

    

### phs000147_breast-cancer

In [None]:
# download from dbGaP to local machine at
# /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556
cd /cygdrive/c/Users/jmarks/ncbi/dbGaP-2556/

# decrypt in this directory
../../Desktop/sratoolkit/sratoolkit.2.8.2-1-win64/bin/vdb-decrypt.exe -v 58546

# Check for successful decryption: the download directory must exist and no
# *ncbi_enc files may remain under it. The pattern is quoted so that find
# receives it verbatim instead of the shell expanding it against the current
# directory.
if [ -d 58546 ] && [ -z "$(find 58546/ -name '*ncbi_enc' -print)" ]; then echo "Success!"; else echo "Failed!"; fi


# Create directory structure on MIDAS (under amazon_s3/studies)
# One mkdir -p builds the whole tree (meta, genotype/{original,imputed},
# genotype/original/{processing,unprocessed,final},
# phenotype/{processing,unprocessed,final}) and is safe to re-run when some
# of the directories already exist. The backslash-newline inside the double
# quotes joins the two lines without inserting whitespace.
ssh jmarks@rtplhpc01.rti.ns "mkdir -p /share/nas04/bioinformatics_group/data/amazon_s3/studies/phs000147_breast-cancer/\
{meta,genotype/{imputed,original/{processing,unprocessed,final}},phenotype/{processing,unprocessed,final}}"

## Helpful commands and scratch work

In [None]:
# Dry runs: print the mv command each rename would execute, without renaming.
# The perl patterns are single-quoted so the shell passes them verbatim
# (an unquoted [0-9] or * is subject to filename expansion).
for i in *; do echo "mv $i $(printf '%s\n' "$i" | perl -pe 's/phs000101.v4.pht00.+ALS_//g')"; done

for i in *; do echo "mv $i $(printf '%s\n' "$i" | perl -pe 's/phg000.+ALS_//g')"; done

# Actual rename: strip the text matched by the pattern from each file name.
for i in *; do
    new=$(printf '%s\n' "$i" | perl -pe 's/phg000.+v[0-9].ALS_//g')
    # Skip names the pattern leaves unchanged (mv onto itself is an error)
    # and never rename a file to an empty name.
    if [ -n "$new" ] && [ "$new" != "$i" ]; then mv -- "$i" "$new"; fi
done



# Unpack plain and gzip-compressed tar archives in the current directory;
# each loop skips the literal pattern when no file matches it.
for i in *.tar; do [ -e "$i" ] || continue; tar -xvf "$i"; done
for i in *.tar.gz; do [ -e "$i" ] || continue; tar -xvzf "$i"; done

# Remove only the archives themselves; a bare *tar* pattern would also match
# any other file whose name merely contains "tar".
rm -f -- *.tar *.tar.gz



