# EC2 Make Script
```
pi:ababaian
start: 2016 11 06
last completion : 2016 12 29 
```
## Introduction

On EC2, a standard system image (AMI) can be saved and reused. This script documents how the standard EC2 system image was built and keeps a version change-log as the image is updated. Current major features are:

### Resources
* hg38 genome (fasta)
* hgr genome, single copy rDNA at chr13:1000000. (fa)
* hgr0 reference sequence
* rDNA (fa)

### Software
* blasr
* bowtie 1 & 2
* Cufflinks v2.2.1
* GATK 3.6
* samtools
* sl
* tophat
* UCSC utilities


### Things to add in next update

* picard
* hgr1 reference sequence
* bcftools v.1.4

# Make Scripts

In [None]:
#!/bin/bash
# crown-161106
# ami-59b71739
# Primary Make Script
# Initialization script for an Ubuntu 16.04 LTS instance.
# Run with at least 4 GB of memory so the source builds compile successfully.

# Refresh the package index before installing anything
sudo apt-get update

# Bioinformatics Software
# -y answers apt's confirmation prompt so the installs can run unattended.
sudo apt-get install -y samtools # v. 0.1.19
sudo apt-get install -y bowtie # v. 1.1.2
sudo apt-get install -y bowtie2 # v. 2.2.6
sudo apt-get install -y tophat
#sudo apt-get install blasr # v.

# Docker: install it, start the daemon, and add the 'ubuntu' user to the
# 'docker' group.
sudo apt-get install -y docker.io
sudo service docker start
sudo usermod -a -G docker ubuntu
# Need to re-login here for the new group membership to take effect.
# Possible split into multiple tasks.

# AWS Command Line
sudo apt-get install -y awscli
aws configure # ENTER CREDENTIALS MANUALLY

# Small Binary Utilities Download (UCSC)
# Fetch prebuilt UCSC genome utilities into ~/bin and make them executable.
# mkdir -p keeps this step re-runnable when ~/bin already exists.
mkdir -p ~/bin; cd ~/bin

	ucsc_url=http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64
	for tool in faToTwoBit twoBitToFa wigToBigWig fastqToFa faSplit
	do
		wget "$ucsc_url/$tool"
	done

	# ./* instead of * so a file name starting with '-' is never read as an option
	chmod 755 ./*
cd ~

# Compiler Software
# Toolchain and libraries needed for the source builds below.
# -y answers apt's confirmation prompt so the installs can run unattended.
	sudo apt-get install -y build-essential
	sudo apt-get install -y gfortran
	sudo apt-get install -y graphviz
	sudo apt-get install -y libjpeg-dev
	sudo apt-get install -y libfreetype6-dev
	sudo apt-get install -y python


# Make blasr from source (using the PacificBiosciences pitchfork build tree)
# Use an absolute path so the build lands in ~/software regardless of the
# current directory; -p keeps the step re-runnable.
mkdir -p ~/software; cd ~/software

# GitHub no longer serves the unauthenticated git:// protocol; clone over https.
git clone https://github.com/PacificBiosciences/pitchfork

cd pitchfork
make init
cd deployment
# NOTE(review): run through `sh`, this executes in a child process, so any
# variables it sets do not reach this shell — confirm whether it should be
# sourced instead.
sh setup-env.sh
cd ..
make blasr

cd workspace
ln -s hdf5-1.8.16 hdf5
ln -s blasr blasr_install
# setup-env.sh lives in pitchfork/deployment (entered above), which is one
# level up from workspace.
cat ../deployment/setup-env.sh >> /home/ubuntu/.bashrc

# Download hg38 (from UCSC)
# Fetch the 2bit genome, convert it to FASTA, index it, then drop the 2bit.
	mkdir -p ~/resources; cd ~/resources
	wget ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit
	# Call the converter by full path: ~/bin was created earlier in this same
	# session, so it is not guaranteed to be on PATH yet.
	~/bin/twoBitToFa hg38.2bit hg38.fa
	samtools faidx hg38.fa
	rm hg38.2bit

# Download hgr and rDNA (from S3)
	aws s3 cp s3://crownproject/resources/hgr.fa ./
	aws s3 cp s3://crownproject/resources/hgr_main.fa ./
	aws s3 cp s3://crownproject/resources/rDNA.fa ./


## Download Biocontainers
# git clone https://github.com/BioContainers/containers

# Install Git LFS
#	wget https://github.com/github/git-lfs/releases/download/v1.4.4/git-lfs-linux-amd64-1.4.4.tar.gz
#
#	tar -xvf git-lfs-linux-amd64-1.4.4.tar.gz
#	sudo sh git-lfs-1.4.4/install.sh	
#	rm git*



# Build Docker Container
	# docker run [OPTIONS] <IMAGE> <command> <arguments>
	# -v : bind-mount a host directory into the container, given as <host dir>:<container dir>
	# so that, from the user's side, the call becomes simply
	# <command> <arguments>
	
	# Build bowtie container
	# cd containers/bowtie/1.1.2/
	# docker build -t bowtie . #builds bowtie image
	# alias bowtie='docker run -v /home/ubuntu:/home/ bowtie bowtie'

	# Build Samtools 1.3 Container
	# cd containers/samtools/1.3.1/
	# docker build -t samtools . #builds samtools image


# Download Crown Project Files
#	git clone https://github.com/ababaian/Crown.git

##
##
## CROWN_INIT INSTANCE SCREENSHOT HERE
##
##

In [None]:
#!/bin/bash
# crown-161229
# ami-22ca7b42

## HGR Reference Update ----------------------------------------------------
# The standard hgr.fa and hgr_main.fa contained an error: the rDNA array
# was duplicated. Corrected versions were re-uploaded to S3.

# Remove the old references (both files in a single rm call)
cd ~/resources/
rm hgr.fa hgr_main.fa

# Download new references
    # Single copy rDNA on chr13
    aws s3 cp s3://crownproject/resources/hgr.fa ./
        # Index the freshly downloaded reference
        samtools faidx hgr.fa

    # Just the region of rDNA up to the end of 28S (45S)
    aws s3 cp s3://crownproject/resources/hgr_45s.fa ./

    # hg38 genome with rDNA injection
    aws s3 cp s3://crownproject/resources/hg38r.2bit ./

# New Software ------------------------------------------------------------
# Genome Analysis Toolkit (GATK) 3.6-0-g89b7209
# Requires Java (OpenJDK 1.8.0_111)
# -y answers apt's confirmation prompt so the installs can run unattended.
    sudo apt-get install -y default-jre
    sudo apt-get install -y default-jdk

# Download gatk from s3
cd ~/software
    aws s3 cp s3://crownproject/software/GenomeAnalysisTK-3.6.tar.bz2 ./
    tar -xvf GenomeAnalysisTK-3.6.tar.bz2

# Build java running script 'gatk'
# The wrapper forwards its arguments to the GATK jar. "$@" is quoted so that
# arguments containing spaces (e.g. file paths) are passed through intact.
cd ~/bin
    echo '#!/bin/bash' > gatk
    echo '' >> gatk
    echo 'echo Running GATK' >> gatk
    echo 'java -Xmx4G -jar /home/ubuntu/software/GenomeAnalysisTK.jar "$@"' >> gatk
    # Make the wrapper executable so it can be invoked as `gatk`
    chmod 755 gatk

In [None]:
#!/bin/bash
# crown-170220
# ami-66129306

# Update from crown-161229 / ami-22ca7b42

# hgr0 Reference Update ------------------------------------------------
# Work inside ~/resources (where the other references are kept) so the
# downloads do not depend on the directory this cell is started from.
cd ~/resources
aws s3 cp s3://crownproject/resources/hgr0.fa ./
aws s3 cp s3://crownproject/resources/hgr0.gatk.fa ./
aws s3 cp s3://crownproject/resources/hgr0.gatk.dict ./

# Index both FASTA references
samtools faidx hgr0.fa
samtools faidx hgr0.gatk.fa


# New Software ---------------------------------------------------------
# Cufflinks
# Unpack the prebuilt Linux binaries and move them into ~/bin, then remove
# the emptied directory.
cd ~/software
  aws s3 cp s3://mouseproject/cufflinks-2.2.1.Linux_x86_64.tar.gz ./
  tar -xvf cufflinks-2.2.1.Linux_x86_64.tar.gz
  mv cufflinks-2.2.1.Linux_x86_64/* ~/bin/
  rmdir cufflinks-2.2.1.Linux_x86_64/

# sl
# -y answers apt's confirmation prompt so the install can run unattended.
  sudo apt-get install -y sl


In [None]:
# Placeholder (commands not yet implemented)

# BWA
# wget https://sourceforge.net/projects/bio-bwa/files/bwakit/bwakit-0.7.15_x64-linux.tar.bz2/download

