# TCGA-COAD RNA-seq hgr1 alignments
```
pi:ababaian
files: ~/Crown/data2/tcga_1_coad
start: 2018 08 16
complete : 2018 08 22
```
## Introduction

Alignment of the TCGA-COAD RNA-seq data to hgr1. Only libraries for which there is a matched normal from the same patient are included (41 patients, 82 libraries).


## Materials and Methods

### Scripts

#### hgr1 Alignment

In [None]:
#!/bin/bash
# 1kg_align_v2.tcga.1.sh
# rDNA alignment pipeline
# 180816 build -- TCGA
# AMI: crown-180813 - ami-0031fd61f932bdef9
# EC2: c4.2xlarge (8cpu / 15 gb)
# EC2: c4.xlarge  (4cpu / 8  gb)
# Storage: 300 Gb
#
# This first stage sets the run parameters, downloads one TCGA RNA-seq BAM
# by its file UUID, reads the read-group fields from its header and converts
# it back to FASTQ so that the later stages can re-align the reads to hgr1.

# Input Requirements --------------------------

# $1 : Library name and file-output name
# $2 : Library population/analysis set
# $3 : Library UUID

# Control Panel -------------------------------
# CPU
  THREADS='3'

# Sequencing Data
  LIBRARY=$1 # Library/ File name

# TCGA FILE UUID
  UUID=$3

# FastQ File-names
# 0 = reads that are neither/both mate 1 and mate 2, 1 = mate 1, 2 = mate 2
# (these are the -0 / -1 / -2 targets of `samtools fastq` below)
  FQ0="$LIBRARY.tmp.sort.0.fq"
  FQ1="$LIBRARY.tmp.sort.1.fq"
  FQ2="$LIBRARY.tmp.sort.2.fq"

# Read Group Data
# Extract from downloaded BAM file / input
  RGPO=$2 # Patient Population (recorded only; no command below reads it)

  #RGSM= # Sample. Patient Identifer   (filled in from the BAM header below)
  #RGID= # Read Group ID. Accession Number (filled in from the BAM header below)

  RGLB=$LIBRARY # Library Name. Accession Number
  RGPL='ILLUMINA'  # Sequencing Platform.

  # Extract Sequencing Run Info
  # (RGPU is set later, once the FASTQ files exist)
  #  RGPU=$(gzip -dc $FQ1 | head -n1 - | cut -f1 -d':' | cut -f2 -d' ')

# Initialize workdir --------------------------

# Make working directory
  mkdir -p align; cd align

# Copy hgrX genome and create bowtie2 index
  cp ~/resources/hgr1/* ./

# Download RNAseq BAM file
# with a UUID as input
  ~/bin/gdc-client download -t ~/resources/gdc.token -d ./ \
  -n "$THREADS" "$UUID"

# Link the RNA-seq bamfile which is called by its UID to workdir
# (the glob must stay unquoted; it is expected to match exactly one BAM)
  ln -s */*.bam input.bam

# Extract ReadGroup Sample Name (SM)
# NOTE(review): if the header carries several @RG lines with different
# values, these variables will hold several lines -- confirm that the input
# BAMs have a single read group.
  RGSM=$(~/bin/samtools view -H input.bam | grep '^@RG' | sed "s/.*SM:\([^\t]*\).*/\1/g" | uniq )

# Extract ReadGroup identifier (ID)
  RGID=$(~/bin/samtools view -H input.bam | grep '^@RG' | sed "s/.*ID:\([^\t]*\).*/\1/g" | uniq )

# Convert input bam file to fastq files for re-alignment
# The BAM has to be sorted by read name (-n) so that both mates of a pair are
# adjacent when `samtools fastq` splits them into the -1 / -2 files.
# FIX: the original read `-@ $THREADS-n` (no space), so `-n` was swallowed
# into the thread-count argument and the BAM was coordinate-sorted instead.
~/bin/samtools sort -@ "$THREADS" -n input.bam | \
    ~/bin/samtools fastq -@ "$THREADS" -O \
    -0 "$FQ0" \
    -1 "$FQ1" \
    -2 "$FQ2" -

# SINGLE END READS ====================================================

# This branch is taken only when the unpaired-read FASTQ exists and is
# non-empty. It aligns those reads to hgr1, keeps the mapped reads as a
# sorted + indexed BAM and uploads the BAM and flagstat reports to S3.
if test -s $FQ0; then

    # Read-group platform unit: taken from the first FASTQ header line
    # (gzip variant kept for reference)
    #RGPU=$(gzip -dc $FQ0| head -n1 - | cut -f1 -d':' | cut -f2 -d' ')
    RGPU=$(head -n1 $FQ0 | cut -f1 -d':' | cut -f2 -d' ')

    # Bowtie2 local alignment against the hgr1 index; the SAM stream is
    # converted to an unsorted BAM on the fly
    bowtie2 --very-sensitive-local -p $THREADS \
      --rg-id $RGID --rg LB:$RGLB --rg SM:$RGSM \
      --rg PL:$RGPL --rg PU:$RGPU \
      -x hgr1 -U $FQ0 \
      | ~/bin/samtools view -bS - > aligned_unsorted.bam

    # Whole-library statistics (mapped + unmapped)
    ~/bin/samtools flagstat aligned_unsorted.bam > aligned_unsorted.flagstat

    # The FASTQ is no longer needed; free the disk space
    rm $FQ0

    # Read Subset ------------------------------

    # Header of the full alignment (not read again inside this branch)
    ~/bin/samtools view -H aligned_unsorted.bam > align.header.tmp

    # Keep mapped reads only (-F 4 drops unmapped) and coordinate-sort them
    ~/bin/samtools view -b -F 4 aligned_unsorted.bam \
      | ~/bin/samtools sort -@ $THREADS -O BAM - > align.hgr1.bam

    # Index and statistics for the hgr1-mapped subset
    ~/bin/samtools index align.hgr1.bam
    ~/bin/samtools flagstat align.hgr1.bam > align.hgr1.flagstat

    # Final names: full-library report
    # (the full BAM itself is not kept for single-end libraries)
    #mv aligned_unsorted.bam $LIBRARY.se.bam
    #mv aligned_unsorted.bam.bai $LIBRARY.se.bam.bai
    mv aligned_unsorted.flagstat $LIBRARY.se.flagstat

    # Final names: hgr1-mapped subset
    mv align.hgr1.bam $LIBRARY.hgr1.se.bam
    mv align.hgr1.bam.bai $LIBRARY.hgr1.se.bam.bai
    mv align.hgr1.flagstat $LIBRARY.hgr1.se.flagstat

    # Upload: full-library flagstat first, then the mapped BAM, its index
    # and its flagstat
    for OUTFILE in \
      $LIBRARY.se.flagstat \
      $LIBRARY.hgr1.se.bam \
      $LIBRARY.hgr1.se.bam.bai \
      $LIBRARY.hgr1.se.flagstat
    do
      aws s3 cp $OUTFILE s3://crownproject/tcga/
    done

fi

# PAIRED END READS ====================================================

# This branch is taken only when the mate-1 FASTQ exists and is non-empty.
# It aligns the read pairs to hgr1, keeps every mapped read plus the unmapped
# mates of mapped reads, and uploads the resulting BAM, index and flagstat
# reports to S3.
if [ -s "$FQ1" ]
then
    # Paired-End Extracted Reads Alignment

    # Extract Sequencing Run Info (read-group platform unit) from the first
    # FASTQ header line. Variant for gzip-compressed FASTQ kept for reference:
    #RGPU=$(gzip -dc $FQ1 | head -n1 - | cut -f1 -d':' | cut -f2 -d' ')
    RGPU=$(head -n1 "$FQ1" | cut -f1 -d':' | cut -f2 -d' ')

    # Bowtie2: align to genome
    bowtie2 --very-sensitive-local -p "$THREADS" \
      --rg-id "$RGID" --rg "LB:$RGLB" --rg "SM:$RGSM" \
      --rg "PL:$RGPL" --rg "PU:$RGPU" \
      -x hgr1 -1 "$FQ1" -2 "$FQ2" | \
      ~/bin/samtools view -bS - > aligned_unsorted.bam

    # Calculate library flagstats (whole library, mapped + unmapped)
      ~/bin/samtools flagstat aligned_unsorted.bam > aligned_unsorted.flagstat

      rm "$FQ1" "$FQ2" # Remove fastq files to save space


    # Read Subset ------------------------------
    # Extract mapped reads, and their unmapped pairs

      # Extract Header
      ~/bin/samtools view -H aligned_unsorted.bam > align.header.tmp

      # Extract Mapped Reads (-F 4: read itself is mapped)
      # and their unmapped pairs (-f 4 -F 8: read unmapped, mate mapped)
      ~/bin/samtools view -b -F 4 aligned_unsorted.bam > align.F4.bam #mapped
      ~/bin/samtools view -b -f 4 -F 8 aligned_unsorted.bam > align.f4F8.bam #unmapped pairs

      # Extract just the 45S unit (disabled)
      #aws s3 cp s3://crownproject/resources/rDNA_45s.bed ./
      #~/bin/samtools view -b -L rDNA_45s.bed align.F4.bam > align.F4.45s.bam

      # What are the mapped readnames
      ~/bin/samtools view align.F4.bam | cut -f1 - > read.names.tmp

      # Extract mapped reads, including cases of read pairs mapped on the
      # edge of the region of interest
      # -------|======= R O I ======| ----------
      # read:                  ====---====
      # FIX: the original ran this identical command twice in a row, with the
      # second run overwriting the output of the first; it is now run once.
      # grep -F matches the names as fixed strings anywhere in the SAM line.
      ~/bin/samtools view align.F4.bam | grep -Ff read.names.tmp - > align.F4.tmp.sam

      # Complete mapped reads list
      #cut -f1 align.F4.tmp.sam > read.names.45s.long.tmp

      # Extract unmapped reads with a mapped pair
      ~/bin/samtools view align.f4F8.bam | grep -Ff read.names.tmp - > align.f4F8.tmp.sam

      # Re-compile bam file: header + mapped reads + unmapped mates,
      # then coordinate-sort, index and summarise
      cat align.header.tmp align.F4.tmp.sam align.f4F8.tmp.sam | ~/bin/samtools view -bS - > align.hgr1.tmp.bam
        ~/bin/samtools sort -@ "$THREADS" -O BAM align.hgr1.tmp.bam > align.hgr1.bam
        ~/bin/samtools index align.hgr1.bam
        ~/bin/samtools flagstat align.hgr1.bam > align.hgr1.flagstat

      # Clean up every intermediate whose name contains "tmp"
      # (the glob is intentionally unquoted)
      rm *tmp* align.F4.bam align.f4F8.bam

    # Rename the total Bam Files
      mv aligned_unsorted.bam "$LIBRARY.bam"
      #mv aligned_unsorted.bam.bai $LIBRARY.bam.bai
      mv aligned_unsorted.flagstat "$LIBRARY.flagstat"

    # Rename the hgr-aligned Bam files
      mv align.hgr1.bam "$LIBRARY.hgr1.bam"
      mv align.hgr1.bam.bai "$LIBRARY.hgr1.bam.bai"
      mv align.hgr1.flagstat "$LIBRARY.hgr1.flagstat"

    # Copy output to AWS S3
    # Alignments (Full) -- only the flagstat report is uploaded
      aws s3 cp "$LIBRARY.flagstat" s3://crownproject/tcga/

    # Alignments (Aligned)
      aws s3 cp "$LIBRARY.hgr1.bam" s3://crownproject/tcga/
      aws s3 cp "$LIBRARY.hgr1.bam.bai" s3://crownproject/tcga/
      aws s3 cp "$LIBRARY.hgr1.flagstat" s3://crownproject/tcga/

fi
 

# Final stage: archive this run's screen log on S3, then have the instance
# terminate itself.

# Screen session log -> <library>.screenlog -> S3 logs folder
cp ~/screenlog.0 ./$LIBRARY.screenlog
aws s3 cp $LIBRARY.screenlog s3://crownproject/tcga/logs/

# Ask EC2 to terminate the instance this script is running on; its ID is
# looked up with ec2metadata
aws ec2 terminate-instances --instance-ids $(ec2metadata --instance-id)

# Script complete


#### queenB.sh script

In [None]:
#!/bin/bash
# queenB.sh
# 20180814 build
# EC2 Launch / Control Script
#
# Usage: bash queenB.sh <parameter_file>
#

# 1. queenB script is initialized locally and input files
#    are parsed ready for cluster analysis
# 2. queenB launches instances, logs in to it and runs the
#    droneB.sh script remotely.
# 3. The droneB script is executed on the instance and it
#    launches a `screen` on the instance and loads and
#    starts to perform the $TASK (gather.sh) script.
# 4. TASK script should include an instance shut-down
#    command to close the instance upon completion.
#

# Control Panel =========================
# EC2 TASK Script - script for droneB to execute
TASK="s3://crownproject/scripts/hgr1_align_v2.tcga.sh"

# Parameter file:
# Each line of PARAMETERS is passed as the arguments of
# the droneB script, which hands them on to the
# TASK script.
# i.e. bash droneB.sh <line_N_of_PARAMETERS>
# PARAMETERS="tcga0_input.txt"
PARAMETERS=$1

# Without a readable parameter file the awk call below would wait on stdin
# (or fail), so stop early with a usage message instead.
if [ -z "$PARAMETERS" ] || [ ! -r "$PARAMETERS" ]; then
  echo "Usage: bash queenB.sh <parameter_file>" >&2
  exit 1
fi

# EC2 Set-up
instanceTYPE='c4.xlarge'
imageID='ami-0031fd61f932bdef9' #AMI TCGA

devNAME='/dev/sda1' # /dev/sda1 for Crown-AMI
volSIZE='200' # in Gb

# Number of instances to launch
#COUNT=2 # predetermined number
COUNT=$(awk 'END{print NR}' "$PARAMETERS") # one instance per line of PARAMETERS

# Security
keyNAME='CrownKey'
keyPATH="/home/artem/.ssh/CrownKey.pem"
secGROUP='crown-group'

# =======================================

# Launch loop: for every line of $PARAMETERS start one EC2 instance, wait for
# it to come up, then run droneB.sh on it over ssh with $TASK and that line's
# fields as arguments.
for ITER in $(seq 1 "$COUNT")
do

  # Extract Parameters/Arguments ----------

  # Line number ITER of the parameter file, tabs turned into spaces
  ARGS=$(sed -n "${ITER}p" "$PARAMETERS" | sed 's/\t/ /g' - )

  echo "Launch instance # $ITER"
  date
  echo "Instance Type: $instanceTYPE"
  echo "AMI Image: $imageID"
  echo "Run Script: $TASK"
  echo "Parameters: $ARGS"

  # Launch an instance --------------------
  # NOTE: each iteration of the for loop launches exactly one instance
  aws ec2 run-instances --image-id "$imageID" --count 1 \
   --instance-type "$instanceTYPE" --key-name "$keyNAME" \
   --block-device-mappings "DeviceName=$devNAME,Ebs={VolumeSize=$volSIZE}" \
   --security-groups "$secGROUP" > launch.tmp

  # Another alternative is to use --user-data droneB.sh
  # which will run at instance boot-up
  # passing arguments to it may be challenging

  # Retrieve instance ID from the run-instances output
  # (xargs strips the surrounding quotes and whitespace)
  instanceID=$(grep -E -o -e 'InstanceId[":/A-Za-z0-9_ \\-]*' launch.tmp |\
    cut -f2 -d' ' - | xargs)

  echo "Instance ID: $instanceID"

  # If the launch failed there is nothing to wait for or log in to: skip this
  # line instead of sleeping and then calling aws/ssh with empty arguments.
  if [ -z "$instanceID" ]; then
    echo "ERROR: no instance ID for line $ITER of $PARAMETERS; skipping" >&2
    rm -f launch.tmp launch2.tmp
    echo ''
    echo ''
    continue
  fi

  # Add a few minute wait here to allow for Public DNS to be assigned
  # otherwise ssh doesn't work
  sleep 180s

  # Retrieve public DNS (first PublicDnsName line of the description)
  aws ec2 describe-instances --instance-ids "$instanceID" > launch2.tmp

  pubDNS=$(grep -E -o -m 1 -e 'PublicDnsName[.":/A-Za-z0-9_ \\-]*' launch2.tmp |\
    cut -f2 -d' ' - | xargs)

  echo "Public DNS: $pubDNS"

  # Without a DNS name ssh cannot connect. The instance is already running,
  # so report its ID for manual follow-up.
  if [ -z "$pubDNS" ]; then
    echo "ERROR: no public DNS for $instanceID; task NOT started on it" >&2
    rm -f launch.tmp launch2.tmp
    echo ''
    echo ''
    continue
  fi

  # Access the instance -------------------

  LOGIN="ubuntu@$pubDNS"

  # droneB.sh is fed to the remote `bash -s` on stdin; $TASK and the
  # parameter fields become its positional arguments.
  # shellcheck disable=SC2086 -- $ARGS is split into separate words on purpose
  ssh -i "$keyPATH" \
    -o StrictHostKeyChecking=no \
    "$LOGIN" 'bash -s' "$TASK" $ARGS < droneB.sh

  # Cleanup: remove only the two scratch files created above (the original
  # `rm *.tmp` removed every .tmp file in the working directory)
  rm -f launch.tmp launch2.tmp

  echo ''
  echo ''

done

# end of script

#### droneB.sh script

In [None]:
#!/bin/bash
# droneB.sh
#
# Usage: bash droneB.sh <s3_path_of_task_script> [task arguments...]
#

# This script-layer is necessary to launch a screen session
# on each ec2-machine. The pipeline is run within that session
# and the output is logged. This allows 'looking in' on sessions
# as they are running.

# Commands to run on server-side
# ===============================================================

SCRIPTPATH=$1

SCRIPT=$(basename "$1")

shift # drop first (TASK or SCRIPT variable); "$@" is now the task arguments

# The task script is started as ~/$SCRIPT below, so download it from the home
# directory rather than relying on the caller's working directory.
  cd ~ || exit 1

# Download pipeline / droneB's function
  aws s3 cp "$SCRIPTPATH" ./

# Make just the downloaded script executable
# (was `chmod 777 *.sh`: world-writable, and applied to every .sh file here)
  chmod 755 "./$SCRIPT"

# Open a detached (-d -m), logged (-L) screen session whose window title is
# "sh" (-t sh) and run the task script in it with the remaining arguments.
# Same flags as the original `screen -Ldmt sh`, written out separately.
  screen -L -d -m -t sh ~/"$SCRIPT" "$@"


# ===============================================================


# ===============================================================


#### TCGA files

Search Terms
``` 
cases.primary_site in ["Colorectal"] and cases.project.project_id in ["TCGA-COAD"] and files.data_category in ["Raw Sequencing Data","Transcriptome Profiling"] and files.data_format in ["BAM"] and files.experimental_strategy in ["RNA-Seq"]
```

In [5]:
# Print the three batch parameter tables (1A, 1B, 1C), with a blank-ish line
# and a dashed rule between consecutive tables.
cd /home/artem/Desktop/Crown/data2/tcga_1_coad

RULE='---------------------------------------'

cat tcga_run1A.txt
for TABLE in tcga_run1B.txt tcga_run1C.txt
do
  echo ' '
  echo "$RULE"
  cat $TABLE
done

TCGA-A6-2671-11A TCGA-COAD 3bc30231-3fbd-4992-9088-1a83efa1a31e
TCGA-A6-2671-01A TCGA-COAD 80ff8844-3c90-4c66-b6d9-a72ea86219ed
TCGA-A6-2675-11A TCGA-COAD 3e0ae536-e178-433b-95b8-a52e14c17f14
TCGA-A6-2675-01A TCGA-COAD c1c3ed06-d423-46bf-8b43-77f7817c59bd
TCGA-A6-2678-11A TCGA-COAD 2d9c8cc3-d56a-4920-b86c-2389ca913e97
TCGA-A6-2678-01A TCGA-COAD 670e9a3d-5a6e-4ddb-a1bf-6e48e3786fb1
TCGA-A6-2679-01A TCGA-COAD 51932ec0-8c19-4c51-89ee-445e9a84c913
TCGA-A6-2679-11A TCGA-COAD 662a73d1-ef4d-482d-b427-7dfbca525571
TCGA-A6-2680-01A TCGA-COAD 13dea3ca-0454-465e-963b-9a33fafd098d
TCGA-A6-2680-11A TCGA-COAD afb3c381-bf54-453f-ae30-b0c1fc0e55fc 
---------------------------------------
TCGA-A6-2682-01A TCGA-COAD 6491c24d-93a1-42e4-8317-4999aaa264b8
TCGA-A6-2682-11A TCGA-COAD f61b9cd4-288e-4ab9-8ce9-61691cebbf47
TCGA-A6-2683-01A TCGA-COAD 61f1e33f-6c84-43cb-b7d6-53701e9ace1d
TCGA-A6-2683-11A TCGA-COAD f6997426-b4d8-438a-b6df-00f9b5894cbd
TCGA-A6-2684-11A TCGA-COAD 18625fe4-3c19-45d9-9d

### Running Launcher off of AWS

Changes to the scripts above that are required to run the launcher from an AWS server (useful if the local internet connection is too unreliable for launching instances).

`queenB.sh` script

1. Launch logged screen instance for STDOUT logging `screen -Ldmt`
2. Lines 41-44: Security group certification for instance launch needs to be uploaded to launch instance.
3. Check if `aws` / `ec2` commands are available on Crown AMI.

### Results



In [6]:
# TCGA-COAD Run 1A
# Batch table for this launch
INPUT='tcga_run1A.txt'
cd ~/Crown/data2/tcga_1_coad/

# Show the batch table, framed by blank lines
echo ''
cat $INPUT
echo ''

# Publish the current pipeline script to the S3 location queenB.sh uses as
# TASK, then launch one instance per line of the table
aws s3 cp hgr1_align_v2.tcga.sh s3://crownproject/scripts/hgr1_align_v2.tcga.sh
bash queenB.sh $INPUT


TCGA-A6-2671-11A TCGA-COAD 3bc30231-3fbd-4992-9088-1a83efa1a31e
TCGA-A6-2671-01A TCGA-COAD 80ff8844-3c90-4c66-b6d9-a72ea86219ed
TCGA-A6-2675-11A TCGA-COAD 3e0ae536-e178-433b-95b8-a52e14c17f14
TCGA-A6-2675-01A TCGA-COAD c1c3ed06-d423-46bf-8b43-77f7817c59bd
TCGA-A6-2678-11A TCGA-COAD 2d9c8cc3-d56a-4920-b86c-2389ca913e97
TCGA-A6-2678-01A TCGA-COAD 670e9a3d-5a6e-4ddb-a1bf-6e48e3786fb1
TCGA-A6-2679-01A TCGA-COAD 51932ec0-8c19-4c51-89ee-445e9a84c913
TCGA-A6-2679-11A TCGA-COAD 662a73d1-ef4d-482d-b427-7dfbca525571
TCGA-A6-2680-01A TCGA-COAD 13dea3ca-0454-465e-963b-9a33fafd098d
TCGA-A6-2680-11A TCGA-COAD afb3c381-bf54-453f-ae30-b0c1fc0e55fc
Completed 7.1 KiB/7.1 KiB with 1 file(s) remainingupload: ./hgr1_align_v2.tcga.sh to s3://crownproject/scripts/hgr1_align_v2.tcga.sh
Launch instance # 1
Thu Aug 16 15:34:22 PDT 2018
Instance Type: c4.xlarge
AMI Image: ami-0031fd61f932bdef9
Run Script: s3://crownproject/scripts/hgr1_align_v2.tcga.sh
Parameters: TCGA-A6-2671-11A TCGA-COAD 3b

In [1]:
# TCGA-COAD Run 1B (table tcga_run1B1.txt)
# Batch table for this launch
INPUT='tcga_run1B1.txt'
cd ~/Crown/data2/tcga_1_coad/

# Show the batch table, framed by blank lines
echo ''
cat $INPUT
echo ''

# Publish the current pipeline script to the S3 location queenB.sh uses as
# TASK, then launch one instance per line of the table
aws s3 cp hgr1_align_v2.tcga.sh s3://crownproject/scripts/hgr1_align_v2.tcga.sh
bash queenB.sh $INPUT


TCGA-A6-2682-01A TCGA-COAD 6491c24d-93a1-42e4-8317-4999aaa264b8
TCGA-A6-2682-11A TCGA-COAD f61b9cd4-288e-4ab9-8ce9-61691cebbf47
TCGA-A6-2683-01A TCGA-COAD 61f1e33f-6c84-43cb-b7d6-53701e9ace1d
TCGA-A6-2683-11A TCGA-COAD f6997426-b4d8-438a-b6df-00f9b5894cbd
TCGA-A6-2684-11A TCGA-COAD 18625fe4-3c19-45d9-9d7c-a295fbf83f2e
TCGA-A6-2684-01C TCGA-COAD a1da668f-e62b-4b39-b1de-be6df71496ad
TCGA-A6-2685-11A TCGA-COAD 60b762e2-d8e2-4f1c-a005-65a221e035b4
TCGA-A6-2685-01A TCGA-COAD 69fffc3d-f1af-450f-a1a9-979111aee040
TCGA-A6-2686-11A TCGA-COAD 818274b6-5103-4689-9d71-7dbff7ea62d6
TCGA-A6-2686-01A TCGA-COAD ed77b3cf-a543-439f-8e11-7c1fb9efb157
TCGA-A6-5659-11A TCGA-COAD 3f26333a-9c85-4f55-8a3e-6f78d91cc553
TCGA-A6-5659-01A TCGA-COAD aabe673a-9363-45ce-98f6-4fddbb6a6a31
TCGA-A6-5662-11A TCGA-COAD 891d4b89-492a-4fdf-aadb-92e5f3eb2175
TCGA-A6-5662-01A TCGA-COAD da61c732-293d-41aa-821c-caba4b248276
Completed 7.1 KiB/7.1 KiB with 1 file(s) remainingupload: ./hgr1_align_v2.tcga.sh to s3

In [2]:
# TCGA-COAD Run 1B (table tcga_run1B2.txt)
# Batch table for this launch
INPUT='tcga_run1B2.txt'
cd ~/Crown/data2/tcga_1_coad/

# Show the batch table, framed by blank lines
echo ''
cat $INPUT
echo ''

# Publish the current pipeline script to the S3 location queenB.sh uses as
# TASK, then launch one instance per line of the table
aws s3 cp hgr1_align_v2.tcga.sh s3://crownproject/scripts/hgr1_align_v2.tcga.sh
bash queenB.sh $INPUT


TCGA-A6-5665-01A TCGA-COAD 9b2dd3c7-9101-4fb1-bc2c-17d5e24f6bfb
TCGA-A6-5665-11A TCGA-COAD fde66458-b58b-40e6-89cc-97970b566b85
TCGA-A6-5667-01A TCGA-COAD 7d10f16e-737a-4351-ab41-9e9794b92785
TCGA-A6-5667-11A TCGA-COAD d1f1002d-525b-4b8b-b52f-376bf792d74e
TCGA-AA-3489-01A TCGA-COAD 611afd21-d36c-4bcd-8de1-45d63c1d449d
TCGA-AA-3489-11A TCGA-COAD 7219d9a8-634e-471b-82dd-9316b7b28ff2
TCGA-AA-3496-01A TCGA-COAD bf18b4eb-ff17-4632-a0ca-9a1c0f002156
TCGA-AA-3496-11A TCGA-COAD e3adceb2-6d55-4812-b972-e46d843cb261
TCGA-AA-3511-01A TCGA-COAD 24dc3d9d-9011-4752-bf86-7308f89fd27d
TCGA-AA-3511-11A TCGA-COAD c77b629b-ae40-450e-8154-f4fc0b6bb6e3
TCGA-AA-3514-01A TCGA-COAD 5dfd07db-48b5-4e2a-8079-9ff267002b6a
TCGA-AA-3514-11A TCGA-COAD 7adde6fc-7b62-4f26-b327-d225b429823a
TCGA-AA-3516-01A TCGA-COAD 53654dfb-9885-413a-b5bb-49b19a6a0f8c
TCGA-AA-3516-11A TCGA-COAD 9081de60-839a-4e20-8702-02909e90cc71
TCGA-AA-3517-11A TCGA-COAD a4d598d7-896b-495c-9e6b-fef43193f9e8
TCGA-AA-3517-01A TCGA-C

In [3]:
# TCGA-COAD Run 1C
# Batch table for this launch
INPUT='tcga_run1C.txt'
cd ~/Crown/data2/tcga_1_coad/

# Show the batch table, framed by blank lines
echo ''
cat $INPUT
echo ''

# Publish the current pipeline script to the S3 location queenB.sh uses as
# TASK, then launch one instance per line of the table
aws s3 cp hgr1_align_v2.tcga.sh s3://crownproject/scripts/hgr1_align_v2.tcga.sh
bash queenB.sh $INPUT

Launch instance # 1
Wed Aug 22 13:48:04 PDT 2018
Instance Type: c4.xlarge
AMI Image: ami-0031fd61f932bdef9
Run Script: s3://crownproject/scripts/hgr1_align_v2.tcga.sh
Parameters: TCGA-AA-3518-01A TCGA-COAD 03d5c3cb-f41c-40fd-b1e3-2c95292c59d5
Instance ID: i-011030672c16e46d6
Public DNS: ec2-34-209-216-249.us-west-2.compute.amazonaws.com
download: s3://crownproject/scripts/hgr1_align_v2.tcga.sh to ./hgr1_align_v2.tcga.sh


Launch instance # 2
Wed Aug 22 13:51:18 PDT 2018
Instance Type: c4.xlarge
AMI Image: ami-0031fd61f932bdef9
Run Script: s3://crownproject/scripts/hgr1_align_v2.tcga.sh
Parameters: TCGA-AA-3518-11A TCGA-COAD 8e3f0481-0e27-48aa-a178-9c17c78c05d1
Instance ID: i-0103c846924d67737
Public DNS: ec2-35-160-230-165.us-west-2.compute.amazonaws.com
download: s3://crownproject/scripts/hgr1_align_v2.tcga.sh to ./hgr1_align_v2.tcga.sh


Launch instance # 3
Wed Aug 22 13:54:27 PDT 2018
Instance Type: c4.xlarge
AMI Image: ami-0031fd61f932bdef9
Run Script: s3:

In [1]:
# TCGA-COAD Run 1C -- 2
# Continuation batch: the previous 1C run was interrupted at run 39
INPUT='tcga_run1C2.txt'
cd ~/Crown/data2/tcga_1_coad/

# Show the batch table, framed by blank lines
echo ''
cat $INPUT
echo ''

# Publish the current pipeline script to the S3 location queenB.sh uses as
# TASK, then launch one instance per line of the table
aws s3 cp hgr1_align_v2.tcga.sh s3://crownproject/scripts/hgr1_align_v2.tcga.sh
bash queenB.sh $INPUT


TCGA-AZ-6605-11A TCGA-COAD 8828647d-29ac-4ef0-9085-96846c5f1731
TCGA-AZ-6605-01A TCGA-COAD d366f4e7-0eaf-4c32-a080-864679e1a6ef
TCGA-F4-6704-11A TCGA-COAD f2e8d1e1-001b-4a31-a4ee-23936f5f3022
TCGA-F4-6704-01A TCGA-COAD f815284b-74a2-4ad8-9f1b-aa1ec54fe579
Completed 7.1 KiB/7.1 KiB with 1 file(s) remainingupload: ./hgr1_align_v2.tcga.sh to s3://crownproject/scripts/hgr1_align_v2.tcga.sh
Launch instance # 1
Wed Aug 22 16:01:56 PDT 2018
Instance Type: c4.xlarge
AMI Image: ami-0031fd61f932bdef9
Run Script: s3://crownproject/scripts/hgr1_align_v2.tcga.sh
Parameters: TCGA-AZ-6605-11A TCGA-COAD 8828647d-29ac-4ef0-9085-96846c5f1731
Instance ID: i-0c4441c447403087e
Public DNS: ec2-34-219-127-177.us-west-2.compute.amazonaws.com
download: s3://crownproject/scripts/hgr1_align_v2.tcga.sh to ./hgr1_align_v2.tcga.sh


Launch instance # 2
Wed Aug 22 16:05:08 PDT 2018
Instance Type: c4.xlarge
AMI Image: ami-0031fd61f932bdef9
Run Script: s3://crownproject/scripts/hgr1_align_v2.tcg

## Discussion

Archive to a separate coad_1 folder
```
# Move the run's outputs from the top level of tcga/ into tcga/tcga-coad-1/;
# --exclude "*/*" keeps objects that are already inside a sub-folder in place.
# NOTE(review): AWS CLI filters start from "everything included" and later
# filters take precedence, so the leading --include looks like a no-op and
# this probably moves every top-level object, not only *hgr1*bam -- confirm
# the intent. To restrict the move to matching files the usual form is
# --exclude "*" --include "*hgr1*bam" --exclude "*/*".
aws s3 mv --recursive --include "*hgr1*bam" --exclude "*/*" s3://crownproject/tcga/ s3://crownproject/tcga/tcga-coad-1/
```

#### Library Preparations
There is some discordance in library preparation (single-end vs. paired-end) across the individual samples. Unfortunately, in some cases the normal control and the cancer sample of a matched pair are themselves discordant; such pairs may have to be flagged for exclusion in downstream analysis, since variant calling is likely to behave quite differently across these samples.
