<a href="https://colab.research.google.com/github/ash-bell/Python_notebooks/blob/master/Anvi'o%20scripts%20rough.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Submit the RefSeq database download as a job

```
#!/bin/bash
#PBS -V # export the submitting shell's environment variables to the job
#PBS -N Download.RefSeq.database.for.Kaiju
#PBS -d /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/scripts/runs
#PBS -q pq
#PBS -l walltime=48:00:00 # Maximum wall time for the job.
#PBS -A Research_Project-172179 # research project to submit under.
#PBS -l nodes=1:ppn=16
#PBS -j oe

# Build the Kaiju reference database inside the kaijudb directory.
# makeDB.sh is called through a path relative to kaijudb, so abort if the
# directory change fails instead of running from the PBS working directory.
cd /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/ref/kaijudb || exit 1

# -t 16 presumably sets the thread count to match ppn=16 above - confirm.
# NOTE(review): the job name says RefSeq; confirm that -n selects the intended
# source database for the installed Kaiju version.
../kaiju/bin/makeDB.sh -n -t 16

```

### HMM, Pfam, and NCBI COGs setup

```

### Run on the head node (interactively, not as a PBS job).
### NOTE(review): presumably because the setup commands need to download
### data and compute nodes have no outbound network access - confirm.
# Absolute path: the original lacked the leading "/" and only worked from "/".
cd /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data
anvi-setup-ncbi-cogs
anvi-setup-pfam

```

```

#!/bin/bash
#PBS -V # export the submitting shell's environment variables to the job
#PBS -N anvio.hmms.pfams.ncbi_cogs
#PBS -d /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/scripts/runs
#PBS -q pq
#PBS -l walltime=48:00:00 # Maximum wall time for the job.
#PBS -A Research_Project-172179 # research project to submit under.
#PBS -l nodes=1:ppn=16
#PBS -j oe

# Annotate contigs.db with HMM hits, Pfams and NCBI COGs.
# Output (stdout + stderr) of every step is appended to this shared log.
PROFILE_LOG=/gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data.log

source activate anvio5

# contigs.db is referenced by a relative path below, so stop here if the
# data directory is unreachable rather than running in the wrong place.
cd /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data || exit 1

# --num-threads 16 matches the ppn=16 request above.
anvi-run-hmms -c contigs.db --num-threads 16 2>&1 | tee -a "$PROFILE_LOG"
anvi-run-pfams -c contigs.db --num-threads 16 2>&1 | tee -a "$PROFILE_LOG"
anvi-run-ncbi-cogs -c contigs.db --num-threads 16 2>&1 | tee -a "$PROFILE_LOG"



```

### contig database prep

```

# Annotate contigs.db: HMMs, Pfams, Kaiju gene-level taxonomy, NCBI COGs.
# Run from the directory that holds contigs.db; gene_calls.fa must already
# exist there (this snippet does not create it).

# Use the caller's PROFILE_LOG when set; otherwise fall back to the same path
# the anvio.hmms.pfams.ncbi_cogs job uses, so "tee -a" always has a target.
PROFILE_LOG=${PROFILE_LOG:-/gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data.log}
KAIJU_DB=/gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/ref/kaijudb

anvi-run-hmms -c contigs.db --num-threads 16 2>&1 | tee -a "$PROFILE_LOG"
anvi-run-pfams -c contigs.db --num-threads 16 2>&1 | tee -a "$PROFILE_LOG"

# Classify the gene calls with Kaiju against the proGenomes index.
# NOTE(review): kaiju reads nodes.dmp from proGenomes/ while addTaxonNames
# below reads it from the kaijudb root, and the outputs are named "_nr"
# although the index is proGenomes - confirm both are intended.
kaiju -t "$KAIJU_DB/proGenomes/nodes.dmp" \
      -f "$KAIJU_DB/proGenomes/kaiju_db.fmi" \
      -i gene_calls.fa \
      -o gene_calls_nr.out \
      -z 16 \
      -v 2>&1 | tee -a "$PROFILE_LOG"

# Add taxon names for the listed ranks to the Kaiju output.
addTaxonNames -t "$KAIJU_DB/nodes.dmp" \
              -n "$KAIJU_DB/names.dmp" \
              -i gene_calls_nr.out \
              -o gene_calls_nr.names \
              -r superkingdom,phylum,class,order,family,genus,species

# Keep a copy of the database from before the taxonomy import modifies it.
cp contigs.db contigs.db.bak.pre.kaiju

anvi-import-taxonomy-for-genes -i gene_calls_nr.names \
                               -c contigs.db \
                               -p kaiju \
                               --just-do-it 2>&1 | tee -a "$PROFILE_LOG"
anvi-run-ncbi-cogs -c contigs.db --num-threads 16 --sensitive 2>&1 | tee -a "$PROFILE_LOG"

```

In [0]:
### Profiling BAMs for anvio

```

#!/bin/bash
#PBS -V # export the submitting shell's environment variables to the job
#PBS -N anvio.profile.SAMPLE
#PBS -d /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/scripts/runs
#PBS -q pq
#PBS -l walltime=48:00:00 # Maximum wall time for the job.
#PBS -A Research_Project-172179 # research project to submit under.
#PBS -l nodes=1:ppn=16
#PBS -j oe

# Profile one sample's sorted BAM against the AE1712 contigs database.
# The token SAMPLE is a literal placeholder; presumably it is substituted
# with a real sample ID before submission - confirm against the workflow.

source activate anvio5

PROJECT_HOME=/gpfs/ts0/home/bt273/BIOS-SCOPE/metag/data/AE1712
OUT_DIR=$PROJECT_HOME/assemblies

# AE1712.db and profiles/ are referenced relative to OUT_DIR, so stop if the
# directory change fails.
cd "$OUT_DIR" || exit 1

# Remove every "2654_" from the sample ID; the result names the BAM file,
# the profile directory and the anvi'o sample.
raw_name="SAMPLE"
NAME=${raw_name//2654_/}
#NAME=$(echo "SAMPLE" | sed "s/.*\(AE1712.*\)/\1/g")

BAMFILE=$OUT_DIR/mappings/$NAME.vs.ae1712.sorted.bam

# The log keeps the unstripped sample ID in its file name.
LOGFILE=$OUT_DIR/profiles/SAMPLE.profile.log

# tee opens the log as soon as the pipeline starts, so profiles/ must exist
# before anvi-profile gets a chance to create anything.
mkdir -p "$OUT_DIR/profiles" || exit 1

anvi-profile \
  --input-file "$BAMFILE" \
  --contigs-db AE1712.db \
  --output-dir "profiles/$NAME" \
  --sample-name "$NAME" \
  --overwrite-output-destinations \
  --profile-SCVs \
  --num-threads 16 2>&1 | tee -a "$LOGFILE"

```

### kaiju stuff

#!/bin/bash
#PBS -V # set verbose output
#PBS -N anvi.run.pfams
#PBS -d /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/scripts/runs
#PBS -q pq
#PBS -l walltime=48:00:00 # Maximum wall time for the job. 
#PBS -A Research_Project-172179 # research project to submit under.
#PBS -l nodes=1:ppn=16
#PBS -j oe

source activate anvio5
cd /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data
anvi-get-sequences-for-gene-calls -c contigs.db -o gene_calls.fa
kaiju -t /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/ref/kaijudb/nodes.dmp \
      -f /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/ref/kaijudb/kaiju_db.fmi \
      -i /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data/gene_calls.fa \
      -o /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data/gene_calls_nr.out \
      -z 16 \
      -v 2>&1 | tee -a $PROFILE_LOG

addTaxonNames -t /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/ref/kaijudb/nodes.dmp \
              -n /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/ref/kaijudb/names.dmp \
              -i /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data/gene_calls_nr.out \
              -o /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data/gene_calls_nr.names \
              -r superkingdom,phylum,class,order,family,genus,species

cp contigs.db contigs.db.bak.pre.kaiju

anvi-import-taxonomy-for-genes -i /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data/gene_calls_nr.names \
                               -c /gpfs/ts0/home/bt273/BIOS-SCOPE/metag/ashley/data/contigs.db \
                               -p kaiju \
