In [None]:
# Collect previously described bee phage
#
# Rosso et al. phage
# The virome contigs can be downloaded from NCBI directly using this link:
#   https://www.ncbi.nlm.nih.gov/Traces/wgs/JAAOBB01?display=contigs&page=1
# or from the command line using the Deboutte et al. approach outlined next
# (as long as you have a list of accessions -- which the above link can provide).
# Also download the genomes of the phage isolates: accessions are MT006233-MT006240.
#
# Once downloaded, combine into one file:
#   rosso_virome.fa      - file containing all virome phage seqs
#   rosso_isolates.fasta - file containing all isolated phage genomes
#
# NOTE: comments must not follow a trailing backslash. "\ # ..." escapes the
# space instead of the newline, so the command would end on that line and the
# next line would be run as a separate command.
# ">" (not ">>") is used so that re-running this step does not append a second
# copy of every sequence to an existing rosso_phage.fa.
cat rosso_virome.fa \
    rosso_isolates.fasta \
    > rosso_phage.fa

# Download Deboutte et al. phage
# using the GenBank accessions provided here:
#   https://github.com/Matthijnssenslab/Beevir/blob/master/Supptable_S18.xlsx
# and the NCBI Entrez toolkit.
#
#   efetch                  - fetch utility (part of Entrez)
#   deboutte_accessions.txt - plain text file with the list of accessions
#
# NOTE: comments are kept off the continued lines. A "\ # ..." sequence escapes
# the space rather than the newline, which would split this into separate
# commands and leave deboutte_phage.fa empty.
efetch -db nuccore -format fasta \
    -input deboutte_accessions.txt \
    > deboutte_phage.fa

# Busby et al. phage
# Couldn't find a list of accessions or easily accessible seqs on NCBI,
# so just download the GitHub repo and extract the phage seqs.
git clone https://github.com/jtvanleuven/bee_phage.git
# Copy out the one file we actually want.
# "git clone" without a target directory creates ./bee_phage in the current
# working directory; adjust the source path if the repo was cloned elsewhere.
cp bee_phage/analysis/all_phage.fasta ./busby_phage.fa

# With every set of "known" phage downloaded, give the sequences uniform names.
# Each FASTA is rewritten by rename.sh with a study-specific prefix, so headers
# end up looking something like "Rosso_phage_XXX" or "Deboutte_phage_XXX".
conda activate bbduk
# Each entry is "<file stem>:<header label>"; processed in the order listed.
for study in rosso:Rosso deboutte:Deboutte busby:Busby; do
    stem=${study%%:*}    # text before the colon -> input/output file stem
    label=${study##*:}   # text after the colon  -> prefix used in the new names
    rename.sh in=${stem}_phage.fa out=${stem}_phage_renamed.fa prefix=${label}_phage_
done

# Now, compile all phage into one place.
# ">" (not ">>") is used so the combined file is rebuilt from scratch each
# time; appending would duplicate every sequence if this step is re-run.
cat rosso_phage_renamed.fa \
    busby_phage_renamed.fa \
    deboutte_phage_renamed.fa \
    > all_described_phage.fa

# Count how many phage are here (if ya want).
# grep -c prints the number of matching lines directly; the pattern is anchored
# with "^" so only FASTA header lines (those starting with ">") are counted.
grep -c "^>" all_described_phage.fa
# 1939 total phage