In [None]:
import subprocess
import timeit
from Bio import Phylo

In [None]:
# Globals
# Base directory for all data files. It is a relative path, so it resolves
# against the kernel's current working directory. The cells below build
# paths under its `raw/` and `interim/` subfolders.
DATA_DIR = '../data'

In [None]:
# Helpers
def run(cmd: list) -> None:
    """Run an OS command, time its execution and echo its output.

    Args:
        cmd: Program name followed by its arguments. The list is handed to
            ``subprocess.run`` unchanged, so no shell is involved.

    Returns:
        None. Everything is reported through ``print``: the elapsed
        wall-clock time, then the captured stdout and stderr (each only when
        non-empty), and finally a failure notice if the exit code is
        non-zero.
    """
    start = timeit.default_timer()
    result = subprocess.run(args=cmd, capture_output=True)
    elapsed = round(timeit.default_timer() - start)
    print(f"Execution Time: {elapsed // 60}m {elapsed % 60}s")

    # Show both streams: many tools write progress to stdout and warnings or
    # errors to stderr, so printing only one of them can hide a problem.
    for stream in (result.stdout, result.stderr):
        if stream:
            # errors='replace' keeps stray non-UTF-8 bytes from raising here.
            print(stream.decode('utf-8', errors='replace'))

    # Quiet commands print nothing on failure, so surface the exit code
    # explicitly instead of letting a failed step pass for a successful one.
    if result.returncode != 0:
        shown_cmd = ' '.join(str(part) for part in cmd)
        print(f"Command failed with exit code {result.returncode}: {shown_cmd}")

In [None]:
# Fetch the example sequences and the metadata table from the nextstrain/ncov repo
for file in ('example_data/sequences.fasta', 'data/metadata.tsv'):
    print(file)

    url = f"https://raw.githubusercontent.com/nextstrain/ncov/master/{file}"
    # Only the file name (last path component) is kept for the local copy.
    destination = f"{DATA_DIR}/raw/{file.split('/')[-1]}"
    run(['wget', '-q', url, '-O', destination])

In [None]:
# Align the downloaded sequences with augur, using MAFFT as the aligner
align_cmd = ['augur', 'align']
align_cmd += ['--sequences', f"{DATA_DIR}/raw/sequences.fasta"]
align_cmd += ['--output', f"{DATA_DIR}/interim/aligned.fasta"]
align_cmd += ['--method', 'mafft']
align_cmd += ['--nthreads', 'auto']
align_cmd += ['--fill-gaps']
run(align_cmd)

In [None]:
# Build a tree from the alignment and write it out in Newick format
tree_cmd = ['augur', 'tree']
tree_cmd += ['--alignment', f"{DATA_DIR}/interim/aligned.fasta"]
tree_cmd += ['--output', f"{DATA_DIR}/interim/tree.nwk"]
tree_cmd += ['--nthreads', 'auto']
run(tree_cmd)

In [None]:
# Load the Newick tree written by the previous step and render it as ASCII art
tree_path = f"{DATA_DIR}/interim/tree.nwk"
tree = Phylo.read(tree_path, 'newick')
Phylo.draw_ascii(tree)