# Analysis of mutational antigenic profiling of influenza A/WSN/1933(H1N1) HA using broadly-neutralizing antibodies 

In [None]:
import os
import pandas
from IPython.display import display, HTML

# Define the experimental samples and download their sequencing data from the SRA


In [None]:
# (sample name, SRA accession) pairs, one per sequenced sample.
# The order of this list is the row order of the resulting table.
sample_accessions = [
    ('L1_FI6v3_c1_r1', 'SRR5930379'),
    ('L1_FI6v3_c1_r2', 'SRR5930376'),
    ('L1_FI6v3_c2_r1', 'SRR5930378'),
    ('L1_FI6v3_c2_r2', 'SRR5930383'),
    ('L2_FI6v3_c1', 'SRR5930381'),
    ('L2_FI6v3_c2', 'SRR5930380'),
    ('L3_FI6v3_c1', 'SRR5930384'),
    ('L3_FI6v3_c2', 'SRR5930375'),
    ('L1_mock_r1', 'SRR4841578'),
    ('L1_mock_r2', 'SRR4841581'),
    ('L2_mock', 'SRR4841580'),
    ('L3_mock', 'SRR4841582'),
]

# Table of samples with one row per (name, accession) pair.
samples = pandas.DataFrame(
    data=sample_accessions,
    columns=['name', 'SRA accession'],
)

# Render the table in the notebook as HTML, omitting the integer row index.
table_html = samples.to_html(index=False)
display(HTML(table_html))

In [None]:
print("Downloading FASTQ files from the SRA using:")
!fastq-dump --version

# directory for FASTQ files
fastqdir = './FASTQ_files/' 
if not os.path.isdir(fastqdir):
    os.mkdir(fastqdir)

# define the R1 FASTQ file name (file will be in fastqdir)
samples['R1'] = samples['name'] + '_R1.fastq.gz'

# download any FASTQ files that are not already present
for (i, row) in samples.iterrows():
    # r1filedownload / r2filedownload are initial file names given by fastq-dump 
    r1filedownload = os.path.join(fastqdir, '{0}_pass_1.fastq.gz'.format(row['SRA accession']))
    r2filedownload = r1filedownload.replace('_1.fastq.gz', '_2.fastq.gz')
    r1file = os.path.join(fastqdir, row['R1'])
    r2file = r1file.replace('_R1', '_R2')
    if os.path.isfile(r1file) and os.path.isfile(r2file):
        print("FASTQ files for {0} already exist".format(row['name']))
    else:
        print("Downloading FASTQ files for {0}...".format(row['name']))
        !fastq-dump --outdir {fastqdir} --gzip --readids --skip-technical \
                --dumpbase --clip --split-files --read-filter pass {row['SRA accession']}
        os.rename(r1filedownload, r1file)
        os.rename(r2filedownload, r2file)