In [None]:
import ftplib
import gzip
import hashlib
import os
import xml.dom.minidom as minidom

import requests

In [None]:
# Build a minimal one-read FASTQ record and store it gzip-compressed
# (read files must be compressed before submission).
fastq_content = '@read_name\nGATTTGGG\n+\n********\n'
with gzip.open('upload_test.fastq.gz', 'wt', encoding='UTF-8') as upload_test:
    upload_test.write(fastq_content)

# The checksum has to describe the file that is actually uploaded, i.e. the
# compressed bytes on disk, not the uncompressed FASTQ text. The gzip header
# embeds a timestamp, so the digest is read back from the file that was written.
with open('upload_test.fastq.gz', 'rb') as uploaded_file:
    md5_chksum = hashlib.md5(uploaded_file.read()).hexdigest()

The initial file upload is made via FTP.
The ENA documentation at http://ena-docs.readthedocs.io/en/latest/upload_01.html gives the FTP parameters:
1. Host: webin.ebi.ac.uk
2. Mode: binary
3. User: Webin user
4. Password: Webin password

In [None]:
# Upload the compressed FASTQ file to the Webin FTP drop box.
# Credentials are read from the WEBIN_UN / WEBIN_PW environment variables.
with ftplib.FTP(host='webin.ebi.ac.uk', user=os.environ['WEBIN_UN'], passwd=os.environ['WEBIN_PW']) as wftp:
    print('### get welcome')
    print(wftp.getwelcome())
    print('###')

    print('### dir')
    # FTP.dir() writes the listing to stdout itself and returns None,
    # so wrapping it in print() only adds a stray "None" line.
    wftp.dir()
    print('###')

    print('### stor')
    # storbinary transfers in binary mode; the context manager makes sure the
    # local file handle is closed once the transfer finishes or fails.
    with open('upload_test.fastq.gz', 'rb') as fastq_gz:
        wftp.storbinary('STOR upload_test.fastq.gz', fastq_gz)
    print('###')

    print('### dir')
    wftp.dir()
    print('###')

## Submit a Study (also known as a Project)

Several XML documents are required.
The 'submission' document indicates the action to take. For example, to add a study, use this submission document:

```xml
<SUBMISSION>
    <ACTIONS>
        <ACTION>
            <ADD/>
        </ACTION>
    </ACTIONS>
</SUBMISSION>
```

Notes:
1. If no release date is provided then submitted studies and any associated objects will be publicly released two years after the date of study submission.



In [None]:
# Submission document: a single ADD action, reused by every submission below.
submission_add_xml = """\
<?xml version="1.0" encoding="UTF-8"?>
<SUBMISSION center_name="the central center">
   <ACTIONS>
      <ACTION>
         <ADD/>
      </ACTION>
   </ACTIONS>
</SUBMISSION>
"""
submission_add_xml_fp = "submission_add.xml"
# Text mode with an explicit encoding: the bytes on disk then match the
# encoding="UTF-8" declaration regardless of the platform's default locale.
with open(submission_add_xml_fp, "w", encoding="utf-8") as submission_file:
    submission_file.write(submission_add_xml)

In [None]:
# Project (study) document describing the study to register.
study_xml = """\
<?xml version = '1.0' encoding = 'UTF-8'?>
<PROJECT_SET>
   <PROJECT alias="imicrobe_programmatic_study" center_name="the central center">
      <TITLE>Demonstration of Programmatic Data Submission</TITLE>
      <DESCRIPTION>A demonstration of programmatic data submission.</DESCRIPTION>
      <SUBMISSION_PROJECT>
         <SEQUENCING_PROJECT/>
      </SUBMISSION_PROJECT>
   </PROJECT>
</PROJECT_SET>
"""
study_xml_fp = "study.xml"
# Write with an explicit encoding so the file matches its own UTF-8
# declaration instead of depending on the locale default.
with open(study_xml_fp, "w", encoding="utf-8") as study_file:
    study_file.write(study_xml)

In [None]:
url = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/"

# Open both documents in a context manager so the handles are closed as soon
# as the request has been sent; the response body is already fully read by then.
with open("submission_add.xml", "rb") as submission_fh, open("study.xml", "rb") as project_fh:
    files = {"SUBMISSION": submission_fh, "PROJECT": project_fh}
    r = requests.post(url, files=files, auth=(os.environ["WEBIN_UN"], os.environ["WEBIN_PW"]))

# Display the raw receipt returned by the server.
r.text

In [None]:
# Pretty-print the XML receipt. The raw response bytes are parsed so the XML
# parser applies the encoding declared in the document itself rather than the
# charset requests guessed when building r.text.
print(minidom.parseString(r.content).toprettyxml(indent="  "))

## Submit a Sample

Note: The sample XML comes from the documentation, but I had to add a SAMPLE_ATTRIBUTE with the tag "project name".

In [None]:
# Sample document: one sample with its taxonomy and checklist attributes.
sample_xml = """\
<?xml version="1.0" encoding="UTF-8"?>
<SAMPLE_SET>
  <SAMPLE alias="IMICROBESAMPLE" center_name="the central center">
    <TITLE>human gastric microbiota, mucosal</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1284369</TAXON_ID>
      <SCIENTIFIC_NAME>stomach metagenome</SCIENTIFIC_NAME>
      <COMMON_NAME></COMMON_NAME>
    </SAMPLE_NAME>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>imicrobe_programmatic_study</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>mimarks-survey</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>pyrosequencing</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection date</TAG>
        <VALUE>2010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host body site</TAG>
        <VALUE>Mucosa of stomach</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>human-associated environmental package</TAG>
        <VALUE>human-associated</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>1.81</VALUE>
        <UNITS>DD</UNITS>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-78.76</VALUE>
        <UNITS>DD</UNITS>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Colombia</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (region and locality)</TAG>
        <VALUE>Tumaco</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>coast</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>gastric biopsy</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-CHECKLIST</TAG>
        <VALUE>ERC000014</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
</SAMPLE_SET>
"""
sample_xml_fp = "sample.xml"
# Explicit encoding keeps the file consistent with its UTF-8 declaration
# on platforms whose default text encoding is something else.
with open(sample_xml_fp, "w", encoding="utf-8") as sample_file:
    sample_file.write(sample_xml)

In [None]:
url = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/"

# Context manager closes both upload handles once the POST has completed;
# the response body has been read in full before the block exits.
with open("submission_add.xml", "rb") as submission_fh, open("sample.xml", "rb") as sample_fh:
    files = {"SUBMISSION": submission_fh, "SAMPLE": sample_fh}
    r = requests.post(url, files=files, auth=(os.environ["WEBIN_UN"], os.environ["WEBIN_PW"]))

# Display the raw receipt returned by the server.
r.text

In [None]:
# Pretty-print the XML receipt from the raw response bytes, letting the XML
# parser honour the document's own encoding declaration instead of relying on
# the charset requests inferred for r.text.
print(minidom.parseString(r.content).toprettyxml(indent="  "))

## Submit a file of reads

Note: files must be compressed.

In [None]:
# Experiment document: library and platform metadata for the run submitted below.
exp_xml = """\
<EXPERIMENT_SET>
   <EXPERIMENT alias="imicrobe_exp" center_name="the central center">
       <TITLE>The 1KITE project: evolution of insects</TITLE>
       <STUDY_REF accession="SRP017801"/>
       <DESIGN>
           <DESIGN_DESCRIPTION/>
           <SAMPLE_DESCRIPTOR accession="SRS462875"/>
           <LIBRARY_DESCRIPTOR>
               <LIBRARY_NAME/>
               <LIBRARY_STRATEGY>RNA-Seq</LIBRARY_STRATEGY>
               <LIBRARY_SOURCE>TRANSCRIPTOMIC</LIBRARY_SOURCE>
               <LIBRARY_SELECTION>cDNA</LIBRARY_SELECTION>
               <LIBRARY_LAYOUT>
                   <SINGLE/>
               </LIBRARY_LAYOUT>
               <LIBRARY_CONSTRUCTION_PROTOCOL>Messenger RNA (mRNA) was isolated using the Dynabeads mRNA Purification Kit (Invitrogen, Carlsbad Ca. USA) and then sheared using divalent cations at 72*C. These cleaved RNA fragments were transcribed into first-strand cDNA using II Reverse Transcriptase (Invitrogen, Carlsbad Ca. USA) and N6 primer (IDT). The second-strand cDNA was subsequently synthesized using RNase H (Invitrogen, Carlsbad Ca. USA) and DNA polymerase I (Invitrogen, Shanghai China). The double-stranded cDNA then underwent end-repair, a single `A? base addition, adapter ligati on, and size selection on anagarose gel (250 * 20 bp). At last, the product was indexed and PCR amplified to finalize the library prepration for the paired-end cDNA.</LIBRARY_CONSTRUCTION_PROTOCOL>
           </LIBRARY_DESCRIPTOR>
       </DESIGN>
       <PLATFORM>
           <ILLUMINA>
               <INSTRUMENT_MODEL>Illumina HiSeq 2000</INSTRUMENT_MODEL>
           </ILLUMINA>
       </PLATFORM>
       <EXPERIMENT_ATTRIBUTES>
           <EXPERIMENT_ATTRIBUTE>
               <TAG>library preparation date</TAG>
               <VALUE>2010-08</VALUE>
           </EXPERIMENT_ATTRIBUTE>
       </EXPERIMENT_ATTRIBUTES>
   </EXPERIMENT>
</EXPERIMENT_SET>"""
exp_xml_fp = "exp.xml"
# No XML declaration is present, so an XML parser will assume UTF-8; write the
# file with that encoding explicitly rather than the platform default.
with open(exp_xml_fp, "w", encoding="utf-8") as exp_file:
    exp_file.write(exp_xml)

In [None]:
# Show the digest computed in the upload-file cell so it can be compared with
# the checksum attribute written into the run XML below.
md5_chksum

In [None]:
# Compute the checksum from the compressed file as it exists on disk instead of
# pasting a literal digest: the gzip file changes every time it is regenerated,
# so a hard-coded value silently goes stale.
with open("upload_test.fastq.gz", "rb") as uploaded_reads:
    upload_md5 = hashlib.md5(uploaded_reads.read()).hexdigest()

# Run document: links the uploaded reads file to the experiment by alias.
run_xml = """\
<RUN_SET>
    <RUN alias="imicrobe_run" center_name="the central center">
        <EXPERIMENT_REF refname="imicrobe_exp"/>
        <DATA_BLOCK>
            <FILES>
                <FILE filename="upload_test.fastq.gz" filetype="fastq" 
                      checksum_method="MD5" checksum="{checksum}"/>
            </FILES>
        </DATA_BLOCK>
    </RUN>
</RUN_SET>
""".format(checksum=upload_md5)
run_xml_fp = "run.xml"
# Explicit encoding so the file does not depend on the platform default.
with open(run_xml_fp, "w", encoding="utf-8") as run_file:
    run_file.write(run_xml)

In [None]:
url = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/"

# All three documents are opened in one context manager so every handle is
# closed after the POST; the response body has been read in full by then.
with open("submission_add.xml", "rb") as submission_fh, \
        open("exp.xml", "rb") as experiment_fh, \
        open("run.xml", "rb") as run_fh:
    files = {"SUBMISSION": submission_fh, "EXPERIMENT": experiment_fh, "RUN": run_fh}
    r = requests.post(url, files=files, auth=(os.environ["WEBIN_UN"], os.environ["WEBIN_PW"]))

# Display the raw receipt returned by the server.
r.text

In [None]:
# Pretty-print the XML receipt. Parsing r.content (bytes) lets the XML parser
# use the encoding declared in the receipt rather than the charset requests
# guessed when decoding r.text.
print(minidom.parseString(r.content).toprettyxml(indent="  "))