Skip to content

Using the Web Services: Python

Darren Oakley edited this page Jul 4, 2011 · 5 revisions

Helpers

  #! /usr/bin/python

  # Author::    Sébastien Briois (mailto:sebriois@gmail.com)

  import base64
  import urllib

  import httplib2 # http://httplib2.googlecode.com/files/httplib2-0.6.0.zip

  try:
    import json # Python 2.6
  except ImportError:
    import simplejson as json # Python 2.4+ - http://pypi.python.org/pypi/simplejson/2.0.9

  try:
    from urllib import urlencode # Python 2
    from urlparse import urljoin
  except ImportError:
    from urllib.parse import urlencode, urljoin # Python 3

  # Connection settings -- adjust these to match your installation.
  # DOMAIN is presumably the host:port of the repository server (TODO confirm).
  # USERNAME / PASSWORD are the credentials UserAgent registers on its
  # httplib2 connection.
  DOMAIN     = 'localhost:3000'
  USERNAME   = 'htgt'
  PASSWORD   = 'htgt'

  # Generic helper class for handling the web requests to the repository.
  class UserAgent(object):
    """Small JSON-over-HTTP client shared by all the helpers in this script.

    Holds an ``httplib2.Http`` object carrying the module-level USERNAME /
    PASSWORD credentials and resolves relative URLs against the server
    named by DOMAIN.
    """

    def __init__(self):
      self.http = httplib2.Http()
      self.http.add_credentials(USERNAME, PASSWORD)
      # The script never defined BASE_URL, so constructing a UserAgent raised
      # NameError; build the base URL from DOMAIN instead.
      # NOTE(review): plain http is assumed here -- confirm for your server.
      self.base_url = 'http://%s/' % DOMAIN

    def uri_for(self, rel_url, params = None):
      """Return the absolute URL for `rel_url`.

      When `params` is a non-empty mapping it is URL-encoded and appended
      as the query string.
      """
      if params:
        rel_url = "%s?%s" % (rel_url, urlencode(params))
      return urljoin( self.base_url, rel_url )

    def request(self, method, rel_url, data = None):
      """Send one request and return the decoded response.

      method  -- 'GET', 'DELETE', 'POST' or 'PUT'.
      rel_url -- URL relative to the base URL, e.g. 'alleles.json'.
      data    -- mapping; sent as the query string for GET/DELETE and as a
                 JSON body for POST/PUT. Defaults to an empty mapping.

      Returns True for a successful DELETE, otherwise the parsed JSON body.
      Raises Exception for an unknown method, or with the response body as
      message when the status is neither 200 nor 201.
      """
      # A fresh dict per call; a `{}` default would be shared between calls.
      if data is None:
        data = {}
      headers = { 'Content-Type': 'application/json' }

      if method in ('GET', 'DELETE'):
        uri = self.uri_for( rel_url, data )
        resp, content = self.http.request( uri, method, headers = headers )
      elif method in ('POST', 'PUT'):
        uri = self.uri_for( rel_url )
        body = json.dumps( data )
        resp, content = self.http.request( uri, method, body, headers = headers )
      else:
        raise Exception( "Method %s unknown when requesting URL %s" % (method, rel_url) )

      # Parenthesised single argument: prints the same line on Python 2 and 3.
      print( "%s %s: %s" % (method, uri, resp['status']) )
      if resp['status'] not in ('200', '201'):
        raise Exception(content)

      # DELETE requests do not return any content
      if method == 'DELETE':
        return True
      return json.loads( content )


  # Create the single User Agent instance; every helper function below sends
  # its requests through this module-level `ua` object.
  ua = UserAgent()

  def find( url, params ):
    """Search `url` with the query `params` and return the single match.

    Returns None when the search yields nothing, the only element when it
    yields exactly one result, and raises Exception when it yields several.
    """
    results = ua.request( 'GET', url, params )

    if not results:
      return None

    if len(results) > 1:
      # Raising a bare string is not a valid exception (it fails with a
      # TypeError instead of this message), so raise a real Exception.
      raise Exception( "Your search returned more than one result." )

    return results[0]

  #
  # Allele specific methods
  #
  def create_allele( data ):
    """POST `data` to alleles.json, wrapped under the 'allele' key.

    Returns whatever ua.request returns for the POST.
    """
    payload = { 'allele' : data }
    return ua.request( 'POST', 'alleles.json', payload )

  def update_allele( id, data ):
    """PUT `data` (wrapped under 'allele') to the allele with the given id.

    Returns whatever ua.request returns for the PUT.
    """
    rel_url = 'alleles/%s.json' % id
    payload = { 'allele' : data }
    return ua.request( 'PUT', rel_url, payload )

  def create_or_update_allele( data ):
    """Update the allele matching `data`, or create it when none matches.

    The search uses the gene, assembly, position, cassette and backbone
    fields of `data`. The two loxP coordinates are optional: when missing
    or empty they are searched for as the string 'null'.

    Returns the result of update_allele or create_allele. Raises KeyError
    when one of the required search fields is absent from `data`.
    """
    required_fields = (
      'mgi_accession_id', 'assembly', 'chromosome', 'strand',
      'cassette', 'backbone',
      'homology_arm_start', 'homology_arm_end',
      'cassette_start', 'cassette_end'
    )
    optional_fields = ( 'loxp_start', 'loxp_end' )

    search_params = {}
    for field in required_fields:
      search_params[field] = data[field]
    for field in optional_fields:
      # .get(): an allele without loxP sites may omit these keys altogether,
      # which used to fail with KeyError before the 'null' fallback applied.
      search_params[field] = data.get( field ) or 'null'

    allele_found = find( 'alleles.json', search_params )

    if not allele_found:
      return create_allele( data )
    return update_allele( allele_found['id'], data )

  def delete_allele( id ):
    """Send a DELETE request for the allele with the given id."""
    rel_url = "alleles/%s.json" % id
    ua.request( 'DELETE', rel_url )

  #
  # Targeting Vector specific methods
  #
  def create_targeting_vector( data ):
    """POST `data` to targeting_vectors.json under the 'targeting_vector' key.

    Returns whatever ua.request returns for the POST.
    """
    payload = { 'targeting_vector' : data }
    return ua.request( 'POST', 'targeting_vectors.json', payload )

  def update_targeting_vector( id, data ):
    """PUT `data` (under 'targeting_vector') to the vector with the given id.

    Returns whatever ua.request returns for the PUT.
    """
    rel_url = 'targeting_vectors/%s.json' % id
    payload = { 'targeting_vector' : data }
    return ua.request( 'PUT', rel_url, payload )

  def create_or_update_vector( data ):
    """Update the targeting vector named data['name'], or create it.

    The vector is looked up by name only; the result of the update or the
    create call is returned.
    """
    existing = find( "targeting_vectors.json", { 'name': data['name'] } )

    if existing:
      return update_targeting_vector( existing['id'], data )
    return create_targeting_vector( data )

  def delete_targeting_vector( id ):
    """Send a DELETE request for the targeting vector with the given id."""
    rel_url = "targeting_vectors/%s.json" % id
    ua.request( 'DELETE', rel_url )

  #
  # ES Cell specific methods
  #
  def create_es_cell( data ):
    """POST `data` to es_cells.json, wrapped under the 'es_cell' key.

    Returns whatever ua.request returns for the POST.
    """
    payload = { 'es_cell' : data }
    return ua.request( 'POST', 'es_cells.json', payload )

  def update_es_cell( id, data ):
    """PUT `data` (wrapped under 'es_cell') to the ES cell with the given id.

    Returns whatever ua.request returns for the PUT.
    """
    # Updates go through PUT, exactly like update_allele and
    # update_targeting_vector; the previous 'POST' did not match them.
    return ua.request( 'PUT', 'es_cells/%s.json' % id, { 'es_cell' : data } )

  def create_or_update_es_cell( data ):
    """Update the ES cell named data['name'], or create it when not found.

    The ES cell is looked up by name only; the result of the update or the
    create call is returned.
    """
    existing = find( "es_cells.json", { 'name': data['name'] } )

    if existing:
      return update_es_cell( existing['id'], data )
    return create_es_cell( data )

  def delete_es_cell( id ):
    """Send a DELETE request for the ES cell with the given id."""
    rel_url = "es_cells/%s.json" % id
    ua.request( 'DELETE', rel_url )

An Example Loading Script

  ##
  ##  Main script scenario:
  ##    - We create a data structure containing all the objects we want to create or update in the database
  ##    - We loop over this data structure and follow this procedure:
  ##      1- Search the object
  ##      2- Object found ? Yes: Update; No: Create
  ##

  # We will work with the data linked to the pipeline named "EUCOMM", let's find its ID
  pipeline_list = ua.request( 'GET', 'pipelines.json' )
  for pipeline in pipeline_list:
    if pipeline['name'] == 'EUCOMM':
      break
  else:
    # The loop finished without a match: stop here rather than silently
    # carrying on with whichever pipeline happened to be last in the list
    # (or with no `pipeline` variable at all when the list is empty).
    raise Exception( "Pipeline 'EUCOMM' was not found in the repository" )

  # Create our data structure: a list with one dictionary per allele.
  # Besides the allele's own fields, each dictionary nests:
  #   - 'targeting_vectors': the vectors to create or update for that allele
  #   - 'es_cells'         : the ES cells to create or update for that allele;
  #                          'targeting_vector' holds the NAME of one of the
  #                          vectors above and is swapped for its ID by the
  #                          loading loop before the ES cell is sent
  # The loading loop pops both nested lists off the allele before sending it.
  alleles = [

    # First allele
    {
      'mgi_accession_id'   : "MGI:123",
      'project_design_id'  : 23640,
      'cassette'           : "L1L2_gt2",
      'backbone'           : "L3L4_pZero_kan",
      'assembly'           : "NCBIM37",
      'chromosome'         : "1",
      'strand'             : "+",
      'design_type'        : "Knock Out",
      'design_subtype'     : "Frameshift",
      'homology_arm_start' : 10,
      'homology_arm_end'   : 10000,
      'cassette_start'     : 50,
      'cassette_end'       : 500,
      'loxp_start'         : 1000,
      'loxp_end'           : 1500,

      # Targeting vectors for the first allele
      'targeting_vectors'  : [
        {
          'pipeline_id'         : pipeline['id'],
          'name'                : 'PRPGD001',
          'intermediate_vector' : 'PGS001',
          'ikmc_project_id'     : 9801
        },
        {
          'pipeline_id'         : pipeline['id'],
          'name'                : 'PRPGD002',
          'intermediate_vector' : 'PGS001',
          'ikmc_project_id'     : 9801
        }
      ],

      # ES Cells for the first allele
      'es_cells' : [
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD001', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD002', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD003', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD004', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD005', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD006', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' }
      ],

      # Genbank File for the first allele
      'genbank_file' : { 
        'escell_clone'     : "A GENBANK FILE IN PLAIN TEXT",
        'targeting_vector' : "A GENBANK FILE IN PLAIN TEXT"
      }
    },

    # Second allele (same layout as the first, but without a Genbank file)
    {
      'mgi_accession_id'   : "MGI:456",
      'project_design_id'  : 29871,
      'cassette'           : "L1L2_gt2",
      'backbone'           : "L3L4_pZero_kan",
      'assembly'           : "NCBIM37",
      'chromosome'         : "1",
      'strand'             : "+",
      'design_type'        : "Knock Out",
      'design_subtype'     : "Frameshift",
      'homology_arm_start' : 10,
      'homology_arm_end'   : 10000,
      'cassette_start'     : 50,
      'cassette_end'       : 500,
      'loxp_start'         : 1000,
      'loxp_end'           : 1500,

      # Targeting vectors for the second allele
      'targeting_vectors'  : [
        {
          'pipeline_id'         : pipeline['id'],
          'name'                : 'PRPGD003',
          'intermediate_vector' : 'PGS002',
          'ikmc_project_id'     : 6809480
        },
        {
          'pipeline_id'         : pipeline['id'],
          'name'                : 'PRPGD004',
          'intermediate_vector' : 'PGS002',
          'ikmc_project_id'     : 6809480
        }
      ],

      # ES Cells for the second allele
      'es_cells' : [
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD007', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD008', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD009', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD010', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD011', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' },
        { 'pipeline_id' : pipeline['id'], 'name' : 'EPD012', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' }
      ]
    }
  ]

  # Every vector and ES cell handled below is remembered here, across ALL
  # alleles, so that the clean-up at the end can delete each of them.
  all_targeting_vectors = []
  all_es_cells          = []

  # Create or Update Alleles
  for allele_hash in alleles:
    # allele_hash should not contain unknown fields
    targeting_vectors = allele_hash.pop( 'targeting_vectors' )
    es_cells          = allele_hash.pop( 'es_cells' )

    allele = create_or_update_allele( allele_hash )
    allele_hash['id'] = allele['id']

    # Create or Update Targeting Vectors
    vector_id_by_name = {}
    for vector_hash in targeting_vectors:
      vector_hash['allele_id'] = allele['id']
      vector = create_or_update_vector( vector_hash )
      vector_hash['id'] = vector['id']
      vector_id_by_name[ vector_hash['name'] ] = vector['id']
    all_targeting_vectors.extend( targeting_vectors )

    # Find, Create or Update ES Cells
    for es_cell_hash in es_cells:
      es_cell_hash['allele_id'] = allele['id']

      # Find targeting vector ID from its name or set it to None
      # if ES Cell is not linked to a targeting vector
      if 'targeting_vector' in es_cell_hash:
        targ_vec_name = es_cell_hash.pop('targeting_vector')
        if targ_vec_name not in vector_id_by_name:
          # Refuse to guess: the old search loop fell through to the LAST
          # vector of the allele whenever the name did not match any.
          raise Exception(
            "ES Cell %s refers to unknown targeting vector %s"
            % (es_cell_hash['name'], targ_vec_name)
          )
        es_cell_hash['targeting_vector_id'] = vector_id_by_name[targ_vec_name]
      else:
        es_cell_hash['targeting_vector_id'] = None

      es_cell = create_or_update_es_cell( es_cell_hash )
      es_cell_hash['id'] = es_cell['id']
    all_es_cells.extend( es_cells )

  # DELETE everything that was loaded: ES Cells first, then Targeting Vectors,
  # then Alleles. The per-allele `es_cells` / `targeting_vectors` variables only
  # hold the LAST allele's records, hence the accumulated lists.
  for es_cell in all_es_cells: delete_es_cell( es_cell['id'] )
  for vector in all_targeting_vectors: delete_targeting_vector( vector['id'] )
  for allele in alleles: delete_allele( allele['id'] )