Permalink
Browse files

idls for bsane implementation - copied straight from corba-server, we…

… want 2 copies so people can checkout only client or server code at one time

svn path=/bioperl-corba-client/trunk/; revision=41
  • Loading branch information...
1 parent ee5b8a6 commit 57afdbacef0c44b0638fb178cb69a0bb1d055df7 @hyphaltip hyphaltip committed Sep 27, 2001
Showing with 1,221 additions and 0 deletions.
  1. +433 −0 idl/bsane.idl
  2. +104 −0 idl/collection.idl
  3. +279 −0 idl/comparison.idl
  4. +383 −0 idl/seqcore.idl
  5. +22 −0 idl/types.idl
View
433 idl/bsane.idl
@@ -0,0 +1,433 @@
+//$Id: bsane.idl,v 1.1 2001-09-27 16:34:49 jason Exp $
+
+#ifndef _DS_LSR_BSANE_IDL_
+#define _DS_LSR_BSANE_IDL_
+
+#pragma prefix "omg.org"
+#include <types.idl>
+
+
+module bsane {
+
+ /**
+ * Removable declares the single operation remove(), with which a
+ * client instructs the server that the entity is no longer needed by
+ * the client.
+ **/
+ interface Removable {
+ void remove();
+ };
+
+ /**
+ * Identifier is the data type used to indicate an entity's identity,
+ * which is needed in very many situations. In most cases this need
+ * is, or can be, addressed by using a string type. The advantages are
+ * that it is simple, lightweight, and used universally throughout the
+ * realm of computing (and indeed outside). However, the risk of using
+ * strings is that they can be too flexible, both in terms of syntax
+ * and semantics. This easily results in a lack of
+ * interoperability. To allow strings, yet mitigate their potential
+ * for abuse, this standard uses the syntax convention of
+ * CosNaming::StringName as described in the Interoperable Naming
+ * service. This convention is mainly a syntactical one; in no way
+ * is the use of a naming service implementation required or implied
+ * (but it is not precluded either).
+ *
+ * IdentifierList is an unbounded sequence of Identifiers.
+ **/
+ typedef string Identifier;
+ typedef sequence<Identifier> IdentifierList;
+
+
+ /**
+ * Raised if a request is too large for the server to handle. It is
+ * declared by seqcore::AnonymousSequence::seq() and sub_seq().
+ * @see seqcore::AnonymousSequence
+ **/
+ exception RequestTooLarge {
+ string reason;
+ unsigned long suggested_size; // presumably a request size the server can handle -- TODO confirm
+ };
+
+ /**
+ * Raised if an iterator becomes invalid, for example because its
+ * content has changed during the read.
+ **/
+
+ exception IteratorInvalid {
+ string reason;
+ };
+
+ /**
+ * Raised if a requested attribute of an entity does not exist.
+ * Among the operations that declare it are
+ * Alphabet::get_gap_symbol(), seqcore::SeqFeature::get_owner_sequence()
+ * and the key-based look-ups of comparison::Alignment.
+ **/
+
+ exception DoesNotExist {
+ string reason;
+ };
+
+ /**
+ * The IdentifierDoesNotExist exception is raised for cases where
+ * the Identifier itself can be resolved (both the database and the
+ * identifier within that database are understood) but no entity
+ * with that Identifier is present. <p>
+ *
+ * Carries the Identifier that could not be found.
+ **/
+ //NOTE: "inherit" from DoesNotExist
+ exception IdentifierDoesNotExist {
+ Identifier id;
+ };
+
+ /**
+ * The IdentifierNotResolvable exception is raised for cases where
+ * the database and the identifier within the database cannot be
+ * resolved, so that the Identifier cannot even be searched for. <p>
+ *
+ * Carries the Identifier that could not be resolved and a string
+ * containing the reason resolution was not possible.
+ **/
+ exception IdentifierNotResolvable {
+ Identifier id;
+ string reason;
+ };
+
+
+
+ /**
+ * The IdentifierNotUnique exception is raised for cases where the
+ * Identifier specification is ambiguous and identifies more than one
+ * object.
+ **/
+ exception IdentifierNotUnique {
+ /** The non-unique Identifier. **/
+ Identifier id;
+ /** Identifiers for all objects that id identifies. **/
+ IdentifierList ids;
+ };
+
+ exception OutOfBounds { // declared by index/range based operations, e.g. seqcore::AnonymousSequence::sub_seq()
+ string reason;
+ };
+
+ exception IllegalSymbolException { // declared by Alphabet::to_symbol(), get_ambiguity() and get_symbol()
+ string reason;
+ };
+
+ /*
+ * ALIASES and STRUCTS
+ * ===================
+ *
+ * StringList - unbounded sequence of strings.
+ * BasisDef - short code whose values are the constants declared
+ * in the Basis interface.
+ * NameValuePair - a string name paired with a value of type any.
+ */
+
+ typedef sequence<string> StringList;
+ typedef short BasisDef;
+
+ struct NameValuePair {
+ string name;
+ any value;
+ };
+ typedef sequence<NameValuePair> NameValuePairList;
+
+ /*
+ * INTERFACES
+ */
+ interface Annotation;
+ interface Iterator;
+
+ typedef sequence<Annotation> AnnotationList;
+
+ /**
+ * AnnotationCollection contains Annotations, which belong to an
+ * Annotatable entity. Note: the life-cycle of the collection is
+ * coupled with the life-cycle of the owner entity (e.g. BioSequence).<p>
+ *
+ * @see seqcore::SeqFeatureCollection
+ **/
+ interface AnnotationCollection {
+
+ /**
+ * Number of annotations in the collection.
+ **/
+ unsigned long get_num_annotations();
+
+ AnnotationList get_annotations( // presumably the same list/iterator hybrid as BioSequenceCollection::get_seqs() -- TODO confirm
+ in unsigned long how_many, // upper bound on the length of the directly returned list (assumed)
+ out Iterator the_rest); // iterator over the elements not returned directly (assumed)
+ /**
+ * Provides access to the annotations by name. Can raise the
+ * IdentifierNotResolvable exception if the name is not part
+ * of the controlled vocabulary in the context of the owner entity.
+ **/
+ AnnotationList get_annotations_by_name( in string name)
+ raises( IdentifierNotResolvable) ;
+ };
+
+ /**
+ * The Basis interface declares the constants (of type BasisDef) that
+ * are used to specify whether an Annotation or Sequence originated
+ * from an experimental result or a computational analysis, such as
+ * from the application of a sequence analysis program.
+ *
+ * NOTE: Use prefix BASIS to avoid collisions in names as in the BSA?
+ **/
+ interface Basis {
+
+ /** NOT_KNOWN should be used in all cases not indicated below. **/
+ const BasisDef NOT_KNOWN=0;
+ /** EXPERIMENTAL should be used to indicate an experimental result.
+ **/
+ const BasisDef EXPERIMENTAL=1;
+ /**
+ * COMPUTATIONAL is used to indicate a computational analysis, such
+ * as from the application of a sequence analysis program.
+ **/
+ const BasisDef COMPUTATIONAL=2;
+ /**
+ * Any result determined both experimentally and computationally
+ * should use BOTH.
+ **/
+ const BasisDef BOTH=3;
+
+ /**
+ * NOT_APPLICABLE should be used to indicate that Basis
+ * doesn't apply.
+ **/
+
+ const BasisDef NOT_APPLICABLE=4;
+ };
+
+ /**
+ * Annotatable
+ *
+ * This interface defines an entity that has an AnnotationCollection,
+ * which is obtained through get_annotations().
+ **/
+ interface Annotatable {
+ AnnotationCollection get_annotations() ;
+ };
+
+ /**
+ * Identifiable is a base interface for entities that have an
+ * existence (identity) and a life cycle (they are Removable) of
+ * their own.
+ **/
+ interface Identifiable : Removable {
+ /**
+ * Unique id of the entity.
+ **/
+ Identifier get_id();
+ /**
+ * Non-unique descriptive name of the entity.
+ **/
+ string get_name();
+ string get_description();
+ BasisDef get_basis(); // one of the BasisDef constants declared in the Basis interface
+ };
+ typedef sequence<Identifiable> IdentifiableList;
+
+ /**
+ * The Annotation interface defines an annotation that could, in
+ * principle, be associated with any bio-object that requires
+ * description using name-value pairs.
+ *
+ * @see seqcore::SeqFeature
+ **/
+ interface Annotation : Removable {
+
+ /**
+
+ * The name attribute specifies the general type of the annotation;
+ * the annotation itself is contained in the value attribute. The
+ * value is of type any and therefore could contain anything from
+ * a block of free text to a specialized datatype.
+ **/
+ string get_name();
+
+ /**
+ * Annotation has a basis attribute, which specifies whether the
+ * annotation originated from an experimental result
+ * (EXPERIMENTAL) or a computational analysis (COMPUTATIONAL),
+ * such as from the application of a sequence analysis
+ * program. Basis provides for a coarse-grained classification of
+ * an Annotation.
+ **/
+ BasisDef get_basis();
+
+ /**
+ * The value attribute contains the annotation itself.
+ **/
+ any get_value();
+
+ };
+
+ /**
+ * Iterator gives sequential access to a set of Objects. It is the
+ * type of the the_rest out parameter of the list/iterator hybrid
+ * operations such as AnnotationCollection::get_annotations().
+ **/
+ interface Iterator : Removable {
+
+ /**
+ * The next() operation gets the next Object in its out
+ * parameter the_object and returns a boolean value. If the
+ * iterator is at the end of the set, it returns FALSE and sets
+ * the output the_object parameter to null.<P>
+ *
+ * Raises IteratorInvalid if the iterator is no longer valid (e.g.,
+ * the underlying collection has changed).
+ **/
+ boolean next(out Object the_object)
+ raises(IteratorInvalid);
+
+ /**
+ * next_n() returns Objects in the ObjectSeq out parameter
+ * objects, containing at most the number specified in the first
+ * parameter (how_many) and returns a boolean value directly. When
+ * it is at the end of the set it returns FALSE and the
+ * objects parameter will have length zero. In all cases the
+ * length of objects will be the minimum of how_many and the
+ * number of elements remaining.
+ **/
+ boolean next_n(in unsigned long how_many,
+ out Types::ObjectSeq objects)
+ raises(IteratorInvalid);
+ /** reset() sets the iterator to the start of the set. **/
+ void reset();
+ };
+
+ /**
+ * OctetIterator
+ *
+ * Counterpart of Iterator for sets of octets.
+ **/
+ interface OctetIterator : Removable {
+
+ /**
+ * The next() operation gets the next element in its out parameter
+ * and returns a boolean value. If the iterator is at
+ * the end of the set, it returns FALSE and sets the out
+ * parameter to null.<P>
+ *
+ * NOTE(review): this text describes an octet out parameter
+ * (the_octet), but the operation is declared with
+ * "out Object the_object", identical to Iterator::next() --
+ * confirm which one is intended.
+ *
+ * Raises IteratorInvalid if the iterator is no longer valid
+ * (e.g., the underlying collection has changed).
+ **/
+ boolean next(out Object the_object)
+ raises(IteratorInvalid);
+
+ /**
+ * next_n() returns octets in the OctetSeq out parameter octets,
+ * containing at most the number specified in the first parameter
+ * (how_many) and returns a boolean value directly. When it is at
+ * the end of the set it returns FALSE and the octets
+ * parameter will have length zero. In all cases the length of
+ * octets will be the minimum of how_many and the number of
+ * elements remaining.
+ *
+ * NOTE(review): unlike Iterator::next_n(), this operation does
+ * not declare raises(IteratorInvalid) -- confirm that is intended.
+ **/
+ boolean next_n(in unsigned long how_many,
+ out Types::OctetSeq octets);
+
+ /** reset() sets the iterator to the start of the set. **/
+ void reset();
+ };
+
+ interface Alphabet;
+ typedef sequence<Alphabet> AlphabetList;
+
+ interface Symbol;
+ typedef sequence<Symbol> SymbolList;
+
+ /**
+ * An Alphabet contains a set of symbols, which can be concatenated
+ * to form symbol lists. A sequence string, for example, is the
+ * stringified representation of a symbol list (the tokens of its
+ * symbols).
+ **/
+ interface Alphabet : Identifiable, Annotatable {
+
+ /**
+ * List of symbols that make up this
+ * alphabet.
+ **/
+ SymbolList get_symbols();
+
+ /**
+ * Sub-alphabets. E.g. codons made from DNAxDNAxDNA alphabets.
+ **/
+ AlphabetList alphabets();
+
+ boolean contains( in Symbol s);
+
+ /**
+ * Resolve symbols from the token string.
+ **/
+ SymbolList to_symbol(in string tokens)
+ raises ( IllegalSymbolException) ;
+
+ /**
+ * Convenience method, which returns the gap symbol that does not
+ * match any other symbol in the alphabet.
+ **/
+ Symbol get_gap_symbol() raises ( DoesNotExist) ;
+
+ /**
+ * Returns an ambiguity symbol, which represents a list of
+ * symbols. All symbols in the list must be members of
+ * this alphabet, otherwise IllegalSymbolException is
+ * raised.
+ **/
+ Symbol get_ambiguity( in SymbolList symbols)
+ raises( IllegalSymbolException) ;
+
+ /**
+ * Returns a Symbol that represents the ordered list of symbols
+ * given as a parameter. Each symbol in the list must be a member
+ * of a different sub-alphabet, in the order defined by the
+ * alphabets() operation. For example, codons can be represented
+ * by a compound Alphabet of three DNA Alphabets, in which case the
+ * get_symbol( SymbolList[ a,g,t]) method of the Alphabet returns
+ * the Symbol for the codon agt.<p>
+ *
+ * IllegalSymbolException is raised if the members of symbols
+ * are not Symbols over the sub-alphabets (the original text
+ * refers to a "get_alphabets()" method; the operation declared
+ * in this interface is alphabets()).
+ **/
+ Symbol get_symbol(in SymbolList symbols)
+ raises(IllegalSymbolException) ;
+
+ };
+
+
+
+ /**
+ * A Symbol represents a single token in the sequence. A Symbol can
+ * have multiple synonyms or matches within the same Alphabet, which
+ * makes it possible to represent ambiguity codes and gaps.<p>
+ *
+ * Symbols can also be composed from an ordered list of other
+ * symbols. For example, codons can be represented by a single
+ * Symbol using a compound Alphabet made from three DNA Alphabets.
+ **/
+ interface Symbol {
+
+ /**
+ * Descriptive name for the symbol.
+ **/
+ string get_name();
+ /**
+ * Token for the symbol. E.g. letter A, G, T or C in a DNA
+ * alphabet.
+ **/
+ string get_token();
+ /**
+ * List of Symbols that this symbol is composed from.
+ **/
+ SymbolList get_symbols(); //moved from the BasisSymbol
+ /**
+ * (Sub) alphabet of symbols matched by this symbol, including
+ * the symbol itself (e.g. if the symbol is the DNA ambiguity
+ * code W, which stands for A or T, then the matches contain
+ * the symbols W, A and T).
+ **/
+ Alphabet get_matches();
+ };
+
+ /**
+ * Marker interface that represents a Symbol which is not
+ * ambiguous, i.e., its matches attribute is an Alphabet that
+ * contains just that one symbol. A single DNA nucleotide, a codon
+ * and an amino acid are examples of AtomicSymbols. The interface
+ * adds no operations to Symbol.
+ **/
+ interface AtomicSymbol : Symbol {
+
+ };
+
+
+};
+#endif // _DS_LSR_BSANE_IDL_
View
104 idl/collection.idl
@@ -0,0 +1,104 @@
+//$Id: collection.idl,v 1.1 2001-09-27 16:34:49 jason Exp $
+#ifndef _DS_LSR_COLL_IDL_
+#define _DS_LSR_COLL_IDL_
+
+#pragma prefix "omg.org"
+
+#include <bsane.idl>
+#include <seqcore.idl>
+#include <comparison.idl>
+
+module bsane {
+
+ /**
+ * The module contains interfaces for basic biological collections
+ **/
+ module collection {
+
+ /** NOTE: is this exception needed with the resolve() method? It was
+ * taken from the EMBL IDL. resolve(), as declared in
+ * BioSequenceIdentifierResolver, does not raise it. **/
+ exception Superceded { IdentifierList identifiers; };
+
+ /**
+ * The BioSequenceIdentifierResolver provides a mechanism to
+ * retrieve the actual BioSequence object from a collection
+ * search, using the Identifier string.<p>
+ *
+ * NOTE: Do we need a general IdentifiableResolver...<p>
+ *
+ * @since BSA
+ **/
+ interface BioSequenceIdentifierResolver {
+
+ /**
+ * Resolve the sequence identifier. Raises the
+ * IdentifierDoesNotExist exception if the sequence does not
+ * exist and IdentifierNotResolvable if the identifier is not
+ * valid. An Identifier is not valid if its syntax is not OK
+ * or it contains, for example, a wrong database id. NOTE:
+ * The database id can be obtained from the
+ * SequenceDatabase interface (id-attribute inherited
+ * from the Identifiable).
+ *
+ * Raises IdentifierNotUnique if the Identifier specification is
+ * ambiguous and identifies more than one object.
+ *
+ * NOTE: Use the Superceded exception to notify that an entry has
+ * changed or merged with some other entry (previous ids are in
+ * the exception)???
+ **/
+
+ seqcore::BioSequence resolve( in Identifier id )
+ raises ( IdentifierDoesNotExist, IdentifierNotResolvable, IdentifierNotUnique);
+
+ };
+
+
+ /**
+ * Represents sequence collections, from EST libraries to sequence
+ * databases etc... The identifier of the collection (inherited from
+ * the Identifiable) must be consistent with the identifiers of
+ * the sequences.
+ *
+ * @since BSANE 0.1
+ **/
+ interface BioSequenceCollection :
+ BioSequenceIdentifierResolver, Identifiable, Annotatable {
+
+ /** Uses the list/iterator hybrid to provide access to the
+ * Seqs. A list of no more than how_many elements is
+ * returned as the direct result. The remaining elements, if any,
+ * are available through the iterator returned in the out
+ * parameter. The RequestTooLarge exception is thrown if the
+ * client asks for too much.
+ *
+ * NOTE(review): the operation below declares no raises clause,
+ * so RequestTooLarge is not part of its signature as written --
+ * confirm whether raises(RequestTooLarge) is missing.
+ *
+ * NOTE: Changed from AnonymousSequence to BioSequence ...also resolver
+ **/
+ seqcore::BioSequenceList get_seqs( in unsigned long how_many,
+ out Iterator the_rest);
+
+
+ };
+
+ interface TreeNode;
+ typedef sequence <TreeNode>TreeNodeList;
+
+ /**
+ * TreeNode represents a general tree node with a parent and
+ * children. get_children() has the same how_many / the_rest
+ * signature as the list/iterator hybrid operations elsewhere in
+ * these files.<p>
+ *
+ * NOTE: move outside the "Bio" module, rename to AbstractTreeNode
+ *
+ * @since BSANE 0.1
+ **/
+ interface TreeNode {
+
+ TreeNode get_parent();
+ TreeNodeList get_children ( in unsigned long how_many,
+ out Iterator the_rest) ;
+
+ };
+
+ };
+};
+
+#endif // _DS_LSR_COLL_IDL_
View
279 idl/comparison.idl
@@ -0,0 +1,279 @@
+//$Id: comparison.idl,v 1.1 2001-09-27 16:34:49 jason Exp $
+#ifndef _DS_LSR_BSANE_COMPA_IDL_
+#define _DS_LSR_BSANE_COMPA_IDL_
+
+#pragma prefix "omg.org.bsane"
+
+#include <bsane.idl>
+#include <seqcore.idl>
+
+module bsane {
+
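+ /**
+ * The module contains entities related to sequence comparison.
+ *
+ * NOTE(review): this file sets #pragma prefix "omg.org.bsane", whereas
+ * bsane.idl, collection.idl and seqcore.idl set "omg.org" -- confirm
+ * that a different prefix for the reopened bsane module is intended.
+ **/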
+ module comparison {
+
+
+ /**
+ * An AlignmentElement corresponds to a row in a traditional
+ * alignment. However, to make it general, it is represented by a
+ * wrapper that allows any Object to be used in an Alignment. This
+ * approach allows the occurrence of one and the same Object in
+ * different rows (using the key), and also avoids the
+ * combinatorial problem of having every type of BioSequence
+ * duplicated just so it can be used in an Alignment. This
+ * approach allows other objects, not yet defined in this standard
+ * (e.g., hidden Markov models), to be used in the alignment. Most
+ * commonly, however, AlignmentElement will contain an element of
+ * type BioSequence.
+ *
+ * The key provides a unique reference to each AlignmentElement to
+ * be maintained between the client and the server of the
+ * Alignment. Notice that there may be more than one copy of a
+ * particular Object in the Alignment. There are no prescribed
+ * semantics for how the key is structured. (The original text
+ * announces examples of keys for BioSequences here, but none
+ * follow in this file.)
+ *
+ **/
+
+ interface AlignmentElement : Annotatable {
+
+ /**
+ * The key provides a unique reference to each
+ * AlignmentElement to be maintained between the client
+ * and the server of the Alignment. Notice that there may
+ * be more than one copy of a particular Object in the
+ * Alignment. There are no prescribed semantics for how the
+ * key is structured. It is used in the get_seq_region()
+ * method in Alignment to provide a unique key for this
+ * AlignmentElement.
+ **/
+ string get_key() ;
+
+ /**
+ * Original *unmodified* sequence that is being aligned. Usually
+ * this is one of the subclasses of AnonymousSequence, which have
+ * the Alphabet that can be used to interpret the "edited
+ * sequence" obtained from the Alignment interface.
+ *
+ **/
+ seqcore::AnonymousSequence get_element();
+
+ /**
+ * The seq_region represents the coordinates of a particular
+ * segment of the element (typically a BioSequence) that is
+ * aligned in the current Alignment, and that is considered one
+ * row in the Alignment. The coordinates are those of the
+ * original Object, not those of the Alignment. Notice that a
+ * particular Object might be represented more than once in the
+ * Alignment, and seq_region will provide the information as to
+ * the region of the Object that is used. The only valid
+ * SeqFeatureLocationOperator is JOIN.
+ * NOTE: multiple regions can be stored in the sub-regions of
+ * SeqFeatureLocation (the field is declared as
+ * sub_seq_locations in seqcore.idl).
+ **/
+
+ seqcore::SeqFeatureLocation get_region() ;
+
+ };
+ typedef sequence<AlignmentElement> AlignmentElementList;
+
+
+ /**
+ * An Alignment is built from a set of correspondences of regions
+ * of sequences. In many cases the sequence region is only a
+ * single residue (a single base or a single amino acid) long, but
+ * this need not be. For example, a region of three DNA base
+ * pairs, representing a single amino acid, is a common region
+ * size. Each correspondence, which is called a column due to the
+ * common visual interpretation of an alignment, indicates that a
+ * particular region of one sequence is in some manner equivalent
+ * to a set of particular regions on other sequences. The exact
+ * nature of this equivalence differs between different alignment
+ * methods, the most common being that these regions shared a
+ * common evolutionary ancestor. An alternative is that these
+ * regions were read from the same region of physical DNA, as in a
+ * DNA assembly.
+ *
+ * Alignment representation in sequence analysis has been
+ * dominated by text based representation of the
+ * correspondences as columns, with sequences running
+ * horizontally and each correspondence being represented by a
+ * column. Padding characters (often '-') are placed in
+ * sequences to align the residues with the correct
+ * correspondences in other sequences.
+ *
+ * NOTE(review): the original text refers to
+ * collection::SequenceCollection; the collection module visible
+ * alongside this file declares BioSequenceCollection -- confirm.
+ *
+ * @see collection::BioSequenceCollection
+ * @see seqcore::AnonymousSequence
+ **/
+ interface Alignment {
+
+ /**
+ * An AlignType is a string that contains the type of the
+ * assumption made for this grouping of regions on
+ * sequences. Several kinds of AlignTypes are given
+ * below. Common alignment assumptions are provided as
+ * simple strings, with constant types as a starting point
+ * for a list of assumptions. UNKNOWN indicates that no
+ * additional information is provided with the alignment,
+ * as would be the case for, e.g., Smith-Waterman
+ * alignments. PROTEIN indicates that this column does
+ * encode (part of) a protein. This can be either because
+ * it contains one or more amino acid residues, or more
+ * importantly, because the column consists of triplet(s)
+ * of DNA bases that encode amino acid(s). A very common
+ * region size is 1 for amino acids, and 3 for nucleotide
+ * triplets. However, more complex regions, e.g., a
+ * transmembrane protein sequence segment, are entirely
+ * possible. SEQUENCE_ERROR indicates that the column
+ * contains bases that are considered to be erroneous.
+ * For example, in aligning a protein to a DNA sequence it
+ * is possible to distinguish insertions due to evolutionary
+ * processes (PROTEIN) from insertions due to sequencing
+ * error (SEQUENCE_ERROR). More involved alignment
+ * methods, for example hidden Markov models, could use
+ * the AlignType string to provide a sensible decoding of
+ * the alignment, and in these cases, the AlignType may be
+ * more informative than the SeqFeatureLocation provided
+ * by the Alignment.
+ *
+ * NON_PROTEIN is declared below but not described in this text.
+ *
+ **/
+ typedef string AlignType;
+ typedef sequence <AlignType> AlignTypeList;
+
+ /* NOTE: unlike the other constants in these files, these are
+ string constants rather than short codes */
+ const AlignType PROTEIN = "PROTEIN";
+ const AlignType NON_PROTEIN = "NON_PROTEIN";
+ const AlignType SEQUENCE_ERROR = "SEQUENCE_ERROR";
+ const AlignType UNKNOWN = "UNKNOWN";
+
+ //from bioJava
+ //StringList get_keys();
+
+ unsigned long get_num_rows();
+ unsigned long get_num_columns();
+
+ /**
+ * This method allows the retrieval of AlignmentElements. They
+ * correspond to the rows in a traditional textually represented
+ * alignment; typically, the AlignmentElements are
+ * sequences. Uses the list/iterator hybrid to provide access to
+ * the AlignmentElements. A list of no more than how_many
+ * elements starting at start is returned as the direct
+ * result. The remaining elements, if any, are available through
+ * the iterator returned in the out parameter. This is
+ * particularly useful for Assemblies, where for a particular
+ * region, only a few sequences from thousands are relevant.
+ **/
+ AlignmentElementList get_alignment_elements ( in unsigned long start,
+ in unsigned long how_many,
+ out Iterator the_rest
+ )
+ raises (OutOfBounds);
+
+ //Do we need IdentifierNotResolvable and/or retype key into Identifier?
+ AlignmentElement get_alignment_element( in string key)
+ raises (DoesNotExist);
+
+
+ /** The input parameter key unambiguously identifies an
+ * AlignmentElement within the Alignment. For each
+ * correspondence, each AlignmentElement will have a
+ * particular SeqFeatureLocation, returned by
+ * get_seq_region(). A null SeqFeatureLocation indicates
+ * that there is no region for this correspondence (i.e.,
+ * a gap). Multiple gaps are represented by multiple
+ * SeqFeatureLocations. To find the "length" of a gap, it
+ * is necessary to check other correspondences in the
+ * column. A null SeqFeatureLocation contains no length
+ * information. The input parameter the_interval
+ * represents an interval in the coordinates of the
+ * Alignment, not that of the underlying Object. If the
+ * interval includes a gap at the start, middle or end,
+ * the returned SeqFeatureLocation does not show it,
+ * because the start and end of it are in the coordinate
+ * system of the underlying Object which is unaware of any
+ * gaps. Instead, the corresponding segment of the
+ * underlying Object is indicated. It is assumed that the
+ * numbering of the correspondences is relevant, i.e.,
+ * that the second correspondence comes after the first,
+ * with all the intervals abutting. This allows an
+ * Interval of correspondences to be a valid concept.
+ **/
+ seqcore::SeqFeatureLocation get_seq_region ( in string key,
+ in seqcore::SeqFeatureLocation the_interval
+ )
+ raises (OutOfBounds, DoesNotExist,
+ seqcore::SeqFeatureLocationOutOfBounds,
+ seqcore::SeqFeatureLocationInvalid);
+
+ AlignType get_align_type_by_column ( in unsigned long col )
+ raises (OutOfBounds);
+
+ /**
+ * Convenience method. Returns the edited sequence string of the
+ * sequence on an AlignmentElement. Special chars can be
+ * resolved using the Alphabet of the original sequence.
+ *
+ * NOTE: check this AlignmentElement/AnonymousSequence
+ **/
+ string get_edited_sequence_string( in string key)
+ raises (DoesNotExist);
+
+ /**
+ * Convenience method, which returns the locations of gaps on the
+ * original unmodified sequence. Gaps are returned in a list of
+ * sub-regions. The start and end positions of the top level
+ * SeqFeatureLocation indicate the first and last gap on the
+ * sequence. NOTE: check whether this method is needed
+ **/
+ seqcore::SeqFeatureLocation get_gaps( in string key)
+ raises (DoesNotExist);
+
+
+ };
+ typedef sequence <Alignment> AlignmentList;
+
+ interface Assembly;
+ typedef sequence <Assembly> AssemblyList;
+
+ /**
+ * Assembly defines an entity used to represent a unique alignment
+ * of two or more sequences, which produces one or more contiguous
+ * consensus sequences (or consensus elements).<p>
+ *
+ * NOTE: Should this inherit from sequence collection / hash
+ * or Identifiable
+ **/
+ interface Assembly : Alignment {
+ /**
+ * List of consensus elements produced from the underlying
+ * fixed Assembly. This list can be empty if consensus
+ * elements are not available.
+ **/
+ AlignmentElementList get_consensus_elements();
+
+ /**
+ * Returns the list of Assemblies that are linked with this
+ * Assembly. For example, assemblies which have sequence
+ * derived from the same clone, but do not have connecting
+ * sequence between them.
+ * NOTE: Use the iterator pattern?
+ **/
+ AssemblyList get_linked();
+
+
+ };
+
+
+
+
+ };
+
+
+
+};
+
+#endif // _DS_LSR_BSANE_COMPA_IDL_
View
383 idl/seqcore.idl
@@ -0,0 +1,383 @@
+//$Id: seqcore.idl,v 1.1 2001-09-27 16:34:49 jason Exp $
+#ifndef _DS_LSR_SEQCORE_IDL_
+#define _DS_LSR_SEQCORE_IDL_
+
+#pragma prefix "omg.org"
+
+#include <bsane.idl>
+
+module bsane {
+ module seqcore {
+
+ typedef short SeqFeatureLocationDef; // only referenced by the commented-out SeqFeatureLocation union further down in this file
+ typedef short StrandTypeDef; // values are the constants declared in StrandType
+ interface StrandType {
+ /** NOT_KNOWN should be used in all cases not indicated
+ * below. **/
+ const StrandTypeDef NOT_KNOWN=3;
+ /** NOT_APPLICABLE should be used for regions of
+ * AminoAcidSequences. **/
+ const StrandTypeDef NOT_APPLICABLE=2;
+ /** PLUS should be used to indicate the original
+ * plus-strand of a NucleotideSequence. **/
+ const StrandTypeDef PLUS=1;
+ /** MINUS should be used to indicate the reverse
+ * complement of the plus-strand of a NucleotideSequence. **/
+ const StrandTypeDef MINUS=-1;
+ /** BOTH should be used to indicate both strands of a
+ * double-stranded NucleotideSequence. **/
+ const StrandTypeDef BOTH=0;
+ };
+
+ exception ReadingFrameInvalid { // NOTE(review): not raised by any operation in the visible part of this file
+ string reason;
+ };
+
+ /**
+ * The SeqType interface declares constants (of type SeqTypeDef)
+ * for the different sequence types. Note that the sequence type
+ * must be consistent with the sequence alphabet.
+ * @see BioSequence
+ * @see AnonymousSequence
+ * @see Alphabet
+ **/
+ typedef short SeqTypeDef ;
+ interface SeqType {
+ const SeqTypeDef PROTEIN = 0;
+ const SeqTypeDef DNA = 1;
+ const SeqTypeDef RNA = 2;
+ const SeqTypeDef NOT_KNOWN = -1;
+ };
+
+ /**
+ * An interface declaring type codes (of type FuzzyTypeDef) for the
+ * different types of fuzziness possible for a position on a
+ * biological sequence.
+ **/
+ typedef short FuzzyTypeDef ;
+ interface FuzzyType {
+ /** Position is 'exact'. **/
+ const FuzzyTypeDef EXACT = 1;
+
+ /** Exact position is unknown, but is within the range specified,
+ * e.g. for ((1.2)..100), the position is 1 or 2 for the start
+ * position of the region. **/
+ const FuzzyTypeDef WITHIN = 2;
+
+ /** Exact position is between two of the positions in the range
+ * specified, e.g. for (1^2), the position is between bases 1 and
+ * 2. **/
+ const FuzzyTypeDef BETWEEN = 3;
+
+ /** Exact lower boundary of the position is unknown, but is before
+ * the position specified, e.g. for (<10..100), the position
+ * starts before 10 for the start position of the region. **/
+ const FuzzyTypeDef BEFORE = 4;
+
+ /** Exact upper boundary of the position is unknown, but is after
+ * the position specified, e.g. for (>10..100), the position starts
+ * after 10 for the start position of the region. **/
+ const FuzzyTypeDef AFTER = 5;
+ };
+
+ /** An interface declaring type codes (of type SeqFeatureLocOpDef)
+ * for region operators. They are the values of
+ * SeqFeatureLocation::region_operator.
+ * @see SeqFeature
+ **/
+ typedef short SeqFeatureLocOpDef;
+ interface SeqFeatureLocationOperator {
+ const SeqFeatureLocOpDef NONE=0;
+ const SeqFeatureLocOpDef JOIN=1;
+ const SeqFeatureLocOpDef ORDER=3; // NOTE(review): value 2 is not assigned -- confirm the gap is intentional
+ };
+
+ /**
+ * AnonymousSequence is a lightweight sequence interface, which
+ * defines basic accessor methods for token strings.
+ *
+ * @see BioSequence
+ **/
+ interface AnonymousSequence : Removable {
+
+ unsigned long get_length();
+
+ /** Type of the sequence. See: SeqType **/
+ SeqTypeDef get_type();
+
+ /** Return whether the sequence is circular or linear. **/
+ boolean is_circular();
+
+ /**
+ * Return the sequence as a string, or raise the
+ * RequestTooLarge exception if the sequence is too long
+ * for the server to handle.
+ **/
+ string seq() raises ( RequestTooLarge ) ;
+
+ /** Return the part of the sequence between start and end as a
+ * string (whether the bounds are 1-based and inclusive is
+ * not stated here -- TODO confirm).
+ * NOTE use FeatureLocation (rename it first to region) **/
+ string sub_seq ( in unsigned long start, in unsigned long end )
+ raises (OutOfBounds, RequestTooLarge);
+
+ };
+ typedef sequence<AnonymousSequence>AnonymousSequenceList;
+
+ /**
+ *
+ * A struct defining the properties of a position on a biological
+ * sequence. It is the type of the start and end members of
+ * SeqLocation.
+ * Rename to SeqPosition??
+ **/
+ struct SeqFeaturePosition {
+
+ /** Specifies the position of a SeqFeatureLocation on a
+ * sequence. **/
+ unsigned long position;
+
+ /** Specifies the extension of the position, e.g. for (1.3..12),
+ * the extension of the start position is 2. For (1..12), the
+ * extension of both the start and end positions is 0.
+ **/
+ unsigned long extension;
+
+ /** Specifies the type code describing the fuzzy character of the
+ * position, as declared in the FuzzyType interface.
+ **/
+ FuzzyTypeDef fuzzy;
+ };
+
+ /**
+ * A struct that specifies a region along a biological sequence
+ * using start and end positions and a strand. It is the building
+ * block of SeqFeatureLocation (see its seq_location and
+ * sub_seq_locations members). The positions may be 'fuzzy'.
+ * N.B. It is assumed that the region
+ * 'join(1..10,20)' is equivalent to 'join(1..10,20..20)'.
+ *
+ **/
+
+ struct SeqLocation {
+ /** Specifies the (fuzzy) start position of a SeqFeatureLocation
+ * along a biological sequence.
+ **/
+ SeqFeaturePosition start;
+
+ /** Specifies the (fuzzy) end position of a SeqFeatureLocation along a
+ * biological sequence.
+ **/
+ SeqFeaturePosition end;
+
+ /** Specifies which strand of the Seq the SeqFeatureLocation
+ * lies on (one of the StrandType constants).
+ **/
+ StrandTypeDef strand;
+ };
+
+ struct SeqFeatureLocation { // a location on a sequence: one SeqLocation plus optional sub-locations
+ SeqLocation seq_location;
+
+ /** Region operator (one of the SeqFeatureLocationOperator
+ * constants). NOTE: should be called location_operator? **/
+ SeqFeatureLocOpDef region_operator;
+
+ sequence <SeqLocation> sub_seq_locations; // presumably the parts combined by region_operator -- TODO confirm
+
+ Identifier id; // NOTE(review): undocumented; possibly the sequence the location refers to (cf. the commented-out RemoteSeqFeatureLocation) -- confirm
+
+ };
+ typedef sequence<SeqFeatureLocation> SeqFeatureLocationList;
+
+
+ // struct RemoteSeqFeatureLocation {
+ // Identifier id;
+ // BasicSeqFeatureLocation location;
+ // };
+ // struct CompositeSeqFeatureLocation {
+ // sequence <SeqFeatureLocation> sub_regions;
+ // };
+ // union SeqFeatureLocation switch (SeqFeatureLocationDef) {
+ // case 1: BasicSeqFeatureLocation basic;
+ // case 2: RemoteSeqFeatureLocation remote;
+ // case 3: CompositeSeqFeatureLocation composite;
+ // };
+
+
+ exception SeqFeatureLocationOutOfBounds { // declared by comparison::Alignment::get_seq_region()
+ SeqFeatureLocation invalid; // presumably the offending location -- TODO confirm
+ SeqFeatureLocation valid; // presumably the permitted range -- TODO confirm
+ };
+
+ exception SeqFeatureLocationInvalid { // Raised when a supplied SeqFeatureLocation is invalid (see the SeqFeatureCollection operation comments for examples).
+ string reason; // Free-text reason; NOTE(review): presumably a human-readable explanation, no format is specified here.
+ };
+
+
+ /**
+ *
+ * For biomolecular sequences, Annotations are specialized to
+ * SeqFeatures to include sequence position information in the
+ * form of SeqFeatureLocation values (see
+ * above). Essentially, this location indicates to which part of
+ * the sequence the annotation pertains, and is analogous to
+ * features in the DDBJ/EMBL/GenBank formats. Typical examples
+ * include gene, promoter region, and exons.
+ *
+ **/
+ interface SeqFeature : Annotation , Annotatable {
+
+ /** Returns the start, in biological coordinates, of the
+ * SeqFeatureLocation on the Seq (1 is the first base).
+ **/
+ unsigned long get_start();
+
+ /** Returns the end, in biological coordinates, of the
+ * SeqFeatureLocation on the Seq (start=1:end=2 are the
+ * first two bases).
+ **/
+ unsigned long get_end();
+
+ /** Returns the regions of this feature along
+ * its Seq as a list of SeqFeatureLocation structs. This is
+ * similar to how biojava works.
+ **/
+ SeqFeatureLocationList get_locations() ;
+
+
+ /**
+ * Returns the reference to the AnonymousSequence that this
+ * Annotation belongs to. Declared to raise DoesNotExist;
+ * NOTE(review): presumably when no owner sequence is
+ * available -- confirm. **/
+ AnonymousSequence get_owner_sequence() raises(DoesNotExist ) ;
+
+ /**
+ * (Disabled operation -- the declaration below is commented out.)
+ *
+ * SeqFeature Annotation contains additional information
+ * in the form of so-called qualifiers, which enable
+ * them to support many kinds of
+ * keyword controlled attributes. These properties are
+ * essential for covering the full spectrum of current
+ * annotation and feature information. Used to be in
+ * Annotation... should be replaced by Annotatable
+ *
+ * NOTE(review): the original text read "represented by the,
+ * which enables ..." with the type name missing; presumably
+ * NameValuePair, judging by the disabled declaration -- confirm.
+ *
+ * NOTE: remove ... inheritance from Annotatable or
+ * SequenceAnnotatable
+ **/
+ //NameValuePairList get_qualifiers();
+
+ };
+ typedef sequence<SeqFeature> SeqFeatureList; // Unbounded list of SeqFeature references; returned by SeqFeatureCollection::get_features_on_region().
+
+
+ /**
+ * Collection of SeqFeatures. Provides some basic query methods
+ * for getting the features by SeqFeatureLocation.
+ **/
+ interface SeqFeatureCollection : AnnotationCollection {
+
+ /**
+ * Uses the list/iterator pattern to provide access to the
+ * SeqFeatures. A list of no more than how_many elements
+ * is returned as the direct result. The remaining
+ * elements, if any, are available through the iterator
+ * returned in the out parameter the_rest. Only the SeqFeatures
+ * that overlap seq_region and have compatible
+ * StrandTypes will be returned.<p>
+ *
+ * Raises SeqFeatureLocationOutOfBounds if seq_region is out
+ * of bounds for the BioSequence that owns the collection.<p>
+ *
+ * Raises SeqFeatureLocationInvalid if the SeqFeatureLocation is
+ * invalid. Examples include an incorrect StrandType, or an
+ * invalid SeqFeatureLocation (e.g., one that has a wrong
+ * SeqFeatureLocationOperator or contains overlaps or
+ * circularities).
+ *
+ **/
+ SeqFeatureList get_features_on_region(
+ in unsigned long how_many,
+ in SeqFeatureLocation seq_region,
+ out Iterator the_rest)
+ raises(SeqFeatureLocationOutOfBounds,
+ SeqFeatureLocationInvalid);
+
+ /**
+ * Gives the number of annotations that fall within
+ * (inclusive) the seq_region. Raises
+ * SeqFeatureLocationOutOfBounds if seq_region is out of
+ * bounds for the BioSequence that owns the collection. Raises
+ * SeqFeatureLocationInvalid if the SeqFeatureLocation is
+ * invalid. Examples include an incorrect StrandType, or
+ * an invalid composite SeqFeatureLocation (e.g., one that
+ * has a wrong SeqFeatureLocationOperator or contains
+ * overlaps or circularities).
+ *
+ **/
+ unsigned long num_features_on_region(in SeqFeatureLocation
+ seq_region)
+ raises(SeqFeatureLocationOutOfBounds,
+ SeqFeatureLocationInvalid);
+
+ };
+
+ /**
+ *
+ * Interface that provides a method for getting a
+ * SeqFeatureCollection.
+ *
+ * NOTE: The interface can also guarantee that the returned
+ * collections, SeqFeatureCollection and AnnotationCollection
+ * (the latter inherited from Annotatable), are separate instances.
+ *
+ * NOTE: new. check this with others
+ **/
+ interface AnnotatableSequence : Annotatable {
+
+ /**
+ * Returns the SeqFeatureCollection of this sequence.
+ *
+ * NOTE: reference of get_seq_features() != get_annotations()
+ **/
+ SeqFeatureCollection get_seq_features();
+
+ };
+
+ /**
+ *
+ * Biological sequence with an alphabet. A BioSequence can contain
+ * special symbols, like gaps and ambiguity codes, which
+ * belong to the alphabet.<p>
+ *
+ * NOTE: add SymbolArray back...??
+ **/
+ interface BioSequence : AnonymousSequence,
+ Identifiable, AnnotatableSequence {
+
+ /**
+ * Returns narrowed AnonymousSequence instance made from
+ * this PrimarySequence so that the PrimarySequence itself
+ * can be removed using the remove-method (inherited from
+ * Removable).
+ *
+ * NOTE(review): "PrimarySequence" is not declared in the
+ * visible part of this file; presumably an earlier name
+ * for BioSequence -- confirm.
+ **/
+ AnonymousSequence get_anonymous_sequence() ;
+
+ Alphabet get_alphabet() ; //REMOVED: raises ( DoesNotExist ); -- returns the Alphabet of this sequence; no user exception is declared any more.
+
+ };
+ typedef sequence<BioSequence> BioSequenceList; // Unbounded list of BioSequence references.
+
+
+ interface NucleotideSequence : BioSequence { // BioSequence extended with reverse-complement and translation operations.
+
+ /** NOTE: RequestTooLarge-exception probably not needed
+ *
+ * Operation summary. The semantics below are inferred from
+ * the operation and parameter names only -- TODO confirm:
+ * - reverse_complement: presumably the reverse complement of
+ * the whole sequence, as a string.
+ * - reverse_complement_interval: same, restricted to
+ * the_region; declared to raise OutOfBounds.
+ * - translate_seq: presumably the translation in the given
+ * reading_frame; stop_locations is an out list of unsigned
+ * longs (presumably stop codon positions); declared to
+ * raise seqcore::ReadingFrameInvalid.
+ * - translate_seq_region: same, restricted to seq_region;
+ * declared to raise OutOfBounds and
+ * seqcore::SeqFeatureLocationInvalid.
+ *
+ * NOTE(review): the region operations here raise OutOfBounds,
+ * whereas SeqFeatureCollection raises
+ * SeqFeatureLocationOutOfBounds for the same kind of
+ * argument -- confirm that this difference is intended.
+ **/
+ string reverse_complement ();
+ string reverse_complement_interval ( in seqcore::SeqFeatureLocation the_region )
+ raises (OutOfBounds);
+ string translate_seq ( in short reading_frame,
+ out Types::ULongSeq stop_locations
+ )
+ raises (seqcore::ReadingFrameInvalid);
+ string translate_seq_region (
+ in seqcore::SeqFeatureLocation seq_region,
+ out Types::ULongSeq stop_locations
+ )
+ raises (OutOfBounds, seqcore::SeqFeatureLocationInvalid);
+ };
+
+
+ };
+
+};
+
+
+#endif // _DS_LSR_SEQCORE_IDL_
+
+
View
22 idl/types.idl
@@ -0,0 +1,22 @@
+#ifndef TYPES_IDL
+#define TYPES_IDL
+
+// These definitions are part of ORB... should include the idl
+// file from the ORB distribution
+#pragma prefix "omg.org"
+
+ module Types { // Generic unbounded sequence typedefs; bsane.idl includes this file and seqcore.idl uses Types::ULongSeq.
+
+ //Note: ObjectSeq is declared here. NOTE(review): the original comment read "add ObjectSeq", presumably a reminder now satisfied by the typedef below.
+ typedef sequence<Object> ObjectSeq;
+
+ typedef sequence<octet> OctetSeq;
+ //should use this -- NOTE(review): unclear whether "this" means OctetSeq above or StringSeq below; confirm with the author.
+ typedef sequence<string> StringSeq;
+
+ typedef sequence<unsigned long> ULongSeq; // Used for the stop_locations out parameters of NucleotideSequence.
+
+ };
+#endif
+
+

0 comments on commit 57afdba

Please sign in to comment.