Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
Conflicts:
	README.md
  • Loading branch information
eseraygun committed Mar 21, 2013
2 parents 9c082f1 + 53e5145 commit 3d11d2d
Show file tree
Hide file tree
Showing 13 changed files with 819 additions and 740 deletions.
24 changes: 24 additions & 0 deletions LICENSE.txt
@@ -0,0 +1,24 @@
Copyright (c) 2013, Eser Aygün
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Alignment nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
9 changes: 9 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,9 @@
# file GENERATED by distutils, do NOT edit
README.txt
setup.py
alignment/__init__.py
alignment/profile.py
alignment/profilealigner.py
alignment/sequence.py
alignment/sequencealigner.py
alignment/vocabulary.py
12 changes: 0 additions & 12 deletions README.md

This file was deleted.

45 changes: 45 additions & 0 deletions README.txt
@@ -0,0 +1,45 @@
=========
Alignment
=========

Alignment is a native Python library for generic sequence alignment. It is
useful in cases where your alphabet is arbitrarily large and you cannot use
traditional biological sequence analysis tools. It supports global and local
pairwise sequence alignment. Support for profile-profile alignment is planned,
but there is no schedule for it yet.

Usage
=====

Typical usage looks like this::

    from alignment.sequence import Sequence
    from alignment.vocabulary import Vocabulary
    from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner

    # Create sequences to be aligned.
    a = Sequence('what a beautiful day'.split())
    b = Sequence('what a disappointingly bad day'.split())

    # Create a vocabulary and encode the sequences.
    v = Vocabulary()
    aEncoded = v.encodeSequence(a)
    bEncoded = v.encodeSequence(b)

    # Create a scoring and align the sequences using global aligner.
    scoring = SimpleScoring(2, -1)
    aligner = GlobalSequenceAligner(scoring, -2)
    score, encodeds = aligner.align(aEncoded, bEncoded, backtrace=True)

    # Iterate over optimal alignments and print them.
    for encoded in encodeds:
        alignment = v.decodeSequenceAlignment(encoded)
        print alignment
        print 'Alignment score:', alignment.score
        print 'Percent identity:', alignment.percentIdentity()
        print

TODO List
=========

* Profile-profile alignment is not working yet.
214 changes: 109 additions & 105 deletions alignment/profile.py
Expand Up @@ -3,128 +3,132 @@

from alignment.sequence import *

# Profile ----------------------------------------------------------------------

# Profile ---------------------------------------------------------------------

class SoftElement(object):
    """A set of alternative elements, each carrying a numeric weight.

    The weights live in a private ``{element: weight}`` dict that is copied
    from the constructor argument, so later changes to the caller's mapping
    do not affect the instance.
    """

    def __init__(self, weights):
        """Store a copy of *weights*.

        *weights* is anything ``dict()`` accepts: a mapping or an iterable
        of ``(element, weight)`` pairs.
        """
        self.__weights = dict(weights)

    def key(self):
        """Return the only element if exactly one is stored, else ``None``."""
        if len(self.__weights) == 1:
            return next(iter(self.__weights))
        return None

    def mergedWith(self, other):
        """Return a new SoftElement with the weights of both operands summed.

        Neither ``self`` nor *other* is modified.
        """
        weights = dict(self.__weights)
        for element, weight in other.__weights.items():
            weights[element] = weights.get(element, 0) + weight
        return SoftElement(weights)

    def pairs(self):
        """Return an iterator over ``(element, weight)`` pairs."""
        # iter(...) keeps the return value an iterator on both Python 2
        # (where items() is a list) and Python 3 (where it is a view).
        return iter(self.__weights.items())

    def sorted(self):
        """Return ``(element, weight)`` pairs as a list.

        Pairs are ordered by descending weight, ties broken by ascending
        element.
        """
        return sorted(self.__weights.items(), key=lambda w: (-w[1], w[0]))

    def probabilities(self):
        """Return ``{element: weight / total_weight}`` with float values.

        Raises ZeroDivisionError if elements are present but their weights
        sum to zero.
        """
        total = sum(self.__weights.values())
        return {e: float(w) / total for e, w in self.__weights.items()}

    def toDict(self):
        """Return the weights as a plain ``{element: weight}`` dict.

        A copy is returned so that callers cannot alter this instance by
        mutating the result.
        """
        return dict(self.__weights)

    @classmethod
    def fromDict(cls, d):
        """Build an instance from a dict such as the one ``toDict`` returns."""
        return cls(d)

    def __eq__(self, other):
        """Two SoftElements are equal when their weight dicts are equal."""
        if not isinstance(other, SoftElement):
            # Let Python try the reflected comparison instead of failing
            # with AttributeError on the missing private attribute.
            return NotImplemented
        return self.__weights == other.__weights

    def __ne__(self, other):
        """Inverse of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``, so it is defined
        explicitly.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __len__(self):
        """Return the number of distinct elements."""
        return len(self.__weights)

    def __getitem__(self, item):
        """Return the weight of *item*; raises KeyError if it is absent."""
        return self.__weights[item]

    def __iter__(self):
        """Iterate over the elements (without their weights)."""
        return iter(self.__weights)

    def __repr__(self):
        """Return the ``repr`` of the list produced by ``sorted()``."""
        return repr(self.sorted())

    def __str__(self):
        """Return the lone element as a string, or ``{e1:w1,e2:w2,...}``."""
        weights = self.sorted()
        if len(weights) == 1:
            return str(weights[0][0])
        return '{%s}' % (','.join('%s:%d' % w for w in weights))

    def __unicode__(self):
        """Unicode counterpart of ``__str__``."""
        # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # the method does not depend on the Python-2-only builtin name.
        text = type(u'')
        weights = self.sorted()
        if len(weights) == 1:
            return text(weights[0][0])
        return u'{%s}' % (u','.join(u'%s:%d' % w for w in weights))

class Profile(Sequence):
    """A Sequence whose elements are all SoftElement instances."""

    @classmethod
    def fromSequence(cls, sequence):
        """Build a profile in which each element of *sequence* has weight 1."""
        elements = [SoftElement({e: 1}) for e in sequence]
        return cls(elements)

    @classmethod
    def fromSequenceAlignment(cls, alignment):
        """Build a profile from the two rows of a pairwise alignment.

        For each index below ``len(alignment)``, the items at that index in
        ``alignment.first`` and ``alignment.second`` are combined: equal
        items become one element of weight 2, differing items become two
        elements of weight 1 each.
        """
        profile = cls()
        for i in range(len(alignment)):
            a = alignment.first[i]
            b = alignment.second[i]
            if a == b:
                element = SoftElement({a: 2})
            else:
                element = SoftElement({a: 1, b: 1})
            profile.push(element)
        return profile

    def __init__(self, elements=None):
        """Create a profile from an iterable of SoftElement objects.

        With no argument, an empty profile is created.

        Raises ValueError if any item is not a SoftElement.
        """
        if elements is None:
            elements = []
        else:
            # Materialise before validating: a one-shot iterator would
            # otherwise be exhausted by the check and yield an empty profile.
            elements = list(elements)
            if not all(isinstance(e, SoftElement) for e in elements):
                raise ValueError(
                    'profile elements must belong to SoftElement class')
        super(Profile, self).__init__(elements)

    def key(self):
        """Return a tuple holding ``SoftElement.key()`` of every position."""
        return tuple(e.key() for e in self.elements)

    def pattern(self):
        """Return the keys joined by spaces, with ``*`` where a key is None.

        NOTE(review): the join assumes every non-None key is a string -
        confirm against callers.
        """
        words = []
        for word in self.key():
            if word is None:
                words.append(u'*')
            else:
                words.append(word)
        return u' '.join(words)

    def minVariationCount(self):
        """Return the largest number of alternatives at any one position.

        Raises ValueError if the profile is empty.
        """
        return max(len(e) for e in self.elements)

    def maxVariationCount(self):
        """Return the product of the number of alternatives at each position.

        Raises TypeError if the profile is empty.
        """
        # Local import: reduce is a builtin only on Python 2, while
        # functools.reduce exists on both Python 2.6+ and Python 3.
        from functools import reduce
        return reduce(operator.mul, (len(e) for e in self.elements))

    def mergeWith(self, other):
        """Merge *other* into this profile in place, position by position.

        Raises ValueError if the two profiles differ in length.
        """
        if len(self) != len(other):
            raise ValueError(
                'profiles with different lengths cannot be merged')
        self.elements = [a.mergedWith(b)
                         for a, b in zip(self.elements, other.elements)]

    def toDict(self):
        """Return a list containing ``toDict()`` of every element."""
        return [e.toDict() for e in self.elements]

    @classmethod
    def fromDict(cls, d):
        """Rebuild a profile from a list such as the one ``toDict`` returns."""
        elements = [SoftElement.fromDict(e) for e in d]
        return cls(elements)

0 comments on commit 3d11d2d

Please sign in to comment.