In [31]:
# Working outline of the three simulation stages (initial library sampling,
# selection by binding, amplification) together with the open questions noted
# for each stage.
# NOTE(review): item 2's "Problems: C" entry looks unfinished and "protiens"
# is misspelled; both are left untouched here because the string is runtime data.
__description__ = \
"""
1. Sample LIBRARY_SIZE sequences from all possible.  
    Problems: random sampling and (maybe) bias in initial library.
2. Sample from LIBRARY_SIZE according to protein/peptide binding constant.
    Limiting reagent: phage particles binding to an infinite field of protiens (Do individual clones become
                      limiting?)
    Problems: C
    
3. Amplify output from previous sample (phage into log-growth bacteria).  
    Limiting reagent: phage particles into infinite vat of bacteria
    Problems: multiple hits, slow growers, fast growers

"""

import numpy as np
import string

# One-letter amino-acid codes; the default alphabet for SeqIntegerMapper (base 20).
AMINO_ACIDS = ("A","C","D","E","F",
               "G","H","I","K","L",
               "M","N","P","Q","R",
               "S","T","V","W","Y")

# Four-letter nucleotide alphabet.
BASES = ("A","T","G","C")

# Two-letter alphabet.
BINARY = ("A","D")

class SeqIntegerMapper:
    """
    To save memory and avoid big-ole dicts, treat sequences as base-"alphabet size" numbers that
    are converted to base 10 integers.  Can handle alphabets up to 36 letters long.

    The position of a symbol in `alphabet` is its digit value, so the first
    symbol plays the role of zero and leading copies of it do not change the
    integer.  Python integers are arbitrary precision, so long sequences are
    safe here, but the values may not fit in fixed-width (e.g. numpy int64)
    containers.

    Construction raises IndexError if `alphabet` has more than 36 symbols,
    because each symbol is paired with one character of "0-9a-z".
    """
    
    def __init__(self,alphabet=AMINO_ACIDS):
        """
        Build the symbol -> digit-character lookup for `alphabet`.

        alphabet: ordered, indexable collection of hashable symbols.
        """
        
        self._alphabet = alphabet
        self._base = len(alphabet)

        # Digit characters understood by int(text, base) for bases up to 36.
        self.possible_digits = string.digits + string.ascii_lowercase
        
        self.string_to_base = {letter: self.possible_digits[i]
                               for i, letter in enumerate(alphabet)}
                
    @property
    def alphabet(self):
        """ Get alphabet. """
        return self._alphabet

    @property
    def base(self):
        """ Get base. """
        return self._base
    
        
    def seqToInt(self,sequence_string):
        """
        Return the integer encoded by `sequence_string`, reading it as a
        number written in base len(alphabet) with the most significant symbol
        first.

        sequence_string: string (or other sized iterable) of alphabet symbols.

        An empty sequence maps to 0, mirroring intToSeq(0, 0) == "".

        Raises KeyError if a symbol is not in the alphabet, and ValueError
        (from int()) if the alphabet has fewer than two symbols.
        """
    
        # int("", base) raises ValueError; define the empty sequence as zero so
        # that the empty string round-trips through intToSeq.
        if len(sequence_string) == 0:
            return 0

        sequence_in_base = "".join(self.string_to_base[s] for s in sequence_string)
        
        return int(sequence_in_base,self._base)
    
    
    def intToSeq(self,sequence_integer,seq_length):
        """
        Return the equivalent string to the integer assuming "alphabet".

        sequence_integer: non-negative integer to decode.
        seq_length: number of symbols in the result; unused high-order
                    positions are padded with the first (zero) symbol.

        Raises IndexError if `sequence_integer` is negative or needs more than
        `seq_length` symbols.
        """
            
        # Pad with the zero symbol.  Wrap it in a list rather than calling
        # list() on it: list("AB") would split a multi-character symbol into
        # separate characters and produce the wrong number of slots.
        symbols = [self._alphabet[0]]*seq_length

        # Fill from the least significant (right-most) position backwards.
        position = seq_length - 1
        remaining = sequence_integer
        while remaining:
            if position < 0:
                # Explicit check: a negative index would silently wrap around.
                raise IndexError(
                    "integer %r cannot be written with %r symbol(s) in base %r"
                    % (sequence_integer, seq_length, self._base)
                )
            remaining, digit = divmod(remaining, self._base)
            symbols[position] = self._alphabet[digit]
            position -= 1
        
        return "".join(symbols)
    

In [32]:
class SamplePool(object):
    """ 
    Stupidly simple sampling object... 
    This is a just an object wrapping numpy's sampling method `choice`
    The point is that we add properties and methods to SamplePool object that
    help us query the pool in simulations. 
    """   
 
    def __init__(self, sequences, weights):
        """
        Store the pool members and their sampling probabilities.

        sequences: 1-D collection of pool members.
        weights: probability of drawing each member; passed straight to
                 numpy's `choice` as `p`, so it must match `sequences` in
                 length and sum to 1.
        """
        self._sequences = np.array(sequences)
        self._weights = np.array(weights)
        
    @property
    def sequences(self):
        """ Get sequences. """
        return self._sequences
        
    @property
    def weights(self):
        """ Get weights. """
        return self._weights
        
    def sample(self, size):
        """ Return `size` draws (with replacement) from the pool, using the stored weights. """
        return np.random.choice(self._sequences, size=size, replace=True, p=self._weights)

    def amplify(self,observation):
        """
        Amplify sampled sequences into a set of sequences with associated 
        probability distributions.  As implemented now, this won't distort
        any frequencies, but this method can be replaced in a subclass.

        observation: 1-D collection of sampled sequences (e.g. the output of
                     `sample`); repeated entries are counted.

        Returns (values, weights): the distinct observed entries in sorted
        order and the fraction of `observation` each one accounts for.
        """

        # What we *hope* is happening in the lab: every clone is multiplied by
        # the same factor (2**num_rounds), so relative frequencies are
        # unchanged.  Normalising the observed counts models exactly that.

        # np.unique gives the distinct entries and their counts without
        # allocating max(observation)+1 slots the way np.bincount would, which
        # matters when sequences are encoded as very large integers.
        values, counts = np.unique(observation, return_counts=True)

        total = counts.sum()
        if total == 0:
            # Nothing observed: return empty arrays rather than dividing by zero.
            return values, counts.astype(float)

        weights = counts/float(total)
       
        return values, weights
        
class Pool:
    """
    Base description of a pool to sample from: it records the length of the
    sequences in the pool and the alphabet they are built from.
    """

    def __init__(self,sequence_length=12,alphabet=(0,1)):
        """
        Remember the pool parameters.

        sequence_length: number of positions in each sequence.
        alphabet: sliceable collection of symbols; stored via a full slice, so
                  a list passed in is shallow-copied rather than aliased.
        """

        self._alphabet = alphabet[:]
        self._sequence_length = sequence_length

    @property
    def alphabet(self):
        """ Symbols the sequences are built from (read-only). """
        return self._alphabet

    @property
    def sequence_length(self):
        """ Number of positions per sequence (read-only). """
        return self._sequence_length

In [38]:

import sys


# Sanity check of SeqIntegerMapper: build a mapper with its default alphabet
# and show the alphabet and the base derived from it.
mapper = SeqIntegerMapper()
print(mapper.alphabet)
print(mapper.base)

# 14 copies of "Y", the last symbol of the default alphabet, i.e. the largest
# value a 14-letter sequence can encode.
x = "YYYYYYYYYYYYYY"

# Round trip: sequence -> integer -> sequence of the same length.
xint = mapper.seqToInt(x)
y = mapper.intToSeq(xint,len(x))


# Print the input, its integer code, sys.maxsize, whether the code exceeds
# sys.maxsize, and the decoded sequence (which should equal the input).
print(x,xint,sys.maxsize,xint>sys.maxsize,y)

('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y')
20
YYYYYYYYYYYYYY 1638399999999999999 9223372036854775807 False YYYYYYYYYYYYYY


In [40]:
# Number of distinct 15-letter sequences over a 20-letter alphabet, compared
# with sys.maxsize on this machine.
20**15 > sys.maxsize

True

In [43]:
# 9223372036854775807 is the sys.maxsize value printed earlier; multiplying it
# by 1000 checks that a Python int can exceed that bound.
a = 9223372036854775807*1000

In [44]:
# Display the product to confirm that every digit is retained.
print(a)

9223372036854775807000
