In [1]:
## Forward simulator of haplotype frequencies with recombination

In [2]:
import numpy as np

In [10]:
# create generation 0 object
# set of original haplotypes which are sequences

In [2]:
# population sizes used to simulate a generation object, which contains haplotype objects:
# N0 (presumably generation 0), Nprev (previous generation) and Ncurrent (current generation)
N0, Nprev, Ncurrent = 100, 100, 100

In [222]:
# a haplotype object is a two-element list:
# [indexes of its parents, proportion contributed by each parent]
myparents, myproportions = np.array([1, 3]), np.array([0.6, 0.4])
H1_1 = [myparents, myproportions]
H1_1

[array([1, 3]), array([0.6, 0.4])]

In [None]:
# to create each haplotype in any given generation 3 functions are required: get_recombinations, get_parents, get_proportions

In [178]:
# returns a single integer: the number of recombination events for a new haplotype
def get_recombinations(r, L):
    """Draw the number of recombination events for one new haplotype.

    r -- recombination rate
    L -- length of the haplotype, used to rescale the rate

    Returns a single non-negative integer sampled from a Poisson
    distribution whose mean is r * L.
    """
    expectedEvents = r * L  # Poisson mean: the rate rescaled by the haplotype length
    return np.random.poisson(expectedEvents)

In [216]:
# example draw: the Poisson mean is r * L = 2e-8 * 10e6 = 0.2, so most draws are 0
get_recombinations(r=2e-8, L=10e6)

1

In [217]:
# returns a 1D array containing the index of the parents of the haplotype drawn from the previous generation
def get_parents (recombinations, Nprev):
    """Draw the parents of one new haplotype from the previous generation.

    One more parent than there are recombination events is drawn
    (recombinations + 1).

    recombinations -- number of recombination events
    Nprev -- size of the previous generation; must be at least 1

    Returns a 1D integer array of length recombinations + 1 holding parent
    indexes in 1..Nprev (inclusive), sampled uniformly with replacement.
    """
    numberOfParents = recombinations + 1
    # NOTE: indexes are 1-based here, whereas Haplotype.get_parents draws
    # 0-based indexes; the original 1..Nprev range is kept on purpose.
    # randint samples uniformly with replacement and its upper bound is
    # exclusive, hence Nprev + 1. This replaces building an index array plus an
    # explicit uniform probability list of length Nprev on every call.
    return np.random.randint(1, Nprev + 1, size=numberOfParents)

In [221]:
# example draw: one recombination event means two parents are sampled
get_parents(recombinations=1, Nprev=100)

array([16, 92])

In [223]:
# computes a 1D array of breakpoints, as many floats as recombination events drawn from a uniform normalised distribution
# returns a 1D array with proportions of each parent haplotype for descendant by computing differences between 0, successive breakpoints and 1  
def get_proportions(recombinations):
    """Split the unit interval at random breakpoints, one per recombination event.

    recombinations -- number of breakpoints to draw uniformly from [0, 1)

    Returns a 1D float array with recombinations + 1 entries: the gaps between
    0, the sorted breakpoints and 1, i.e. the proportion of the descendant
    that comes from each parent haplotype.
    """
    # sorted breakpoints, flattened exactly like the 0/1 limits they are joined with
    cuts = np.sort(np.random.uniform(0, 1, recombinations), axis=None)
    # every breakpoint lies between the fixed limits, so 0 goes first and 1 last
    edges = np.concatenate(([0], cuts, [1]))
    return np.diff(edges)


In [225]:
# example draw: two breakpoints give three proportions
get_proportions(recombinations=2)

array([0.50385711, 0.46398656, 0.03215632])

In [46]:
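# define class Haplotype: built from a scaled recombination rate and the previous generation's population size; stores its parents, their proportions and the number of recombinations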
class Haplotype:
    """One haplotype, described by the parents it descends from.

    Attributes:
        numRecombinations -- number of recombination events; -1 while the
            haplotype is still uninitialized
        myParentsFromPrevGeneration -- 0-based indexes of the parents drawn
            from the previous generation (empty list until birth, then a
            1D integer array of length numRecombinations + 1)
        myRecombinationIntervals -- proportion of the haplotype inherited from
            each parent (empty list until birth, then a 1D float array of
            length numRecombinations + 1)
    """

    def __init__(self, scaledRecombinationRate=-1, popsizePrevGeneration=0):
        """Initialize a haplotype object.

        scaledRecombinationRate -- mean of the Poisson distribution the number
            of recombinations is drawn from; negative means "not provided"
        popsizePrevGeneration -- number of haplotypes in the previous
            generation; 0 means "not provided"

        When both arguments are provided the haplotype is populated right
        away; otherwise it is left uninitialized and a notice is printed.
        """
        self.myParentsFromPrevGeneration = []
        self.myRecombinationIntervals = []
        self.numRecombinations = -1
        # A rate of exactly 0 is a valid Poisson mean (no recombination at all),
        # so only negative rates (such as the default -1) mean "not provided".
        if scaledRecombinationRate >= 0 and popsizePrevGeneration > 0:
            self.haplotype_birth(scaledRecombinationRate, popsizePrevGeneration)
        else:
            print("Uninitialized haplotype created! Still a foetus!! Who am I, why am I here?")

    def haplotype_birth(self, scaledRecombinationRate, popsizePrevGeneration):
        """Populate all internal haplotype variables.

        Draws the number of recombinations, then the parents, then the
        proportions. Does nothing (apart from printing a notice) when the
        haplotype has already been populated. Always returns None.
        """
        if self.numRecombinations != -1:
            print("Already alive! Get out of my room!")
            return None
        self.numRecombinations = np.random.poisson(scaledRecombinationRate)
        self.get_parents(popsizePrevGeneration)
        self.get_proportions()
        return None

    def get_parents(self, popsizePrevGeneration):
        """Draw and store the parents of this haplotype.

        Samples numRecombinations + 1 indexes uniformly, with replacement,
        from 0..popsizePrevGeneration - 1, stores them in
        myParentsFromPrevGeneration and returns that same 1D array.
        """
        numberOfParents = self.numRecombinations + 1
        self.myParentsFromPrevGeneration = np.random.randint(popsizePrevGeneration, size=numberOfParents)
        return self.myParentsFromPrevGeneration

    def get_proportions(self):
        """Draw and store the proportion inherited from each parent.

        Picks as many uniform breakpoints in [0, 1) as there are
        recombinations, stores the gaps between 0, the sorted breakpoints and
        1 in myRecombinationIntervals and returns that same 1D array.
        """
        breakpoints = np.random.uniform(0, 1, self.numRecombinations)
        limits = np.array([0, 1])  # fixed ends of the interval being split
        self.myRecombinationIntervals = np.diff(np.sort(np.append(limits, breakpoints), axis=None))
        return self.myRecombinationIntervals

    def print_haplotype(self):
        """Print the parents and the proportions of this haplotype."""
        print("                 Parents: %s"%(", ".join([str(x) for x in self.myParentsFromPrevGeneration])))
        print("             Proportions: %s"%(", ".join([str(np.round(x, 3)) for x in self.myRecombinationIntervals])))
        

In [38]:
# exercise the default constructor: every replicate is an uninitialized haplotype
# (calling testHaplotype.haplotype_birth(1, 100) after construction would populate it)
for i in range(200):
    testHaplotype = Haplotype()
    print("Replicate: %d" % i)
    testHaplotype.print_haplotype()

Uninitialized haplotype created! Still a foetus!!
Replicate: 0
Number of recombinations: -1
                 Parents: 
             Proportions: 
Uninitialized haplotype created! Still a foetus!!
Replicate: 1
Number of recombinations: -1
                 Parents: 
             Proportions: 
Uninitialized haplotype created! Still a foetus!!
Replicate: 2
Number of recombinations: -1
                 Parents: 
             Proportions: 
Uninitialized haplotype created! Still a foetus!!
Replicate: 3
Number of recombinations: -1
                 Parents: 
             Proportions: 
Uninitialized haplotype created! Still a foetus!!
Replicate: 4
Number of recombinations: -1
                 Parents: 
             Proportions: 
Uninitialized haplotype created! Still a foetus!!
Replicate: 5
Number of recombinations: -1
                 Parents: 
             Proportions: 
Uninitialized haplotype created! Still a foetus!!
Replicate: 6
Number of recombinations: -1
                 Parents: 
     

In [47]:
# define class Generation; its method populate_generation instantiates class Haplotype popsizeCurGeneration times
class Generation:
    """A list of Haplotype objects that make up one generation."""

    def __init__(self, scaledRecombinationRate=-1, popsizePrevGeneration=0, popsizeCurGeneration=0):
        """Create the generation and, when the arguments allow it, fill it.

        A current population size of 0 means "same size as the previous
        generation". Haplotypes are only created when the recombination rate
        and both population sizes are strictly positive; otherwise the
        generation stays empty.
        """
        self.haplotypes = []
        currentSize = popsizeCurGeneration
        if currentSize == 0:
            currentSize = popsizePrevGeneration
        requirements = (scaledRecombinationRate, popsizePrevGeneration, currentSize)
        if all(value > 0 for value in requirements):
            self.populate_generation(scaledRecombinationRate, popsizePrevGeneration, currentSize)

    def populate_generation(self, scaledRecombinationRate, popsizePrevGeneration, popsizeCurGeneration):
        """Replace the haplotypes of this generation with freshly created ones.

        Builds popsizeCurGeneration Haplotype objects, each constructed from
        the recombination rate and the previous generation's size.
        """
        newborns = []
        for _ in range(popsizeCurGeneration):
            newborns.append(Haplotype(scaledRecombinationRate, popsizePrevGeneration))
        self.haplotypes = newborns

    def print_generation(self):
        """Print the population size followed by every haplotype's details."""
        print("Current population size: %d" % len(self.haplotypes))
        for haplotype in self.haplotypes:
            haplotype.print_haplotype()

In [48]:
# current population size is not given, so it defaults to the previous generation's size
genX = Generation(
    scaledRecombinationRate=1,
    popsizePrevGeneration=100,
)
genX.print_generation()

Current population size: 100
                 Parents: 92, 76
             Proportions: 0.134, 0.866
                 Parents: 77, 65, 75
             Proportions: 0.058, 0.355, 0.586
                 Parents: 25, 45, 95
             Proportions: 0.493, 0.274, 0.233
                 Parents: 32
             Proportions: 1.0
                 Parents: 70, 28, 79
             Proportions: 0.791, 0.142, 0.067
                 Parents: 34, 50
             Proportions: 0.817, 0.183
                 Parents: 28, 66
             Proportions: 0.242, 0.758
                 Parents: 98
             Proportions: 1.0
                 Parents: 74, 16
             Proportions: 0.907, 0.093
                 Parents: 73, 71, 29
             Proportions: 0.297, 0.436, 0.266
                 Parents: 27, 13
             Proportions: 0.759, 0.241
                 Parents: 91
             Proportions: 1.0
                 Parents: 38
             Proportions: 1.0
                 Parents: 34, 88, 0
       