In [1]:
## Forward simulator of haplotype frequencies with recombination

In [1]:
import numpy as np

In [2]:
# define class haplotype with attributes recombination rate and length in bp
class Haplotype:
    def __init__(self, scaledRecombinationRate=-1, popsizePrevGeneration=0):
        """Initialize a haplotype object.
        """
        self.myParentsFromPrevGeneration  = []
        self.myRecombinationIntervals = []
        self.numRecombinations = -1
        if (scaledRecombinationRate > 0 and popsizePrevGeneration > 0):
            self.haplotype_birth(scaledRecombinationRate, popsizePrevGeneration)
        else: 
            print("Uninitialized haplotype created. Still a foetus! Who am I, why am I here?")
            
    def haplotype_birth(self, scaledRecombinationRate, popsizePrevGeneration):
        """Populate all internal haploytpe variables.
        """
        if (self.numRecombinations != -1):
            print("Already alive! Get out of my room!")
            return(None)
        self.numRecombinations = np.random.poisson(scaledRecombinationRate)
        self.get_parents(popsizePrevGeneration)
        self.get_proportions()
        
    # returns a 1D array containing the index of the parents of the haplotype drawn from the previous generation
    def get_parents (self, popsizePrevGeneration):
        """Returns a 1D array containing the index of the parents 
        of the current haplotype drawn from the previous generation
        """
        numberOfParents = ((self.numRecombinations) + 1)
        # random sampling of parents indexes from uniform distribution with replacement
        self.myParentsFromPrevGeneration = np.random.randint(popsizePrevGeneration, size=numberOfParents)
       
    # returns a 1D array with proportions of each parent haplotype for descendant by 
    # computing differences between 0, successive breakpoints and 1  
    def get_proportions (self):
        """Picks randomly as many breakpoints as crossovers and 
        returns 1D array with intervals between 0, the breakpoints 
        and 1, i.e. the proportions of parent haplotypes"""
        breakpoints = np.random.uniform(0, 1, self.numRecombinations) # draw as many values from a uniform distribution as number of recombinations
        l = np.array([0, 1]) # create fixed limits for interval of proportions
        self.myRecombinationIntervals = np.diff(np.sort(np.append(l, breakpoints), axis = None)) # calculate differences between sorted interval limits and breakpoints
        
    def print_haplotype(self):
        """Print details of this haplotype.
        """
#        print("Number of recombinations: %d"%(self.numRecombinations))
        print("                 Parents: %s"%(", ".join([str(x) for x in self.myParentsFromPrevGeneration])))
        print("             Proportions: %s"%(", ".join([str(np.round(x, 3)) for x in self.myRecombinationIntervals])))
        

In [5]:
for i in range(200):
    testHaplotype = Haplotype()
    testHaplotype.haplotype_birth(1,100)
    #print("Replicate: %d"%(i))
    testHaplotype.print_haplotype()

Uninitialized haplotype created. Still a foetus! Who am I, why am I here?
                 Parents: 79
             Proportions: 1.0
Uninitialized haplotype created. Still a foetus! Who am I, why am I here?
                 Parents: 66
             Proportions: 1.0
Uninitialized haplotype created. Still a foetus! Who am I, why am I here?
                 Parents: 5
             Proportions: 1.0
Uninitialized haplotype created. Still a foetus! Who am I, why am I here?
                 Parents: 52
             Proportions: 1.0
Uninitialized haplotype created. Still a foetus! Who am I, why am I here?
                 Parents: 94
             Proportions: 1.0
Uninitialized haplotype created. Still a foetus! Who am I, why am I here?
                 Parents: 68, 2
             Proportions: 0.442, 0.558
Uninitialized haplotype created. Still a foetus! Who am I, why am I here?
                 Parents: 92, 33, 15
             Proportions: 0.386, 0.364, 0.25
Uninitialized haplotype created. St

In [6]:
# define class generation with attribute 'populate' which is a for loop of 
# instantiation of class haplotype as many times as Ncurrent
class Generation:
    def __init__(self, scaledRecombinationRate=-1, popsizePrevGeneration=0, popsizeCurGeneration=0):
        """Initialize a generation object.
        """
        self.haplotypes = []
        if (popsizeCurGeneration == 0):
            popsizeCurGeneration = popsizePrevGeneration
        if scaledRecombinationRate >= 0 and popsizePrevGeneration > 0 and popsizeCurGeneration > 0:
            self.populate_generation(scaledRecombinationRate, popsizePrevGeneration, popsizeCurGeneration)

    # function to populate haplotypes of this generation 
    def populate_generation(self, scaledRecombinationRate, popsizePrevGeneration, popsizeCurGeneration):
        """Populate the haplotypes of this generation.
        """
        self.haplotypes = [Haplotype(scaledRecombinationRate, popsizePrevGeneration) for x in range(popsizeCurGeneration)]

    # function to print generations information:
    def print_generation(self):
        """Print the information on this generation.
        """
        print("Current population size: %d"%(len(self.haplotypes)))
        for x in range(len(self.haplotypes)):
            self.haplotypes[x].print_haplotype()
    

In [7]:
genX = Generation(scaledRecombinationRate=1, popsizePrevGeneration=100)
genX.print_generation()

Current population size: 100
                 Parents: 97
             Proportions: 1.0
                 Parents: 13
             Proportions: 1.0
                 Parents: 12
             Proportions: 1.0
                 Parents: 77, 8
             Proportions: 0.098, 0.902
                 Parents: 18
             Proportions: 1.0
                 Parents: 23, 53, 48, 47
             Proportions: 0.729, 0.164, 0.011, 0.096
                 Parents: 45, 47
             Proportions: 0.665, 0.335
                 Parents: 23
             Proportions: 1.0
                 Parents: 4
             Proportions: 1.0
                 Parents: 25, 71, 27
             Proportions: 0.211, 0.229, 0.56
                 Parents: 95, 64
             Proportions: 0.021, 0.979
                 Parents: 4
             Proportions: 1.0
                 Parents: 68
             Proportions: 1.0
                 Parents: 77, 14
             Proportions: 0.617, 0.383
                 Parents: 84, 86
     

In [None]:
class Simulation:
    def __init__(self, recombinationRate=-1, haplotypeLength=0, popsizeGenZero=0, numOfGenerations=0):
        """Initialize a simulation object.
        """
        self.generations = []
        
        # specify needed parameters from given variables
        self.scaledRecombinationRate = recombinationRate*haplotypeLength
        self.numOfGenerations = numOfGenerations
        
        # open relevant files containing the original haplotypes to track lineages at the end
        # and info about population size changes along the simulated generations
        self.popsizesList = open('popSizes.txt', 'r+')
        self.haplotypesGenZero = open('haplotypesGenZero.txt', 'r+')
        
        
        ### something about the population sizes ###
        if popsizeGenZero #####
            popsizePrevGeneration = ####
        
        if recombinationRate >= 0  and popsizeGenZero > 0 and numOfGenerations > 0:
            self.produce_simulation(scaledRecombinationRate, popsizePrevGeneration, popsizeCurGeneration)
          
                
    def produce_simulation(self, scaledRecombinationRate, popsizePrevGeneration, popsizeCurGeneration, numOfGenerations):
        """Simulate a given number of generations.
        """
        self.generations = [Generation(self, scaledRecombinationRate, popsizePrevGeneration, popsizeCurGeneration) 
                            for x in range(numOfGenerations)]
        
    
    def save_previous_generation(self):
        """Print haplotype information of the last generation of the simulation.
        """
        print()
        
    def deconvolve(self, haplotypesGenZero):
        """Tracks lineages backwards all the way to generation zero 
        to identify parent of origin of each locus.
        """
        

In [None]:
def deconvolve (self):
# imagine a generation 2, for which we know the parents from generation 1
# but now we also want to store the info as a function of generation 0
     # returns a 1D array containing the index of the parents of the haplotype drawn from the previous generation
    
# 1. identify parents of each haplotype in a given generation
# 2. identify parents of those parents in the n-1 generation
# Output: two arrays, one with index of parents in n-1 generation and one with corresponding 
# proportions of those parents 
    
    for haplotype in self.generations[2]:
        myGrandparents = []
        myProportionsOfGrandparents = []
    
        for parent in haplotype[0]: #for each parent of a given haplotype in gen 2
        # seek grandparents: find those indices in n-1 generation list
            print self.generations[1][parent] #get the grandparent haplotype indices
            myGrandparents.append(self.generations[1][parent]) # this is a list of haplotypes
            return myGrandparents
        
        for proportion in haplotype[1]: #for each proportion of a given haplotype in gen 2
            if proportion <= self.generations[1][myGrandparents[proportion]]: #if equal values of props just append
                myProportionsOfGrandparents.append(proportion)
          
                
            if proportion > self.generations[1][myGrandparents[proportion]]: #if bigger prop in grandkid
                #append the prop of the grandparent to list
                myProportionsOfGrandparents.append(self.generations[1][myGrandparents[proportion]])
                
                #calculate difference between grandkid prop and first grandparent prop
                remainingProportion = proportion - self.generations[1][myGrandparents[proportion]]
                
                #append difference to the list
                myProportionsOfGrandparents.append(remainingProportion)
                
                #merge proportions if they come from same grandparent
                
                
                

In [32]:
grandma = [[1, 4], [0.2, 0.8]]
grandpa = [[2, 3], [0.3, 0.7]]
grandparentslist = list(zip(grandma, grandpa))

biglist = list(zip(grandparentslist, [1,2,3]))

grandchild = [[0, 1], [0.4, 0.6]]
grandchild[0]
print(grandparentslist)

print(biglist)

[([1, 4], [2, 3]), ([0.2, 0.8], [0.3, 0.7])]
[(([1, 4], [2, 3]), 1), (([0.2, 0.8], [0.3, 0.7]), 2)]


In [34]:
for i in grandchild[0]:
    print (biglist[0][i])

([1, 4], [2, 3])
1
