In [129]:
from pysam import VariantFile

In [130]:
# Factor used to fold (chromosome index, position) into one integer:
# encoded = chromIndex * CHROM_POWER + position.
CHROM_POWER = 10**9

# Input call sets, one reader per family member (named after the role each plays below).
childVCF = VariantFile("HG002-NA24385-50x_filtered.vcf")
fatherVCF = VariantFile("HG003.hs37d5.60x.1.converted_filtered.vcf")
motherVCF = VariantFile("HG004.hs37d5.60x.1.converted_filtered.vcf")

# Output file for the selected child records; it reuses the child's header.
childDeNovoVCF = VariantFile('temp.vcf', 'w', header=childVCF.header)

In [131]:
def writeToChildDeNovo(condition, variant):
    """Write `variant` to the module-level `childDeNovoVCF` when `condition` is truthy.

    Nothing is written (and nothing is raised) for a falsy `condition`,
    which includes `None`. Always returns None.
    """
    if not condition:
        return
    childDeNovoVCF.write(variant)

In [132]:
def updateVariantIterator(variant, VCF, pos):
    """Advance a VCF reader by one record and update its encoded position.

    Parameters:
        variant: the reader's current record (must expose `.chrom`), or None.
        VCF: iterator that yields the following records.
        pos: encoded position of `variant`, i.e. chromIndex * CHROM_POWER + position.

    Returns:
        (variant, pos, flag)
        - flag is True when the reader was NOT advanced to a usable record:
          the current record is already on "chrX", there is no current record,
          or the reader is exhausted (in which case the returned variant is None).
        - flag is False when a new record was read. `pos` is re-encoded for the
          new record unless that record is on "chrX", in which case `pos` is
          returned unchanged.
    """
    # Nothing to advance from: keep the inputs and signal the caller to stop.
    if variant is None or variant.chrom == "chrX":
        return variant, pos, True

    nextVariant = next(VCF, None)
    # The reader ran dry; the original code dereferenced None here.
    # NOTE(review): reporting exhaustion as flag=True is an assumption - confirm with callers.
    if nextVariant is None:
        return None, pos, True

    if nextVariant.chrom != "chrX":
        # Bump the chromosome index when the contig name no longer matches "chr<index>".
        chromIndex = pos // CHROM_POWER
        if nextVariant.chrom != "chr" + str(chromIndex):
            chromIndex += 1
        pos = chromIndex * CHROM_POWER + nextVariant.pos
    return nextVariant, pos, False

In [None]:
def checkDeNovoOnExistingVariants(childVariant, fatherVariant, motherVariant):
    """Decide whether a child record that shares its position with parental records is de novo.

    Parameters:
        childVariant: the child's record at this position.
        fatherVariant: the father's record at the same position, or None.
        motherVariant: the mother's record at the same position, or None.

    Each record must expose `.ref`, `.alts` and `.samples`; the 'GT' of the
    first sample is used as the genotype.

    Returns:
        bool - True only when both parents are (0, 1) and the child's call
        cannot be explained by the parental alleles. Every situation that is
        not implemented yet (a missing parent, or any parental genotype pair
        other than (0, 1)/(0, 1)) yields False, so nothing is reported for it.
    """
    # Single-parent comparison is not implemented: never flagged as de novo.
    if motherVariant is None or fatherVariant is None:
        return False

    def _firstGenotype(variant):
        # Genotype tuple of the first sample in the record.
        return next(variant.samples.itervalues())['GT']

    def _sameAlleles(a, b):
        return a.ref == b.ref and a.alts == b.alts

    motherGT = _firstGenotype(motherVariant)
    fatherGT = _firstGenotype(fatherVariant)
    childGT = _firstGenotype(childVariant)

    HET = (0, 1)
    HOM_ALT = (1, 1)
    TWO_ALTS = (1, 2)

    if motherGT == HET and fatherGT == HET:
        likeMother = _sameAlleles(childVariant, motherVariant)
        likeFather = _sameAlleles(childVariant, fatherVariant)

        # Child carries, on one chromosome, the same variant as one of the parents.
        if childGT == HET and (likeMother or likeFather):
            return False

        # Both parents carry the same variant and the child received it from both.
        if childGT == HOM_ALT and likeMother and likeFather:
            return False

        # Child carries one ALT from each parent (either order).
        # TODO: parents whose REF differs from the child's are not handled here.
        if (childGT == TWO_ALTS
                and childVariant.ref == motherVariant.ref
                and childVariant.ref == fatherVariant.ref):
            firstAlt, secondAlt = childVariant.alts[0], childVariant.alts[1]
            motherAlt = motherVariant.alts[0]
            fatherAlt = fatherVariant.alts[0]
            if ((firstAlt == motherAlt and secondAlt == fatherAlt)
                    or (firstAlt == fatherAlt and secondAlt == motherAlt)):
                return False

        return True

    # Mother (0, 1) with father (1, 1) is treated as inherited; every other
    # parental genotype pair is not implemented and is also reported as
    # "not de novo". Returning an explicit False keeps the result a bool
    # instead of falling off the end with None.
    return False

In [21]:
def findDeNovoVariants():
    """Walk the child VCF alongside both parental VCFs and write candidate de novo records.

    The module-level readers `childVCF`, `fatherVCF` and `motherVCF` are consumed
    in a single forward pass, like a three-way merge. Every record is mapped to
    one integer, chromIndex * CHROM_POWER + pos, so records from different
    chromosomes can be ordered with plain comparisons.

    For each child record:
      * neither parent has a record at that position -> written as de novo;
      * one or both parents have a record there -> the verdict comes from
        `checkDeNovoOnExistingVariants` and is passed to `writeToChildDeNovo`.

    A parental reader that runs out of records gets an infinite position, so
    the remaining child records are still processed (the previous version
    dereferenced None at that point and aborted the scan).

    Assumes the three files are sorted consistently and that the first
    parental records are on the first chromosome - TODO confirm.
    NOTE(review): the chromosome index is bumped whenever a record's contig is
    not literally "chr<index>", so for names such as "chrX" it grows on every
    record; the encoding is only meaningful for contigs named chr1, chr2, ...
    """
    EXHAUSTED = float('inf')  # compares greater than every encoded position

    def _encode(prevPos, variant):
        # Encoded position of `variant`, given the encoded position that preceded it.
        chromIndex = prevPos // CHROM_POWER
        if variant.chrom != 'chr' + str(chromIndex):
            chromIndex += 1
        return chromIndex * CHROM_POWER + variant.pos

    def _advance(records, prevPos):
        # Read the next record; (None, EXHAUSTED) once the reader has nothing left.
        variant = next(records, None)
        if variant is None or prevPos == EXHAUSTED:
            return None, EXHAUSTED
        return variant, _encode(prevPos, variant)

    # Keep the iterator returned by fetch() and use it for every later read,
    # so each parental file has exactly one read cursor.
    fatherRecords = fatherVCF.fetch()
    motherRecords = motherVCF.fetch()
    fatherVariant = next(fatherRecords, None)
    motherVariant = next(motherRecords, None)
    fatherPos = EXHAUSTED if fatherVariant is None else CHROM_POWER + fatherVariant.pos
    motherPos = EXHAUSTED if motherVariant is None else CHROM_POWER + motherVariant.pos

    childPos = CHROM_POWER
    for childVariant in childVCF.fetch():
        # Child position with the chromosome index in the billions, so that
        # positions are monotonic across the whole file.
        childPos = _encode(childPos, childVariant)

        # Skip parental records that lie before the child record.
        while childPos > motherPos or childPos > fatherPos:
            if motherPos == fatherPos:
                # Both parents have a record here but the child does not (case 4).
                fatherVariant, fatherPos = _advance(fatherRecords, fatherPos)
                motherVariant, motherPos = _advance(motherRecords, motherPos)
            elif motherPos < fatherPos:
                # Only the mother has a record at this earlier position (case 3).
                motherVariant, motherPos = _advance(motherRecords, motherPos)
            else:
                # Only the father has a record at this earlier position (case 3).
                fatherVariant, fatherPos = _advance(fatherRecords, fatherPos)

        # From here on the child position is <= both parental positions.

        if childPos < motherPos and childPos < fatherPos:
            # Neither parent has a record at this position: de novo.
            writeToChildDeNovo(True, childVariant)
            continue

        if childPos < motherPos:
            # Father and child share the position (case 2).
            writeToChildDeNovo(checkDeNovoOnExistingVariants(childVariant, fatherVariant, None), childVariant)
            fatherVariant, fatherPos = _advance(fatherRecords, fatherPos)
            continue

        if childPos < fatherPos:
            # Mother and child share the position (case 2).
            writeToChildDeNovo(checkDeNovoOnExistingVariants(childVariant, None, motherVariant), childVariant)
            motherVariant, motherPos = _advance(motherRecords, motherPos)
            continue

        # All three have a record at the same position (case 1).
        writeToChildDeNovo(checkDeNovoOnExistingVariants(childVariant, fatherVariant, motherVariant), childVariant)
        fatherVariant, fatherPos = _advance(fatherRecords, fatherPos)
        motherVariant, motherPos = _advance(motherRecords, motherPos)

In [None]:
# Run the scan, then always close the output VCF so buffered records are
# flushed to disk even if the scan raises. Re-run the cell that opens the
# files before running this cell again.
try:
    findDeNovoVariants()
finally:
    childDeNovoVCF.close()