In [133]:
#!/usr/bin/env python3

#Import Packages
import pandas as pd
import numpy as np
import sys
import argparse
import os

# NOTE(review): machine-specific absolute path; the notebook only runs where
# this directory exists. Consider a configurable data directory instead.
os.chdir('/Users/abc6435/Desktop')

#Imports Data
# All four inputs are tab-separated tables whose first row holds column names.
popA = pd.read_csv("hKIWA_derived.maf", sep='\t', header=0)
popB = pd.read_csv("cKIWA_derived.maf", sep='\t', header=0)
neutral = pd.read_csv("intergenic.txt", sep='\t', header=0)
mutation = pd.read_csv("lossoffunction.txt", sep='\t', header=0)

#Check Headers
# The population tables must carry every key used by the first merge below
# ('chromo', 'position', 'major', 'minor', 'ref') as well as 'knownEM';
# checking them all here turns a late KeyError inside pd.merge into a clear
# message that names the offending file and the missing columns.
pop_cols = ['chromo', 'position', 'major', 'minor', 'ref', 'knownEM']
site_cols = ['chromo', 'position']
for table_name, table, needed in (
    ("hKIWA_derived.maf", popA, pop_cols),
    ("cKIWA_derived.maf", popB, pop_cols),
    ("intergenic.txt", neutral, site_cols),
    ("lossoffunction.txt", mutation, site_cols),
):
    absent = [col for col in needed if col not in table.columns]
    if absent:
        sys.exit("ERR: Check file headers: {} is missing {}".format(
            table_name, ", ".join(absent)))

#Join popA and popB. Derive Mutations
# Columns shared by both population tables but not used as keys (notably
# 'knownEM') receive the '_x' (popA) and '_y' (popB) suffixes.
der = pd.merge(popA, popB, on=['chromo','position','major','minor','ref'], how='inner')
# An inner merge only keeps rows present in both tables, so no merge
# indicator or follow-up filtering is required.
neu_der = pd.merge(der, neutral, on=['chromo','position'], how='inner')
mut_der = pd.merge(der, mutation, on=['chromo','position'], how='inner')

#Report Number of Mutations
print("Number of derived mutations =", len(mut_der))

#Define calcRAB()
def calcRAB(neu_der, mut_der, seed, n_neutral=10000):
    """Compute the ratio LAB / LBA for ``mut_der``, normalised by neutral sites.

    With ``x`` = 'knownEM_x' and ``y`` = 'knownEM_y':

        LAB = sum_mut(x * (1 - y)) / sum_neu(x * (1 - y))
        LBA = sum_mut(y * (1 - x)) / sum_neu(y * (1 - x))

    where ``sum_neu`` runs over a random subsample of ``neu_der``.

    Parameters
    ----------
    neu_der : pandas.DataFrame
        Neutral sites; must contain the columns 'knownEM_x' and 'knownEM_y'.
    mut_der : pandas.DataFrame
        Sites of interest; must contain the same two columns.
    seed : int
        Passed to ``np.random.seed`` so the neutral subsample is repeatable.
        Note that this reseeds NumPy's global random generator.
    n_neutral : int, optional
        Maximum number of neutral rows drawn without replacement
        (default 10000). If ``neu_der`` has fewer rows, all of them are used.

    Returns
    -------
    numpy.float64
        ``LAB / LBA``. A zero denominator yields ``inf``/``nan`` (with a
        NumPy warning) rather than an exception.
    """
    np.random.seed(seed)
    picked = np.random.permutation(len(neu_der))[:n_neutral]
    neu1 = neu_der.iloc[picked]

    # Work on plain arrays so the sums are vectorised; np.sum propagates NaN.
    f_AD = mut_der['knownEM_x'].to_numpy()
    f_BD = mut_der['knownEM_y'].to_numpy()
    f_AN = neu1['knownEM_x'].to_numpy()
    f_BN = neu1['knownEM_y'].to_numpy()

    LAB = np.sum(f_AD * (1 - f_BD)) / np.sum(f_AN * (1 - f_BN))
    LBA = np.sum(f_BD * (1 - f_AD)) / np.sum(f_BN * (1 - f_AN))
    return LAB / LBA

#Define calcRAB_neu()
def calcRAB_neu(neu_der, seed, n_neutral=10000):
    """Compute LAB / LBA with neutral sites in both numerator and denominator.

    Two random subsamples of ``neu_der`` are drawn; the first plays the role
    of the sites of interest and the second the role of the neutral
    normaliser, using the same formula as the other ``calcRAB*`` functions
    on the 'knownEM_x' / 'knownEM_y' columns.

    Parameters
    ----------
    neu_der : pandas.DataFrame
        Neutral sites; must contain the columns 'knownEM_x' and 'knownEM_y'.
    seed : int
        Passed to ``np.random.seed`` before drawing. Note that this reseeds
        NumPy's global random generator.
    n_neutral : int, optional
        Maximum number of rows in each of the two subsamples (default 10000).

    Returns
    -------
    numpy.float64
        ``LAB / LBA``. A zero denominator yields ``inf``/``nan`` (with a
        NumPy warning) rather than an exception.

    Notes
    -----
    The two subsamples are drawn independently, so they may share rows. When
    ``neu_der`` has no more than ``n_neutral`` rows, both subsamples contain
    every row and the result is 1 up to floating-point rounding.
    """
    np.random.seed(seed)
    n_rows = len(neu_der)
    # Draw order matters for reproducibility: first sample, then second.
    neu1 = neu_der.iloc[np.random.permutation(n_rows)[:n_neutral]]
    neu2 = neu_der.iloc[np.random.permutation(n_rows)[:n_neutral]]

    # Work on plain arrays so the sums are vectorised; np.sum propagates NaN.
    f_AD = neu1['knownEM_x'].to_numpy()
    f_BD = neu1['knownEM_y'].to_numpy()
    f_AN = neu2['knownEM_x'].to_numpy()
    f_BN = neu2['knownEM_y'].to_numpy()

    LAB = np.sum(f_AD * (1 - f_BD)) / np.sum(f_AN * (1 - f_BN))
    LBA = np.sum(f_BD * (1 - f_AD)) / np.sum(f_BN * (1 - f_AN))
    return LAB / LBA

#Define calcRAB_sub()
def calcRAB_sub(neu_sub, mut_sub, n_neutral=10000):
    """Compute LAB / LBA for already-subsampled site tables.

    Uses the same formula as ``calcRAB`` on the 'knownEM_x' / 'knownEM_y'
    columns, but does not reseed the random generator: the neutral draw
    consumes NumPy's global random state as it currently stands.

    Parameters
    ----------
    neu_sub : pandas.DataFrame
        Neutral sites; must contain the columns 'knownEM_x' and 'knownEM_y'.
    mut_sub : pandas.DataFrame
        Sites of interest; must contain the same two columns.
    n_neutral : int, optional
        Maximum number of neutral rows drawn without replacement
        (default 10000). If ``neu_sub`` has fewer rows, all of them are used.

    Returns
    -------
    numpy.float64
        ``LAB / LBA``. A zero denominator yields ``inf``/``nan`` (with a
        NumPy warning) rather than an exception.
    """
    picked = np.random.permutation(len(neu_sub))[:n_neutral]
    neu1 = neu_sub.iloc[picked]

    # Work on plain arrays so the sums are vectorised; np.sum propagates NaN.
    f_AD = mut_sub['knownEM_x'].to_numpy()
    f_BD = mut_sub['knownEM_y'].to_numpy()
    f_AN = neu1['knownEM_x'].to_numpy()
    f_BN = neu1['knownEM_y'].to_numpy()

    LAB = np.sum(f_AD * (1 - f_BD)) / np.sum(f_AN * (1 - f_BN))
    LBA = np.sum(f_BD * (1 - f_AD)) / np.sum(f_BN * (1 - f_AN))
    return LAB / LBA

#Define samplesites()
def samplesites(sites, psites):
    """Return a random subset of the rows of ``sites``.

    Parameters
    ----------
    sites : pandas.DataFrame
        Table to subsample.
    psites : float
        Fraction of rows to keep; the row count is ``len(sites) * psites``
        rounded with Python's built-in ``round``.

    Returns
    -------
    pandas.DataFrame
        The selected rows, in shuffled order, drawn without replacement using
        NumPy's global random generator.
    """
    total = len(sites)
    keep = int(round(total * psites))
    shuffled = np.random.permutation(total)
    return sites.iloc[shuffled[:keep]]

#Define jackknife()
def jackknife(neu_der, mut_der, psites, iter):
    """Repeat the LAB / LBA estimate on random subsamples of both site tables.

    Parameters
    ----------
    neu_der : pandas.DataFrame
        Neutral sites table handed to ``samplesites``.
    mut_der : pandas.DataFrame
        Sites-of-interest table handed to ``samplesites``.
    psites : float
        Fraction of rows kept from each table in every replicate.
    iter : int
        Number of replicates.

    Returns
    -------
    numpy.ndarray
        One ``calcRAB_sub`` value per replicate, in replicate order.
    """
    # Arguments are evaluated left to right, so in each replicate the neutral
    # table is subsampled before the mutation table.
    replicates = [
        calcRAB_sub(samplesites(neu_der, psites), samplesites(mut_der, psites))
        for _ in range(iter)
    ]
    return np.array(replicates)

Number of derived mutations = 52


In [125]:
# Ratio for the loss-of-function sites; neutral subsample drawn with seed 20.
rab_lof = calcRAB(neu_der, mut_der, 20)
print("calcRAB", rab_lof)

# Neutral-versus-neutral control computed with the same seed.
rab_control = calcRAB_neu(neu_der, 20)
print("calcRAB_neu", rab_control)

# 50 resampling replicates, each keeping 30% of the neutral and mutation sites.
jx_array = jackknife(neu_der, mut_der, 0.30, 50)
print("jackknife", jx_array)

# Cell output: 2.5th and 97.5th percentiles of the replicate values.
np.percentile(jx_array, [2.5, 97.5])

calcRAB 0.49996021018755277
calcRAB_neu 0.997745958689642
jackknife [0.47159731 0.51891518 0.48490448 0.52695551 0.49841548 0.48144475
 0.49460164 0.54992997 0.48821392 0.48887386 0.47862708 0.46922962
 0.52793898 0.46031483 0.48735413 0.51089491 0.53144162 0.51669986
 0.56949866 0.53362936 0.50097712 0.52363841 0.52479674 0.53233723
 0.50293681 0.51490698 0.51218943 0.49600597 0.47755311 0.505347
 0.51814853 0.46088669 0.5187938  0.50039767 0.48793453 0.51202884
 0.49450223 0.46478481 0.51378969 0.52299989 0.5386367  0.48034703
 0.49934603 0.50314021 0.55086327 0.51046443 0.49189439 0.51427589
 0.46121493 0.57167116]


array([0.46096054, 0.5653057 ])