In [None]:
#!/usr/bin/env python3

#Import Packages
import pandas as pd
import os
import numpy as np
import sys
import argparse

#readmafs()
def readmafs(popA, popB, neutral, mutation, seed):
    """Load two population MAF tables and two site lists, and return the
    shared sites that fall in each list.

    Parameters
    ----------
    popA, popB : str
        Paths to tab-separated allele-frequency tables with a header row.
        Each must provide the columns ``chromo``, ``position``, ``major``,
        ``minor``, ``ref`` and ``knownEM``. A path whose last dot-separated
        component is ``gz`` is rejected.
    neutral, mutation : str
        Paths to tab-separated site lists with a header row providing at
        least ``chromo`` and ``position``.
    seed : int
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(neu_der, mut_der)``: the sites present in both population tables
        (same chromo/position/major/minor/ref) that also appear in the
        neutral list and in the mutation list respectively. The population
        frequencies are carried as ``knownEM_x`` (popA) and ``knownEM_y``
        (popB), following the default pandas merge suffixes.

    Raises
    ------
    SystemExit
        If a MAF path ends in ``gz`` or a required header is missing.
    """
    #Reject gzipped MAF files (only the ".gz" suffix is checked)
    for maf_path in (popA, popB):
        if maf_path.split(".")[-1] == "gz":
            sys.exit("ERR: MAF file should be unzipped")

    #Import Data
    popA_df = pd.read_csv(popA, sep='\t', header=0)
    popB_df = pd.read_csv(popB, sep='\t', header=0)
    neutral_df = pd.read_csv(neutral, sep='\t', header=0)
    mutation_df = pd.read_csv(mutation, sep='\t', header=0)

    #Check Headers
    # major/minor/ref are merge keys below, so they are validated here too;
    # otherwise a missing column would surface as a KeyError from pd.merge.
    maf_cols = {'chromo', 'position', 'major', 'minor', 'ref', 'knownEM'}
    site_cols = {'chromo', 'position'}
    if not maf_cols.issubset(popA_df.columns):
        sys.exit("ERR: Check file headers")
    if not maf_cols.issubset(popB_df.columns):
        sys.exit("ERR: Check file headers")
    if not site_cols.issubset(neutral_df.columns):
        sys.exit("ERR: Check file headers.")
    if not site_cols.issubset(mutation_df.columns):
        sys.exit("ERR: Check file headers")

    #Parse Data
    # Sites typed identically in both populations.
    der = pd.merge(popA_df, popB_df,
                   on=['chromo','position','major','minor','ref'],
                   how='inner')

    def _restrict_to(sites):
        # Outer merge + indicator, then keep rows found on both sides.
        # Kept in this form (rather than how='inner') so the resulting row
        # order, which later seeded sampling depends on, is unchanged.
        merged = pd.merge(der, sites, on=['chromo','position'],
                          how='outer', indicator=True)
        return merged[merged['_merge']=='both'].drop(columns=['_merge'])

    neu_der = _restrict_to(neutral_df)
    mut_der = _restrict_to(mutation_df)

    # Previously these frames were built and then discarded.
    return neu_der, mut_der


def calcRAB(neu_der, mut_der, seed, n_neutral=10000):
    """Compute the R_AB ratio of mutation sites against a seeded random
    sample of neutral sites.

    Parameters
    ----------
    neu_der : pandas.DataFrame
        Neutral sites; must have ``knownEM_x`` and ``knownEM_y`` columns.
    mut_der : pandas.DataFrame
        Mutation sites; must have ``knownEM_x`` and ``knownEM_y`` columns.
    seed : int
        Seed passed to ``np.random.seed``. Note that this resets NumPy's
        global random state as a side effect.
    n_neutral : int, optional
        Maximum number of neutral rows to sample (default 10000, the value
        that used to be hard-coded). If ``neu_der`` has fewer rows, all of
        them are used, in permuted order.

    Returns
    -------
    (RAB, neu1) : tuple
        ``RAB`` is ``LAB / LBA`` where
        ``LAB = sum(f_AD * (1 - f_BD)) / sum(f_AN * (1 - f_BN))`` and
        ``LBA = sum(f_BD * (1 - f_AD)) / sum(f_BN * (1 - f_AN))``;
        ``neu1`` is the sampled neutral DataFrame.
    """
    np.random.seed(seed)
    # A prefix of a random permutation is a sample without replacement.
    index1 = np.random.permutation(len(neu_der))[:n_neutral]
    neu1 = neu_der.iloc[index1]

    # Work on plain arrays so the sums are vectorised; np.sum, like the
    # builtin sum it replaces, propagates NaN instead of skipping it.
    f_AD = mut_der['knownEM_x'].to_numpy()
    f_BD = mut_der['knownEM_y'].to_numpy()
    f_AN = neu1['knownEM_x'].to_numpy()
    f_BN = neu1['knownEM_y'].to_numpy()

    LAB = np.sum(f_AD * (1 - f_BD)) / np.sum(f_AN * (1 - f_BN))
    LBA = np.sum(f_BD * (1 - f_AD)) / np.sum(f_BN * (1 - f_AN))
    RAB = LAB / LBA

    return RAB, neu1

#summarize_jackknife()
def summarize_jackknife(jx_array):
    """Print and return the mean and the 2.5% / 97.5% percentiles of a set
    of resampled RAB values.

    These statements previously ran at module level, where ``jx_array`` is
    not yet defined when the notebook is executed from a fresh kernel.

    Parameters
    ----------
    jx_array : array-like of float
        Resampled statistics, e.g. the array returned by ``jackknife``.

    Returns
    -------
    (avg, q025, q975) : tuple of float

    Raises
    ------
    ValueError
        If ``jx_array`` is empty.
    """
    jx_array = np.asarray(jx_array, dtype=float)
    if jx_array.size == 0:
        raise ValueError("jx_array is empty")
    q025, q975 = np.percentile(jx_array, [2.5, 97.5])
    avg = np.mean(jx_array)
    print("avg[2.5%,97.5%] = ", avg,"[",q025,",",q975,"]")
    return avg, q025, q975


#jackknife()
def jackknife(mut_der, neu1, psites, iter):
    """Recompute RAB on repeated random subsamples of the mutation sites.

    Each replicate keeps ``round(len(mut_der) * psites)`` mutation sites,
    drawn without replacement, and compares them against the same fixed
    neutral sample ``neu1``.

    The draws come from NumPy's global random state, which is NOT reseeded
    here: seed it once before calling if reproducibility is needed.
    Reseeding inside the loop would make every replicate draw the same
    subsample.

    Parameters
    ----------
    mut_der : pandas.DataFrame
        Mutation sites with ``knownEM_x`` and ``knownEM_y`` columns.
    neu1 : pandas.DataFrame
        Neutral sites with ``knownEM_x`` and ``knownEM_y`` columns.
    psites : float
        Fraction of mutation sites kept in each replicate.
    iter : int
        Number of replicates. (The name shadows the builtin but is kept
        for existing callers.)

    Returns
    -------
    numpy.ndarray
        One RAB value per replicate.

    Raises
    ------
    ValueError
        If ``psites`` selects no mutation site at all.
    """
    n_sites = int(round(len(mut_der) * psites))
    if iter > 0 and n_sites < 1:
        raise ValueError("psites selects no mutation sites")

    mut_A = mut_der['knownEM_x'].to_numpy()
    mut_B = mut_der['knownEM_y'].to_numpy()
    f_AN = neu1['knownEM_x'].to_numpy()
    f_BN = neu1['knownEM_y'].to_numpy()

    # The neutral denominators do not change between replicates.
    neutral_AB = np.sum(f_AN * (1 - f_BN))
    neutral_BA = np.sum(f_BN * (1 - f_AN))

    jx = []
    for _ in range(iter):
        index = np.random.permutation(len(mut_der))[:n_sites]
        f_AD = mut_A[index]
        f_BD = mut_B[index]
        LAB = np.sum(f_AD * (1 - f_BD)) / neutral_AB
        LBA = np.sum(f_BD * (1 - f_AD)) / neutral_BA
        jx.append(LAB / LBA)
    return np.array(jx)

In [18]:
#RAB-Mutation
# Computes RAB for the mutation sites against a seeded random sample of
# neutral sites, then a resampling interval for it.
# neu_der and mut_der must already exist in the kernel namespace: no
# statement in this cell assigns them.
np.random.seed(678)
# Prefix of a random permutation = sample without replacement; keeps at most
# 10000 neutral rows (all of them if neu_der has fewer).
index1=np.random.permutation(len(neu_der))[:10000]
neu1=neu_der.iloc[index1]
# _x / _y are the two knownEM columns; D = mutation sites, N = neutral sites.
f_AD = mut_der['knownEM_x']
f_BD = mut_der['knownEM_y']
f_AN = neu1['knownEM_x']
f_BN = neu1['knownEM_y']
LAB = sum(f_AD*(1-f_BD))/sum(f_AN*(1-f_BN))
LBA = sum(f_BD*(1-f_AD))/sum(f_BN*(1-f_AN))
RAB = LAB/LBA
print("RAB_mutation = ", RAB)

#Jack Knife 100 reps
# Each replicate keeps a random 30% of the mutation sites (drawn without
# replacement) and reuses the same neutral sample neu1. The draws continue
# the random stream seeded with 678 above; there is no reseed in the loop.
jx=[]
for i in range(100):
    Nsites=int(round(len(mut_der)*.30))
    index=np.random.permutation(len(mut_der))[:Nsites]
    mut_sub=mut_der.iloc[index]
    f_AD = mut_sub['knownEM_x']
    f_BD = mut_sub['knownEM_y']
    # f_AN / f_BN are reassigned to the same neu1 columns on every pass.
    f_AN = neu1['knownEM_x']
    f_BN = neu1['knownEM_y']
    LAB = sum(f_AD*(1-f_BD))/sum(f_AN*(1-f_BN))
    LBA = sum(f_BD*(1-f_AD))/sum(f_BN*(1-f_AN))
    # Overwrites the full-data RAB printed above; after the loop RAB holds
    # the last replicate's value.
    RAB = LAB/LBA
    jx.append(RAB)  
jx_array = np.array(jx)
# Mean and 2.5% / 97.5% percentiles of the 100 replicate values.
q025, q975 = np.percentile(jx_array, [2.5, 97.5])
avg = np.mean(jx_array)
print("avg[2.5%,97.5%] = ", avg,"[",q025,",",q975,"]")

#RAB-Neutral
# Control: RAB of one neutral sample (neu1, seed 678) against a second
# neutral sample (neu2, seed 7924), both drawn from the same neu_der.
# NOTE(review): the two samples are drawn independently, so they are not
# disjoint. If len(neu_der) <= 10000, both slices contain every row (only the
# order differs) and RAB is 1 up to floating-point rounding. The recorded
# output 1.0000000000000024 is consistent with that -- confirm the size of
# neu_der, and consider splitting one permutation into two disjoint halves.
np.random.seed(678)
index1=np.random.permutation(len(neu_der))[:10000]
neu1=neu_der.iloc[index1]
np.random.seed(7924)
index2=np.random.permutation(len(neu_der))[:10000]
neu2=neu_der.iloc[index2]

# neu1 takes the role the mutation sites had; neu2 is the denominator set.
f_AD = neu1['knownEM_x']
f_BD = neu1['knownEM_y']
f_AN = neu2['knownEM_x']
f_BN = neu2['knownEM_y']
LAB = sum(f_AD*(1-f_BD))/sum(f_AN*(1-f_BN))
LBA = sum(f_BD*(1-f_AD))/sum(f_BN*(1-f_AN))
RAB = LAB/LBA
print("RAB_neutral = ", RAB)



RAB_mutation =  0.11079400088221092
avg[2.5%,97.5%] =  0.11071637166995166 [ 0.09699298197540579 , 0.12235884795568479 ]
RAB_neutral =  1.0000000000000024
