# Calculate $R_{AB}$

## Set up

In [55]:
#Import Packages
import pandas as pd
import os
import numpy as np

#Set Working Directory
os.chdir('/Users/annamariacalderon/Desktop/')

#Set Variables
popA_mutation_sites = 'hKIWA_der_lossoffunction.maf'
popB_mutation_sites = 'cKIWA_der_lossoffunction.maf'
popA_intergenic_sites = 'hKIWA_der_intergenic1.maf'
popB_intergenic_sites = 'cKIWA_der_intergenic1.maf'


#Import Data
popA_mut = pd.read_csv(popA_mutation_sites, sep='\t', header=(0))
popB_mut = pd.read_csv(popB_mutation_sites, sep='\t', header=(0))
popA_int = pd.read_csv(popA_intergenic_sites, sep='\t', header=(0))
popB_int = pd.read_csv(popB_intergenic_sites, sep='\t', header=(0))


## Checkpoint
Checks that popA and popB have identical sites. 

In [56]:
#Create Keys
popA_mut_keys = popA_mut['chromo'].astype(str) + "_" + popA_mut['position'].astype(str)
popB_mut_keys = popB_mut['chromo'].astype(str) + "_" + popB_mut['position'].astype(str)
popA_int_keys = popA_int['chromo'].astype(str) + "_" + popA_int['position'].astype(str)
popB_int_keys = popB_int['chromo'].astype(str) + "_" + popB_int['position'].astype(str)

#Compare Keys
popA_mut_keys.equals(popB_mut_keys)
popA_int_keys.equals(popB_int_keys)

True

## Parse Data
Stores the KnownEM column in vectors.

In [57]:
f_AD = popA_mut['knownEM']
f_BD = popB_mut['knownEM']
f_AN = popA_int['knownEM']
f_BN = popB_int['knownEM']

## Calculate $L_{AB}$
This sums the frequencies of a set of derived mutations (D) in popA relative to popB, and then normalizes by a set of intergenic sites (N) in popA relative to popB. 
$$
    \frac{\sum\limits_{i \in D} f_i^A (1 - f_i^B)}
    {\sum\limits_{j \in N} f_j^A (1 - f_j^B)}
$$

In [58]:
#Calculate Lab
LAB = sum(f_AD*(1-f_BD))/sum(f_AN*(1-f_BN))
print(LAB)

0.0583142880875348


## Calculate $L_{BA}$
This sums the frequencies of a set of derived mutations (D) in population b relative to population a, and then normalizes by a set of intergenic sites (N) in population b relative to population a. 
$$
    \frac{\sum\limits_{i \in D} f_i^B (1 - f_i^A)}
    {\sum\limits_{j \in N} f_j^B (1 - f_j^A)}
$$

In [59]:
#Calculate Lba
LBA = sum(f_BD*(1-f_AD))/sum(f_BN*(1-f_BN))
print(LBA)

0.24268178997850134


## Calculate $R_{BA}$
$R_{BA}$, then is a ratio of ratios
$$
    \frac{L_{AB}}
    {L_{BA}}
$$

In [60]:
RAB=LAB/LBA
print(RAB)

0.24029115696196543
