-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_recombination.py
41 lines (36 loc) · 1.62 KB
/
load_recombination.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import logging
import numpy as np
from os import listdir
import pandas as pd
import pickle
logging.basicConfig(format="%(levelname)s (%(name)s %(lineno)s): %(message)s")
logger = logging.getLogger("indel")
logger.setLevel(logging.INFO)
def load_recombination(in_path, desired_len=0):
"""
Loads recombination map of the genome from text file ***for a single contig***.
Expects one text file per contig.
:param str in_path: Path to recombination map file.
:return: recombination map for contig
:rtype: np.ndarray
"""
logger.info("Loading recombination map from {}".format(in_path))
r = np.genfromtxt(in_path, dtype = None, usecols = (0,1), skip_header = 1)
indices = r['f0'] # positions (sparse)
rates = r['f1'] # recombination rates
# indices are 1 to maxKnownIndex r[-1][0], +1 for empty 0 index since we assume 1-based indexing
recombination_map = np.zeros(indices[-1] + 1)
recombination_map[indices] = rates
mask = np.ones(len(recombination_map), dtype=bool)
mask[indices] = False # now the mask is True only for the indices that are missing from the data
missing_indices = np.arange(len(recombination_map))[mask]
recombination_map[missing_indices] = np.interp(missing_indices, indices, rates)
recombination_map = recombination_map.reshape(len(recombination_map),1)
logger.info("Recombination map loaded.")
if desired_len:
return pad(recombination_map, desired_len)
return recombination_map
def pad(arr, new_len): # Pad to desired length
if len(arr) >= new_len:
return arr
return np.pad(arr, [(0,new_len-len(arr)), (0,0)], mode='edge')