Skip to content

Commit

Permalink
tests passing, 6
Browse files Browse the repository at this point in the history
  • Loading branch information
jgallowa07 committed Apr 4, 2019
1 parent 1c420c9 commit 76290cb
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 46 deletions.
44 changes: 38 additions & 6 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,21 @@
from __future__ import division

import msprime
import pyslim
import tskit
import unittest


_slim_example_files = [
"tests/slim_trees/high_dispersal_2d_slim.trees",
"tests/slim_trees/neutral_slim.trees",
"tests/slim_trees/sweep_slim.trees"]

_trivial_tables = [
"tests/trivial_tree_tables/nodes.txt",
"tests/trivial_tree_tables/edges.txt"]


def setUp():
pass

Expand Down Expand Up @@ -49,9 +61,29 @@ def get_msprime_examples(self):
Generate some msprime simulations
across a range of parameters for encoding tests
"""
for n in [2, 10, 20]:
for mutrate in [0.0]:
for recrate in [1e-4, 1e-5]:
for l in [1e2, 1e3]:
yield msprime.simulate(n, length=l, mutation_rate=mutrate,
recombination_rate=recrate)
seed = 23
for n in [10, 20]:
for recrate in [0, 1e-3]:
for l in [1e2, 1e3]:
yield msprime.simulate(
n, length=l, mutation_rate=0,
recombination_rate=recrate, random_seed=seed)

def get_slim_examples(self):
"""
Load some slim tree sequences
for testing.
"""
for filename in _slim_example_files:
yield pyslim.load(filename)

def get_trivial_ts(self):
"""
load in a trivial tree seuqence for testing purposes
this is the same topology of a tree sequence example found in
https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006581
"""
nodes = open(_trivial_tables[0], "r")
edges = open(_trivial_tables[1], "r")
ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)
return ts
9 changes: 6 additions & 3 deletions tests/test_one_to_one.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ def test_Inverse(self):

for ts in self.get_msprime_examples():
dts = self.DiscretizeTreeSequence(ts)
edts = tsencode.encode(dts, return_8bit=False)
# edts1 = tsencode.encode(dts, return_8bit=False)
encoder = tsencode.TsEncoder(dts)
encoder.add_one_to_one()
edts = encoder.get_encoding()
de_dts = self.DecodeTree(edts)
self.assertTreeSequenceEqual(dts, de_dts)

Expand Down Expand Up @@ -50,7 +53,7 @@ def DecodeTree(self, A):
if((top < 0) | (bot < 0)):
continue
parent = GlueInt8(top, bot)
edge_table.add_row(left=column, right=column+1, parent=parent, child=row)
edge_table.add_row(left=column, right=column + 1, parent=parent, child=row) # NOQA
tables.sort()
tables.simplify()
ts = tables.tree_sequence()
Expand All @@ -67,7 +70,7 @@ def DiscretizeTreeSequence(self, ts):
edges = tables.edges
oldest_time = max(nodes.time)
nodes.set_columns(flags=nodes.flags,
time=(nodes.time/oldest_time)*256,
time=(nodes.time / oldest_time) * 256,
population=nodes.population)
edges.set_columns(left=np.round(edges.left),
right=np.round(edges.right),
Expand Down
21 changes: 20 additions & 1 deletion tsencode/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,25 @@
import itertools


def get_genome_coordinates(ts, dim=1):
"""
Loop through the tree sequence and
get all coordinates of each genome of diploid individual.
:param dim: the dimentionality, 1,2, or 3.
"""

coordinates = []
for i in range(dim):
coordinates.append(np.zeros(ts.num_samples))
for ind in ts.individuals():
for d in range(dim):
for geno in range(2):
coordinates[d][ind.nodes[geno]] = ind.location[d]

return coordinates


def splitInt16(int16):
'''
Take in a 16 bit integer, and return the top and bottom 8 bit integers
Expand All @@ -32,7 +51,7 @@ def GlueInt8(int8_t, int8_b):
int8_b = np.uint8(int8_b)
bits_a = np.binary_repr(int8_t, 8)
bits_b = np.binary_repr(int8_b, 8)
ret = int(bits_a+bits_b, 2)
ret = int(bits_a + bits_b, 2)
return np.uint16(ret)


Expand Down
2 changes: 2 additions & 0 deletions tsencode/one_to_one.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
'''
DEPRECATED: this function can be acheived through the
TsEncoder.add_one_to_one() method
Encoding Tree Sequence,
This is code that takes in a treeSequence object from msprime,
Expand Down
92 changes: 56 additions & 36 deletions tsencode/tsEncoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def __init__(self,
self.height = treeSequence.num_nodes
self.width = width
if width is None:
self.width = int(treeSequence.sequence_length)
self.width = treeSequence.sequence_length
self.width = int(self.width)
self.datatype = dtype
if dtype is None:
self.datatype = np.float64
Expand All @@ -40,7 +41,7 @@ def initialize_layer(self):
"""

nn = self.ts.num_nodes
layer = np.zeros([nn, int(self.width), 1]).astype(self.datatype)
layer = np.zeros([nn, int(self.width), 1]).astype(self.datatype) - 1
if(self.layerIndex < 0):
self.Encoding = layer
self.layerIndex = 0
Expand All @@ -51,7 +52,7 @@ def initialize_layer(self):
return None

def map_locus_to_column(self, locus):
return int((locus/self.ts.sequence_length)*self.width)
return int((locus / self.ts.sequence_length) * self.width)

def add_node_time_layer(self):
"""
Expand Down Expand Up @@ -90,8 +91,8 @@ def add_parent_pointer(self, split=False):
right = self.map_locus_to_column(edge.right)
if(split):
top, bot = splitInt16(edge.parent)
self.Encoding[child, left:right, self.layerIndex-1] = bot
self.Encoding[child, left:right, self.layerIndex] = top
self.Encoding[child, left:right, self.layerIndex - 1] = top
self.Encoding[child, left:right, self.layerIndex] = bot
else:
self.Encoding[child, left:right, self.layerIndex] = edge.parent

Expand All @@ -113,7 +114,8 @@ def add_branch_length_layer(self):

return None

def normalize_layers(self, layers=[], scale=256):
def normalize_layers(self, layers=[], scale=256, trans="linear"):
# TODO make log scale norm
'''
This function will normailize a layer by finding the
max value in that layer, and normailizing all values
Expand All @@ -125,54 +127,72 @@ def normalize_layers(self, layers=[], scale=256):
for i in layers:
fl = self.Encoding[:, :, i].flatten()
sh = self.Encoding[:, :, i].shape
ma = max(fl)
nor = ((fl/ma)*scale)
if trans == "linear":
ma = max(fl)
nor = ((fl / ma) * scale)
else:
# This still needs work: Talk to Peter
log_fl = np.log(fl + 1)
ma = max(log_fl)
nor = ((log_fl / ma) * scale)
self.Encoding[:, :, i] = nor.reshape(sh)

return None

def add_prop_layer(self):
def add_prop_layer(self, initial_weights, function):
# TODO Finish making this universal
"""
None -> None
This will be a general propagation layer
"""

pass

def add_spatial_prop_layer(self, function, dim=2, normalizePropWeights=True):
"""
(msprime TreeSequence) -> None
This will be a propagation layer which
will use spatial locations as the initial weights
The parameter, function, will be used to calculate
weights of all parents as a function of their children's
weights
"""

dim = len(initial_weights)
for i in range(dim):
self.initialize_layer()
coordinates = []
for i in range(dim):
coordinates.append(np.zeros(self.ts.num_samples))
for ind in self.ts.individuals():
for d in range(dim):
for geno in range(2):
coordinates[d][ind.nodes[geno]] = ind.location[d]
wt = weighted_trees(self.ts, coordinates, function)

# check to see that each set of weights provided is equal to the number
# of samples, If not throw an error.
if self.ts.num_samples is not len(initial_weights[i]):
# TODO raise error
return None

wt = weighted_trees(self.ts, initial_weights, function)
for t in wt:
left = self.map_locus_to_column(t.interval[0])
right = self.map_locus_to_column(t.interval[1])

# If the image has been squished enough, skip this tree because it
# won't add anything to the encoding.
if left is right:
continue

nodes = np.array([n for n in t.nodes()])
inter = right-left
inter = right - left
weights = np.array([w for w in t.node_weights()])
for i in range(dim):
self.Encoding[nodes, left:right, self.layerIndex-i] = np.repeat(weights[:, i], inter).reshape([len(weights), inter]) # NOQA
l, r = left, right
n, i, w = nodes, inter, weights
for d in range(dim):
pl = self.layerIndex - d
# this may be confusing, but essentially
# it's just a vectorized way to populate the encoding with the node weights # NOQA
self.Encoding[n, l:r, pl] = np.repeat(w[:, (dim-1)-d], i).reshape([len(w), i]) # NOQA

# how I was originally doing it
# self.Encoding[nodes, left:right, self.layerIndex-i] = np.repeat(weights[:, i], inter).reshape([len(weights), inter]) # NOQA

return None

def add_one_to_one(self):
"""
This function should replicate the one-to-one function found int
tsencode/one_to_one.py.
For now, it is mostly for testing purposes do to code that has been
written to inverse this function.
"""

self.add_node_time_layer()
self.add_parent_pointer(split=True)

return None

Expand All @@ -194,7 +214,7 @@ def visualize(self, saveas=None, show=True):
# if there is less than three layers, add trivial layers to the image
if(self.layerIndex < 2):
nn = self.ts.num_nodes
trivial_layers = np.zeros([nn, int(self.width), 2-self.layerIndex]).astype(np.uint8) #NOQA
trivial_layers = np.zeros([nn, int(self.width), 2 - self.layerIndex]).astype(np.uint8) # NOQA
img_array = np.append(img_array, trivial_layers, axis=2)
else:
img_array = img_array[:, :, :3]
Expand Down

0 comments on commit 76290cb

Please sign in to comment.