tests passing, 6

jgallowa07 · Apr 4, 2019 · 76290cb · 76290cb
1 parent 1c420c9
commit 76290cb
Show file tree

Hide file tree

Showing 5 changed files with 122 additions and 46 deletions.
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -6,9 +6,21 @@
 from __future__ import division
 
 import msprime
+import pyslim
+import tskit
 import unittest
 
 
+_slim_example_files = [
+    "tests/slim_trees/high_dispersal_2d_slim.trees",
+    "tests/slim_trees/neutral_slim.trees",
+    "tests/slim_trees/sweep_slim.trees"]
+
+_trivial_tables = [
+    "tests/trivial_tree_tables/nodes.txt",
+    "tests/trivial_tree_tables/edges.txt"]
+
+
 def setUp():
     pass
 
@@ -49,9 +61,29 @@ def get_msprime_examples(self):
         Generate some msprime simulations
         across a range of parameters for encoding tests
         """
-        for n in [2, 10, 20]:
-            for mutrate in [0.0]:
-                for recrate in [1e-4, 1e-5]:
-                    for l in [1e2, 1e3]:
-                        yield msprime.simulate(n, length=l, mutation_rate=mutrate,
-                                               recombination_rate=recrate)
+        seed = 23
+        for n in [10, 20]:
+            for recrate in [0, 1e-3]:
+                for l in [1e2, 1e3]:
+                    yield msprime.simulate(
+                        n, length=l, mutation_rate=0,
+                        recombination_rate=recrate, random_seed=seed)
+
+    def get_slim_examples(self):
+        """
+        Load some slim tree sequences
+        for testing.
+        """
+        for filename in _slim_example_files:
+            yield pyslim.load(filename)
+
+    def get_trivial_ts(self):
+        """
+        load in a trivial tree seuqence for testing purposes
+        this is the same topology of a tree sequence example found in
+        https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006581
+        """
+        nodes = open(_trivial_tables[0], "r")
+        edges = open(_trivial_tables[1], "r")
+        ts = tskit.load_text(nodes=nodes, edges=edges, strict=False)
+        return ts
diff --git a/tests/test_one_to_one.py b/tests/test_one_to_one.py
@@ -20,7 +20,10 @@ def test_Inverse(self):
 
         for ts in self.get_msprime_examples():
             dts = self.DiscretizeTreeSequence(ts)
-            edts = tsencode.encode(dts, return_8bit=False)
+            # edts1 = tsencode.encode(dts, return_8bit=False)
+            encoder = tsencode.TsEncoder(dts)
+            encoder.add_one_to_one()
+            edts = encoder.get_encoding()
             de_dts = self.DecodeTree(edts)
             self.assertTreeSequenceEqual(dts, de_dts)
 
@@ -50,7 +53,7 @@ def DecodeTree(self, A):
                 if((top < 0) | (bot < 0)):
                     continue
                 parent = GlueInt8(top, bot)
-                edge_table.add_row(left=column, right=column+1, parent=parent, child=row)
+                edge_table.add_row(left=column, right=column + 1, parent=parent, child=row) # NOQA
         tables.sort()
         tables.simplify()
         ts = tables.tree_sequence()
@@ -67,7 +70,7 @@ def DiscretizeTreeSequence(self, ts):
         edges = tables.edges
         oldest_time = max(nodes.time)
         nodes.set_columns(flags=nodes.flags,
-                          time=(nodes.time/oldest_time)*256,
+                          time=(nodes.time / oldest_time) * 256,
                           population=nodes.population)
         edges.set_columns(left=np.round(edges.left),
                           right=np.round(edges.right),

diff --git a/tsencode/helpers.py b/tsencode/helpers.py
@@ -8,6 +8,25 @@
 import itertools
 
 
+def get_genome_coordinates(ts, dim=1):
+    """
+    Loop through the tree sequence and
+    get all coordinates of each genome of diploid individual.
+
+    :param dim: the dimentionality, 1,2, or 3.
+    """
+
+    coordinates = []
+    for i in range(dim):
+        coordinates.append(np.zeros(ts.num_samples))
+    for ind in ts.individuals():
+        for d in range(dim):
+            for geno in range(2):
+                coordinates[d][ind.nodes[geno]] = ind.location[d]
+
+    return coordinates
+
+
 def splitInt16(int16):
     '''
     Take in a 16 bit integer, and return the top and bottom 8 bit integers
@@ -32,7 +51,7 @@ def GlueInt8(int8_t, int8_b):
     int8_b = np.uint8(int8_b)
     bits_a = np.binary_repr(int8_t, 8)
     bits_b = np.binary_repr(int8_b, 8)
-    ret = int(bits_a+bits_b, 2)
+    ret = int(bits_a + bits_b, 2)
     return np.uint16(ret)
 
 

diff --git a/tsencode/one_to_one.py b/tsencode/one_to_one.py
@@ -1,4 +1,6 @@
 '''
+DEPRECATED: this function can be acheived through the
+TsEncoder.add_one_to_one() method
 
 Encoding Tree Sequence,
 This is code that takes in a treeSequence object from msprime,

diff --git a/tsencode/tsEncoder.py b/tsencode/tsEncoder.py
@@ -27,7 +27,8 @@ def __init__(self,
             self.height = treeSequence.num_nodes
         self.width = width
         if width is None:
-            self.width = int(treeSequence.sequence_length)
+            self.width = treeSequence.sequence_length
+        self.width = int(self.width)
         self.datatype = dtype
         if dtype is None:
             self.datatype = np.float64
@@ -40,7 +41,7 @@ def initialize_layer(self):
         """
 
         nn = self.ts.num_nodes
-        layer = np.zeros([nn, int(self.width), 1]).astype(self.datatype)
+        layer = np.zeros([nn, int(self.width), 1]).astype(self.datatype) - 1
         if(self.layerIndex < 0):
             self.Encoding = layer
             self.layerIndex = 0
@@ -51,7 +52,7 @@ def initialize_layer(self):
         return None
 
     def map_locus_to_column(self, locus):
-        return int((locus/self.ts.sequence_length)*self.width)
+        return int((locus / self.ts.sequence_length) * self.width)
 
     def add_node_time_layer(self):
         """
@@ -90,8 +91,8 @@ def add_parent_pointer(self, split=False):
             right = self.map_locus_to_column(edge.right)
             if(split):
                 top, bot = splitInt16(edge.parent)
-                self.Encoding[child, left:right, self.layerIndex-1] = bot
-                self.Encoding[child, left:right, self.layerIndex] = top
+                self.Encoding[child, left:right, self.layerIndex - 1] = top
+                self.Encoding[child, left:right, self.layerIndex] = bot
             else:
                 self.Encoding[child, left:right, self.layerIndex] = edge.parent
 
@@ -113,7 +114,8 @@ def add_branch_length_layer(self):
 
         return None
 
-    def normalize_layers(self, layers=[], scale=256):
+    def normalize_layers(self, layers=[], scale=256, trans="linear"):
+        # TODO make log scale norm
         '''
         This function will normailize a layer by finding the
         max value in that layer, and normailizing all values
@@ -125,54 +127,72 @@ def normalize_layers(self, layers=[], scale=256):
         for i in layers:
             fl = self.Encoding[:, :, i].flatten()
             sh = self.Encoding[:, :, i].shape
-            ma = max(fl)
-            nor = ((fl/ma)*scale)
+            if trans == "linear":
+                ma = max(fl)
+                nor = ((fl / ma) * scale)
+            else:
+                # This still needs work: Talk to Peter
+                log_fl = np.log(fl + 1)
+                ma = max(log_fl)
+                nor = ((log_fl / ma) * scale)
             self.Encoding[:, :, i] = nor.reshape(sh)
 
         return None
 
-    def add_prop_layer(self):
+    def add_prop_layer(self, initial_weights, function):
         # TODO Finish making this universal
         """
         None -> None
 
         This will be a general propagation layer
         """
-
-        pass
-
-    def add_spatial_prop_layer(self, function, dim=2, normalizePropWeights=True):
-        """
-        (msprime TreeSequence) -> None
-
-        This will be a propagation layer which
-        will use spatial locations as the initial weights
-
-        The parameter, function, will be used to calculate
-        weights of all parents as a function of their children's
-        weights
-        """
-
+        dim = len(initial_weights)
         for i in range(dim):
             self.initialize_layer()
-        coordinates = []
-        for i in range(dim):
-            coordinates.append(np.zeros(self.ts.num_samples))
-        for ind in self.ts.individuals():
-            for d in range(dim):
-                for geno in range(2):
-                    coordinates[d][ind.nodes[geno]] = ind.location[d]
-        wt = weighted_trees(self.ts, coordinates, function)
+
+            # check to see that each set of weights provided is equal to the number
+            # of samples, If not throw an error.
+            if self.ts.num_samples is not len(initial_weights[i]):
+                # TODO raise error
+                return None
+
+        wt = weighted_trees(self.ts, initial_weights, function)
         for t in wt:
             left = self.map_locus_to_column(t.interval[0])
             right = self.map_locus_to_column(t.interval[1])
+
+            # If the image has been squished enough, skip this tree because it
+            # won't add anything to the encoding.
             if left is right:
                 continue
+
             nodes = np.array([n for n in t.nodes()])
-            inter = right-left
+            inter = right - left
             weights = np.array([w for w in t.node_weights()])
-            for i in range(dim):
-                self.Encoding[nodes, left:right, self.layerIndex-i] = np.repeat(weights[:, i], inter).reshape([len(weights), inter])    # NOQA
+            l, r = left, right
+            n, i, w = nodes, inter, weights
+            for d in range(dim):
+                pl = self.layerIndex - d
+                # this may be confusing, but essentially
+                # it's just a vectorized way to populate the encoding with the node weights # NOQA
+                self.Encoding[n, l:r, pl] = np.repeat(w[:, (dim-1)-d], i).reshape([len(w), i]) # NOQA
+
+                # how I was originally doing it
+                # self.Encoding[nodes, left:right, self.layerIndex-i] = np.repeat(weights[:, i], inter).reshape([len(weights), inter])    # NOQA
+
+        return None
+
+    def add_one_to_one(self):
+        """
+        This function should replicate the one-to-one function found int
+        tsencode/one_to_one.py.
+
+        For now, it is mostly for testing purposes do to code that has been
+        written to inverse this function.
+        """
+
+        self.add_node_time_layer()
+        self.add_parent_pointer(split=True)
 
         return None
 
@@ -194,7 +214,7 @@ def visualize(self, saveas=None, show=True):
         # if there is less than three layers, add trivial layers to the image
         if(self.layerIndex < 2):
             nn = self.ts.num_nodes
-            trivial_layers = np.zeros([nn, int(self.width), 2-self.layerIndex]).astype(np.uint8)    #NOQA
+            trivial_layers = np.zeros([nn, int(self.width), 2 - self.layerIndex]).astype(np.uint8) # NOQA
             img_array = np.append(img_array, trivial_layers, axis=2)
         else:
             img_array = img_array[:, :, :3]