From e87a7460e7905df220b6941128ded55882d8617b Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Mon, 1 May 2017 18:39:03 -0700 Subject: [PATCH 1/3] TST: Adding command to assign ids --- gneiss/cluster/_cluster.py | 25 +++++++++++++++++++++++++ gneiss/cluster/tests/test_cluster.py | 10 ++++++++++ 2 files changed, 35 insertions(+) diff --git a/gneiss/cluster/_cluster.py b/gneiss/cluster/_cluster.py index f057471..406d93e 100644 --- a/gneiss/cluster/_cluster.py +++ b/gneiss/cluster/_cluster.py @@ -5,6 +5,7 @@ # # The full license is in the file COPYING.txt, distributed with this software. # ---------------------------------------------------------------------------- +import uuid import pandas as pd import numpy as np import skbio @@ -16,6 +17,7 @@ from gneiss.plugin_setup import plugin from gneiss.cluster._pba import correlation_linkage, gradient_linkage from gneiss.sort import gradient_sort, mean_niche_estimator +from gneiss.util import rename_internal_nodes def correlation_clustering(table: pd.DataFrame) -> skbio.TreeNode: @@ -121,3 +123,26 @@ def gradient_clustering(table: pd.DataFrame, 'This method is primarily used to sort the table to reveal ' 'the underlying block-like structures.') ) + + +def assign_ids(tree: skbio.TreeNode) -> skbio.TreeNode: + f = lambda x: x.replace('-', '') + ids = ['%sL_%s' % (i, f(str(uuid.uuid4()))) + for i, n in enumerate(tree.levelorder(include_self=True)) + if not n.is_tip()] + t = rename_internal_nodes(tree, names=ids) + return t + + +plugin.methods.register_function( + function=assign_ids, + inputs={'tree': Phylogeny[Rooted]}, + outputs=[('tree', Phylogeny[Rooted])], + name='Assigns ids on internal nodes in the tree.', + input_descriptions={ + 'tree': ('The input tree with potential missing ids.')}, + parameters={}, + output_descriptions={ + 'tree': ('A tree with uniquely identifying ids.')}, + description=('Assigns UUIDs to uniquely identify internal nodes in the tree.') +) diff --git a/gneiss/cluster/tests/test_cluster.py b/gneiss/cluster/tests/test_cluster.py index 6ec5dfc..b1dd960 100644 --- a/gneiss/cluster/tests/test_cluster.py +++ b/gneiss/cluster/tests/test_cluster.py @@ -59,6 +59,16 @@ def test_gradient_artifact_weighted(self): self.assertNotEqual(str(res_clust_uw), str(res_clust_w)) + def test_assign_ids(self): + from qiime2.plugins.gneiss.methods import assign_ids + tree_f = get_data_path("tree.qza") + tree = qiime2.Artifact.load(tree_f) + out_tree = assign_ids(tree) + res_t = out_tree.tree._view(TreeNode) + for n in res_t.levelorder(include_self=True): + self.assertTrue(n.name is not None) + self.assertNotIn('-', n.name) + if __name__ == '__main__': unittest.main() From 2ad7bbf21b720bbcd661bac04f15ef38a3f61f85 Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Mon, 1 May 2017 18:49:06 -0700 Subject: [PATCH 2/3] ENH: Adding command to assign ids and fix polytomies --- CHANGELOG.md | 1 + gneiss/cluster/_cluster.py | 15 ++++++++++----- gneiss/cluster/tests/data/polytomy.qza | Bin 0 -> 3132 bytes gneiss/cluster/tests/data/tree.qza | Bin 0 -> 3130 bytes gneiss/cluster/tests/test_cluster.py | 15 ++++++++++++++- 5 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 gneiss/cluster/tests/data/polytomy.qza create mode 100644 gneiss/cluster/tests/data/tree.qza diff --git a/CHANGELOG.md b/CHANGELOG.md index c49de34..ecd55fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Version 0.3.2 * Added `balance_boxplot` and `balance_barplot` to make interpretation balance partitions easier. +* Added `assign_ids` command to allow for ids to be added manually. ## Version 0.3.0 * Added q2 support for linear regression and linear mixed effects models [#98](https://github.com/biocore/gneiss/pull/98) diff --git a/gneiss/cluster/_cluster.py b/gneiss/cluster/_cluster.py index 406d93e..0695051 100644 --- a/gneiss/cluster/_cluster.py +++ b/gneiss/cluster/_cluster.py @@ -126,11 +126,14 @@ def gradient_clustering(table: pd.DataFrame, def assign_ids(tree: skbio.TreeNode) -> skbio.TreeNode: - f = lambda x: x.replace('-', '') - ids = ['%sL_%s' % (i, f(str(uuid.uuid4()))) - for i, n in enumerate(tree.levelorder(include_self=True)) + def replace_dashes(x): + return str(x).replace('-', '') + t = tree.copy() + t.bifurcate() + ids = ['%sL_%s' % (i, replace_dashes(uuid.uuid4())) + for i, n in enumerate(t.levelorder(include_self=True)) if not n.is_tip()] - t = rename_internal_nodes(tree, names=ids) + t = rename_internal_nodes(t, names=ids) return t @@ -144,5 +147,7 @@ def assign_ids(tree: skbio.TreeNode) -> skbio.TreeNode: parameters={}, output_descriptions={ 'tree': ('A tree with uniquely identifying ids.')}, - description=('Assigns UUIDs to uniquely identify internal nodes in the tree.') + description=('Assigns UUIDs to uniquely identify internal nodes ' + 'in the tree. Also corrects for polytomies to create ' + 'strictly bifurcating trees.') ) diff --git a/gneiss/cluster/tests/data/polytomy.qza b/gneiss/cluster/tests/data/polytomy.qza new file mode 100644 index 0000000000000000000000000000000000000000..4d04aecbde8239f11fd709e6c70d070248552be2 GIT binary patch literal 3132 zcmd6pc{J2}AIE2rEM-Y38RYgLX3Q`Q(qf&=FiZv^m1Tx!$~wk+Q}*oHA4`-dB3ZIz zSIRYJCd`c)OO|VoE7iD^)T4Xvb1Kj29H;aA@qB;hbH3;M&-?xUem=i*zAwBfm}f5l z0N?{W%P7F^9z(l1Zl0VrOMSDDNF6nG9ij`2MAFfOX}Gv*!<>;i&M+N>nkEW`L?DPH z6x@SEbs{=Zom7LIJls`aOtUIW1gcI$quLBJfWy^l^lM_wAN1nX>&#fS8VC(dBoc)% z!Dvn@--VjgTJm+3FlZJQ%(4Wb?jAS>7dwrm!@F_ND$8Y4SdZB=e;O2vQ>DR6sbE20 zAOQbO-Z4cX57o__(7$V2d2Ng>2^b4=K`xi;XN~v`3&lR`f|}M^BBrn^SOyN(1MjZ0 zM5rS)*vJ87uPFwG90zUj^;?s+5!>_>-K=f*>2r_ZR0@ft>KSnH*Itw77#3#Wd7nui zrvG`(%vOu9M{#V^W?NVDrg-_2Je@pUNdGTW(fWbt|APUo=T%=XAiwD)s>DtW8JvO<{8_? zJ7d#BW7VS%ebqi-LBerI{(&wWQcEfBczR_xeWN!#Aji`#;|{%)VRx>mzuJ~Pd|h)m zz4u;;r)SntN;#1}MfFvTc_fD#PbvKu9yS6 zDEo%yS`LYb7jHDpNn+9}`f{T^XmX6EIkvvhsU*D3t6T03hu`8d+gcn}*cJf+ee$$4 zXd}U6r_w;U@vA1GkC#8JO^dy}rDY}35HA^&U4&^3UayZZ2g@nm7FIol0d_}2ex0}L zNTOdB+gLxp9QW`iuAcwB)hM4K-^St`_k;Z8Y7>@TN0#0};<03wm={B4+OE1eh8b(x zWuAF2l_97(fkuFxpmNQ61vQ5f4PMLdMZTaEKa7Ho1n&S^jc0;FyE}4FA!Sku!MohV zjovj3`v!FeO3;R6lV4hi9SrUm$|PIOhOOrhi{Sxexr+zIbrrH_o(J&@A&^w`VeWay zJrTa?0fjrr{-JNcT0Tti(r&%YJ^}iLB=wDqz55=>qfcmxp>Xu}@jks9D_DdPa`*j9 zjEQX9(U=jsfRBQj|5U)rlv!)xsJb&R(>b@`73Y+aXTR^sJr!sCCJF0&{7PfL;>g8GDlt~6Oi_hTXVJJJx?Z*ZK zFo)sj4>z}pmv=hh=T9hWC+W~z!s@OPs6F=X3wCzLONLqx z8w+rOAG@c)w7$SRt5FWtrHRB5dMgw9&Jn$+FxmqW{;+?Rf}Tp#=FNGUNfk%6Ew|S^ zAxF;tk@xtFwCe{lHYK12UbQrn*8)ktyZE9YYroAn5y-P}i>s1&ivX>kPYBX0I6{AH zy&TC~2jPUJ=PR?W8k|&?7G&}YTI26%$5)b57yrOkbqLrqreC)EC|-{vsWq8N`FF-K zAndjLtMbV+IiY$c1wEc+#CgbwXoU`BUpHauv9A9Yr7M2t_(bEF)&KrO>nq| zn9A^jWR3|Jxn@%1+w=cfZq7ztpL-FHl{>3w{mVH~Er(ThrHK9omIcyrKNN%r9bU`U zPo4=GmGuS8=0+qKHq4H(mJUrOEymweU-Y&;yDIey>IpEDM;hoiidymj4&w!q8_sOJ zd(b;Tf!qw_-A9{q3ijn>gW!pwVyc25$BCqHjkcVZwwVP4`r^erQ&7GT;Y7fYv zt{>*`NQe-2C>$e0A}10~mm8cOWQhBc`4`qMuLWLPw$}fh>Rc1wJ+zb36V=XAbsdbD zZ?s4*uz6tBG2^6g^0;}w89lT<0%)HmncG0*ZR35Gga^$e`ys2S93<>gzBk9?z^I`% z^^^13**gxGh%Az(VCn9iQb_g){7IGgh_+Y!>F7NS$dD|D)0CD^msM@(o)rV*f%!(P zEU)O}4R&4iCy=a-uDcB~CA$2J7E(@a1m#dpQ?zC_dF5#Mf+L@Zp<@~)k`PUOwKuUa zb2+b44x(_cd3mY&+>|jM&JC<94m2M2=PAX?u$=i0ZSyK$S$(HpTXE-$3GD|={u_SqraLxa3IO2U{OmUs8T_A5{{}?| BfPzQ}q6P>KRfmE!onek3qLaFY7EH?l?yN-udyssI zPDEd#Y5>v0T^ZDhDYAedP;m4Uhz1>tz{2|wYET%x1PR0SVlis4r|?n?6jh9wQo65# zEV1BfxYyEXYTB9?%iq)vMq#6;Oq#(>Sd}7+MKq`#({`y*#RRL`2!7xT_zn0=G? zwF19~>P}9Te`-&8@kSP>QKmTF<>h5RE6BKsib-383cADsGHIf0(t-uGgPTe%AW#Ur z57rCoK%+pgQPy3)?qiB6LOY&HrFcF3R@t*IaJjS|3DVXwHoQh4%W_=+SZX0yULDwh`S)%n0~aL zqqc)nl%%U`E$`}n>lE-%H_vo_t7%u~x*CZ0WICVjMNxB&tZHW|)I05OZ_d9gDYqxI z*3FUWwXei`NH#_~cWvTgmHOidgvbq-w53XQ!3ECTIE4F+_uOo&658IB^>d$!<1>*j zNw+UF;IiN7(&It-Q zT2lg7@81bdl0SP}LuxPymFD01h#GZEl9)eY)K4jH(y-oM{dlh;xcv9%rX!}0-x#j| zWf?reFJw5@H$P0In!bq{G1Hq=Rb8@5k`LHg_2f+FfW(`~{5vU$HA5;8Ipc!WR;|Y_ zFN9PnkB4;t`-?iM<7G3xrigxhR(cMGURjf->o^jLr%clNZ|k6Y+ns$T4?w9${gMK1 z%MWom%f=q4=9M%-(~rSZvv1C3YS$y2#s;*xvRLmP1XxNl`(t0srHQI#9FJly70xgt zKP00jG?0Go(E_)LLGAGJrO}PVK`Gr(c;QOIbtx*wXO7IR4uS16KK8t#@(9PKqD-4` zP8~Xb zuszvV<*>wMtfGu$RLB4HT0c%AK`_D^Ct{sR@D1fXpQl@ycO=$9mNMz>0%LLEOq@4U zcfZc8i!YGnWD}gVysWL8cZf7``KCK$Euu(_{rKpK2HQe&S`SH$pHp$Bz&n;7H(n!u zLD*GB*C6}Msv+#u4f->N>$z_Cj8V4f>{k&6_^<- zfQtc62hUO|%F2P~GSr^jnzaz?Rq`w2Gu4-&#o zc>SOWQGd~(KaMchM+O2vo|o6E5p_T8EYQR?GA^WFvTnFc6hA=)74xKsNb_XBAvRyp zQQ@_kCxb@RFk=c3r9xK=AR$0SndPYA{7q{wU`Ta9i2$=-GNQ!+b^I!5|4$H5kp!_k zD0fjhT`VNy^dxxTQG$PS(+KXKu zUMejlIa6%bAlpSLg9jweYo&X7-slV*bt!0!As5kn+Z2* zq-<+0AS_XzW=CioIO}B)dPuYD{88tW4T*vJp(2rv^Wi+6Jz?)=dba7pJDdgsp zy~C3do8vEVRmXmX=TVa|K`k{Uq5f%Cy=~+po-$Zev6Q8_Oan-S^}e0TW6oi}-?H?3 zVk~PWiPJGZXFN#1#!|sAejaU)nNAY>LtIl!k2~3*bU2?*Y+YUqs3w-aTjv_<*ov86 z8=4yDl()kdhk7tN(ths_mXoGb#XT$dIro^}>+Fdz+Nq~qZXbtz9x_Sw083p3b9f3h z+mtLk_pUvb@1*wyRf#!V_y!fnSV-oPT_x0{wLGC*sPgDwi;{z&w5qJHcrQrOs40sy zh^&-z{__vh0nZy^Lcz;pPPW{&G62&Xp?JE`DYlbKX4!Yzu>PrvVP3yXA22hSj7aaT z=DvSr;1zg5O>B)Znl=0Di+`IQT;ERgq!#SG2DdJy=1mfLv+|5`a= zpSL~X#5ZKOTYoou{jClF&y85E5$g;=e->}wh{oQEwML%Y*`riYs z-Bw>W=`#xbXXS($v+nUKUs-*pU%O%Fi%H`TnEW^Tn4#HsPznIx*!gUB6q~p|pZ)}$ CGSh|t literal 0 HcmV?d00001 diff --git a/gneiss/cluster/tests/test_cluster.py b/gneiss/cluster/tests/test_cluster.py index b1dd960..66b8e73 100644 --- a/gneiss/cluster/tests/test_cluster.py +++ b/gneiss/cluster/tests/test_cluster.py @@ -39,7 +39,6 @@ def test_gradient_artifact(self): res = gradient_clustering(in_table, in_metadata.get_category('x')) res_clust = res.clustering._view(TreeNode) exp_str = '((o1:0.5,o2:0.5)y1:0.5,(o3:0.5,o4:0.5)y2:0.5)y0;\n' - print(str(res_clust)) self.assertEqual(exp_str, str(res_clust)) def test_gradient_artifact_weighted(self): @@ -69,6 +68,20 @@ def test_assign_ids(self): self.assertTrue(n.name is not None) self.assertNotIn('-', n.name) + def test_assign_ids_polytomy(self): + from qiime2.plugins.gneiss.methods import assign_ids + tree_f = get_data_path("polytomy.qza") + tree = qiime2.Artifact.load(tree_f) + out_tree = assign_ids(tree) + res_t = out_tree.tree._view(TreeNode) + res_nontips = [] + for n in res_t.levelorder(include_self=True): + self.assertTrue(n.name is not None) + self.assertNotIn('-', n.name) + if not n.is_tip(): + res_nontips.append(n.name) + self.assertEqual(len(res_nontips), 4) + if __name__ == '__main__': unittest.main() From 9683507fd5d44543395b78c380d405c4cd6d3b3c Mon Sep 17 00:00:00 2001 From: Jamie Morton Date: Tue, 2 May 2017 07:46:10 -0700 Subject: [PATCH 3/3] FIX: Adding dashes back into uuid --- gneiss/cluster/_cluster.py | 5 ++--- gneiss/cluster/tests/test_cluster.py | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/gneiss/cluster/_cluster.py b/gneiss/cluster/_cluster.py index 0695051..b5a10c1 100644 --- a/gneiss/cluster/_cluster.py +++ b/gneiss/cluster/_cluster.py @@ -126,11 +126,10 @@ def gradient_clustering(table: pd.DataFrame, def assign_ids(tree: skbio.TreeNode) -> skbio.TreeNode: - def replace_dashes(x): - return str(x).replace('-', '') + t = tree.copy() t.bifurcate() - ids = ['%sL_%s' % (i, replace_dashes(uuid.uuid4())) + ids = ['%sL-%s' % (i, uuid.uuid4()) for i, n in enumerate(t.levelorder(include_self=True)) if not n.is_tip()] t = rename_internal_nodes(t, names=ids) diff --git a/gneiss/cluster/tests/test_cluster.py b/gneiss/cluster/tests/test_cluster.py index 66b8e73..82a551d 100644 --- a/gneiss/cluster/tests/test_cluster.py +++ b/gneiss/cluster/tests/test_cluster.py @@ -66,7 +66,6 @@ def test_assign_ids(self): res_t = out_tree.tree._view(TreeNode) for n in res_t.levelorder(include_self=True): self.assertTrue(n.name is not None) - self.assertNotIn('-', n.name) def test_assign_ids_polytomy(self): from qiime2.plugins.gneiss.methods import assign_ids @@ -77,7 +76,6 @@ def test_assign_ids_polytomy(self): res_nontips = [] for n in res_t.levelorder(include_self=True): self.assertTrue(n.name is not None) - self.assertNotIn('-', n.name) if not n.is_tip(): res_nontips.append(n.name) self.assertEqual(len(res_nontips), 4)