In [1]:
import numpy as np
from IPython.display import HTML
from bokeh.plotting import output_notebook, show
import genomes_dnj.lct_interval.series_plots as dm
import genomes_dnj.lct_interval.anal_series as an
import genomes_dnj.lct_interval.series_masks as sm
output_notebook(hide_banner=True)

<h3>117_1685 Selection Processes</h3>
<div style="width:700px">
<p>
The series 117_1685 extends across a 618,000 base region of chromosome
2 that includes the genes rab3gap1, zranb3 and half of r3hdm1.  The
435,000 base series 123_1561 extends over most of the same region.
So do the 509,000 base series 62_1265 and the 628,000 base series
193_843.  Some complex history of genetic events was required to
generate these series in the different combinations that are heavily
over expressed in 1000 genomes populations.  It is not possible to
reconstruct the details of that history.  But, some of its major features
are represented in the different associations of SNP series expressed by
1000 genomes chromosome samples.
<p>
The data presented in this notebook shows five independent hierarchies
where the series 117_1685 is over expressed.  Most chromosome samples
expressing associations of series that are part of any of those five hierarchies
express all 117 SNPs of the series 117_1685.  Three of those hierarchies selected
117_1685 along with 123_1561, and 62_1265.  Chromosome samples expressing those
hierarchies include 843 that express 193_843, 329 that express the series 67_329
and 56 that express the series 209_56.
<p>
The other two series 74_210 and 290_16 selected 117_1685 without 123_1561 or
62_1265.  The 210 chromosome samples that express the 74_210 hierarchy are
clean of both 123_1561 and 62_1265.  Those samples include 179 that express
none of the series 123_1561 SNPs and 207 that express none of the 62_1265 SNPs.
In contrast, the series 290_16 is associated with significant fragments of
both 123_1561 and 62_1265.  The chromosome samples that express 290_16 express
95 or 96 SNPs from the series 123_1561 and 40 SNPs from the series 62_1265.
<p>
A different kind of history of series SNP remodeling is illustrated by the
chromosome samples that express the series 28_434.  That history includes hierarchies
with 117_1685 on its own, 117_1685 with 123_1561, 117_1685 with 13_1696, and
123_1561 with 13_1696.  Each of the hierarchies formed with these different
associations includes fragments of series that are not fully expressed
and a variety of selector series.
<p>
The processes for generating all of these heavily over expressed complex associations
of dense SNP series remain unclear and are likely to have varied for different
hierarchies.  But, it is hard to imagine processes that could have succeeded
in all of these independent selections of 117_1685 without some functional
logic to that series of SNPs.
</div>
<h3>193_843</h3>
<div style="width:700px">
<p>
The first plot shows the most common series for chromosome samples
that express 117_1685 and 193_843.  The series 117_1685 is expressed
by 842 of the 843 chromosome samples that express 193_843.  Data on series
SNP counts shows that 702 of these chromosome samples express all 117 
series 117_1685 SNPs.  Another 103 chromosome samples express 116 of those
SNPs.
</div>

In [2]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_193_843], min_match=0.5)
plt = plt_obj.do_plot()
am_193_843 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [3]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
353244,135758231,618284,117,1685,0.5,842,1.0,79,0.47,26,0.49,143,1.23,167,0.99,83,0.49,116,1.9,228,2.21
353478,135933921,434642,123,1561,0.54,842,1.0,79,0.47,26,0.49,143,1.23,167,0.99,83,0.49,116,1.9,228,2.21
353814,136406646,31432,5,1460,0.5,731,0.87,74,0.5,24,0.52,138,1.36,139,0.94,81,0.55,88,1.66,187,2.09
353269,135766890,509095,62,1265,0.67,842,1.0,79,0.47,26,0.49,143,1.23,167,0.99,83,0.49,116,1.9,228,2.21
353790,136393157,48253,10,1218,0.62,750,0.89,76,0.5,26,0.55,138,1.33,141,0.93,81,0.54,92,1.69,196,2.13
353906,136496493,57432,9,1170,0.51,592,0.7,45,0.38,16,0.43,126,1.54,138,1.16,49,0.41,68,1.58,150,2.07
353849,136447707,42559,4,1149,0.62,711,0.84,50,0.35,20,0.45,134,1.36,144,1.01,78,0.55,90,1.74,195,2.24
353907,136496805,55824,9,1023,0.58,595,0.71,45,0.38,15,0.4,128,1.55,140,1.17,49,0.41,68,1.57,150,2.06
353984,136556805,190280,39,1014,0.51,517,0.61,31,0.3,15,0.46,121,1.69,99,0.95,45,0.43,71,1.89,135,2.13
353935,136511874,21321,5,976,0.48,468,0.56,44,0.47,13,0.44,77,1.19,129,1.37,18,0.19,61,1.79,126,2.2


In [4]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_193_843)
count_data

array([(109,   1), (113,   7), (114,   3), (115,  26), (116, 103),
       (117, 702)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>67_329</h3>
<div style="width:700px">
<p>
This plot shows the hierarchy selected by series 67_329, the selector expressed
by the second largest number of chromosome samples that express 117_1685.  Like 
series 193_843, 67_329 selects a combination of 117_1685, 123_1561, 
and 62_1265.  This plot's data shows that 314 of the chromosome samples that express 
67_329 also express 117_1685.  SNP count data shows that 281 of those chromosome samples 
express all 117 of the 117_1685 SNPs.  Another 24 express 116 SNPs.
</div>

In [5]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_67_329], min_match=0.5)
plt = plt_obj.do_plot()
am_67_329 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [6]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
353921,136501840,53819,10,2206,0.1,230,0.73,112,2.42,34,2.36,14,0.44,41,0.89,23,0.5,4,0.24,2,0.07
354033,136588031,5647,7,1760,0.09,161,0.51,115,3.55,36,3.57,8,0.36,1,0.03,1,0.03,0,0.0,0,0.0
353244,135758231,618284,117,1685,0.19,314,1.0,171,2.71,50,2.54,19,0.44,44,0.7,24,0.38,4,0.18,2,0.05
353478,135933921,434642,123,1561,0.2,310,0.99,171,2.74,50,2.57,19,0.44,44,0.71,21,0.34,4,0.18,1,0.03
353814,136406646,31432,5,1460,0.16,235,0.75,112,2.37,34,2.31,16,0.49,44,0.93,24,0.51,3,0.18,2,0.07
353269,135766890,509095,62,1265,0.24,309,0.98,170,2.73,50,2.58,19,0.44,44,0.71,21,0.34,4,0.18,1,0.03
353729,136309239,52321,9,887,0.35,314,1.0,171,2.71,50,2.54,19,0.44,44,0.7,24,0.38,4,0.18,2,0.05
353312,135784351,117869,8,718,0.44,313,1.0,170,2.7,50,2.55,19,0.44,44,0.7,24,0.38,4,0.18,2,0.05
353851,136448855,17976,4,612,0.36,219,0.7,104,2.36,32,2.33,15,0.49,40,0.91,23,0.52,3,0.19,2,0.07
353349,135810535,87488,9,545,0.57,312,0.99,169,2.69,50,2.56,19,0.44,44,0.7,24,0.38,4,0.18,2,0.05


In [7]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_67_329)
count_data

array([(114,   1), (115,   8), (116,  24), (117, 281)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>74_210</h3>
<div style="width:700px">
<p>
This plot shows the hierarchy selected by the series 74_210.  All
210 of the chromosome samples that express this 74 SNP series also express
117_1685.  Neither 123_1561 nor 62_1265 are part of this hierarchy.
SNP count data shows that 109 of these chromosome samples express all 117 of
the 117_1685 SNPs.  Another 89 of the chromosome samples express 116 SNPs.
Count data for series 123_1561 SNPs shows that the chromosome samples expressing
74_210 are free of any fragments of 123_1561.  The data shows that
179 of those chromosome samples express no 123_1561 SNPs and 207 express no 62_1265 SNPs.
The chromosome samples that expess 74_210 are the largest factor in identifying 117_1685
as a unique series distinct from 123_1561 and 62_1265.
</div>

In [8]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_74_210], min_match=0.5)
plt = plt_obj.do_plot()
am_74_210 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [9]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
353244,135758231,618284,117,1685,0.12,210,1.0,61,1.44,12,0.91,29,1.0,3,0.07,66,1.56,14,0.92,25,0.97
353906,136496493,57432,9,1170,0.16,188,0.9,48,1.27,10,0.85,28,1.07,3,0.08,64,1.69,13,0.95,22,0.95
353907,136496805,55824,9,1023,0.19,190,0.9,47,1.23,10,0.84,28,1.06,3,0.08,65,1.7,14,1.01,23,0.99
353984,136556805,190280,39,1014,0.16,167,0.8,30,0.89,9,0.86,28,1.21,3,0.09,63,1.88,13,1.07,21,1.03
353935,136511874,21321,5,976,0.13,131,0.62,48,1.82,10,1.22,21,1.16,3,0.11,30,1.14,7,0.74,12,0.75
353938,136514709,28438,6,820,0.16,130,0.62,47,1.8,10,1.23,21,1.17,3,0.11,30,1.15,7,0.74,12,0.75
353312,135784351,117869,8,718,0.29,210,1.0,61,1.44,12,0.91,29,1.0,3,0.07,66,1.56,14,0.92,25,0.97
353249,135759145,604162,74,210,1.0,210,1.0,61,1.44,12,0.91,29,1.0,3,0.07,66,1.56,14,0.92,25,0.97
353325,135790329,493075,18,131,0.99,130,0.62,0,0.0,2,0.25,26,1.44,3,0.11,60,2.3,14,1.48,25,1.57


In [10]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_74_210)
count_data

array([(112,   2), (114,   2), (115,   8), (116,  89), (117, 109)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [11]:
count_data = an.sa_123_1561.unique_snps_per_allele(am_74_210)
count_data

array([( 0, 179), ( 1,  22), ( 2,   8), (12,   1)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [35]:
count_data = an.sa_62_1265.unique_snps_per_allele(am_74_210)
count_data

array([(0, 207), (1,   2), (8,   1)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>70_166 not 13_1696</h3>
<div style="width:700px">
<p>
The series 70_166 has selected two of the hierarchies associated with
the series 28_434.  Those two hierarchies are distinguished by the
presence or absence of the series 13_1696.  This plot shows the hierarchy
associated with 70_166 but not 13_1696.  The next plot shows the one with
13_1696.  Both plots show data for chromosome samples that express
both 70_166 and 117_1685.  The data for the first plot represents 92 chromosome
samples that express both 117_1685 and 70_166.  These chromosome samples include
62 that also express the series 49_136.  SNP count data for 117_1685 show that more
chromosome samples express 116 SNPs than express all 117.  The SNP counts for
123_1561 show that only 26 of the chromosome samples express enough of the 123
SNPs to be counted as expressing the series.  But, the rest of 92 chromosome
samples all express large fragments of 123_1561.
</div>

In [12]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_70_166], [dm.di_13_1696], min_match=0.5)
plt = plt_obj.do_plot()
am_70_166_not_13_1696 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [13]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
354033,136588031,5647,7,1760,0.03,61,0.66,42,4.46,17,5.34,2,0.2,0,0.0,0,0.0,0,0.0,0,0.0
353244,135758231,618284,117,1685,0.05,92,1.0,66,4.65,22,4.58,4,0.27,0,0.0,0,0.0,0,0.0,0,0.0
353814,136406646,31432,5,1460,0.05,77,0.84,53,4.46,21,5.22,3,0.24,0,0.0,0,0.0,0,0.0,0,0.0
353729,136309239,52321,9,887,0.1,92,1.0,66,4.65,22,4.58,4,0.27,0,0.0,0,0.0,0,0.0,0,0.0
353312,135784351,117869,8,718,0.09,64,0.7,47,4.76,15,4.49,2,0.19,0,0.0,0,0.0,0,0.0,0,0.0
353851,136448855,17976,4,612,0.11,68,0.74,48,4.58,17,4.79,3,0.27,0,0.0,0,0.0,0,0.0,0,0.0
353764,136364916,22977,5,588,0.16,92,1.0,66,4.65,22,4.58,4,0.27,0,0.0,0,0.0,0,0.0,0,0.0
353349,135810535,87488,9,545,0.11,62,0.67,47,4.91,14,4.32,1,0.1,0,0.0,0,0.0,0,0.0,0,0.0
354061,136603638,28487,16,511,0.1,51,0.55,36,4.58,13,4.88,2,0.24,0,0.0,0,0.0,0,0.0,0,0.0
353614,136115507,269773,28,434,0.21,92,1.0,66,4.65,22,4.58,4,0.27,0,0.0,0,0.0,0,0.0,0,0.0


In [14]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_70_166_not_13_1696)
count_data

array([(114,  4), (115,  8), (116, 53), (117, 27)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [15]:
count_data = an.sa_123_1561.unique_snps_per_allele(am_70_166_not_13_1696)
count_data

array([(106,  2), (107,  4), (108,  5), (109, 38), (110, 17), (115, 24),
       (116,  2)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>70_166 and 13_1696</h3>
<div style="width:700px">
<p>
This plot shows data for 74 chromosome samples that express 70_166, 117_685, and 13_1696.
All of the 74 samples also express 28_434 and 49_136.  The 74 samples do express enough
117_1685 SNPs to be counted as expressing the series.  But, all of the samples express
numbers of 117_1685 SNPs that just meet the minimum criteria for being counted. 69
smaples express 106 of the 117_1685 SNPs.  The others express 105 or 107 of the SNPs.
The 74 chromosome samples include all 73 of the samples that express the series 22_73.
The main features of the process that lead to this hierarchy seem clear.  A recombination
event of a chromosome that expressed 70_166, 117_1685, 28_434, and 49_136 with one
that expressed 13_1696 yielded a chromosome that expressed 13_1696 and 117_1685
without the 117_1685 SNPs that overlap 13_1696.  That chromosome was selected
for overexpression by the appearance of the series 22_73.
</div>

In [16]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_70_166, dm.di_13_1696], min_match=0.5)
plt = plt_obj.do_plot()
am_70_166_and_13_1696 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [17]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
353240,135757320,20184,13,1696,0.04,74,1.0,47,3.16,26,5.6,1,0.1,0,0.0,0,0.0,0,0.0,0,0.0
353244,135758231,618284,117,1685,0.04,74,1.0,47,3.16,26,5.6,1,0.1,0,0.0,0,0.0,0,0.0,0,0.0
354130,136653925,107928,24,1504,0.04,64,0.86,41,3.18,22,5.48,1,0.11,0,0.0,0,0.0,0,0.0,0,0.0
353925,136506375,32564,4,1442,0.05,66,0.89,42,3.16,23,5.56,1,0.11,0,0.0,0,0.0,0,0.0,0,0.0
353958,136535876,19014,7,1303,0.05,66,0.89,42,3.16,23,5.56,1,0.11,0,0.0,0,0.0,0,0.0,0,0.0
354129,136652953,108222,5,1296,0.05,64,0.86,41,3.18,22,5.48,1,0.11,0,0.0,0,0.0,0,0.0,0,0.0
354127,136652491,80281,6,1114,0.06,64,0.86,41,3.18,22,5.48,1,0.11,0,0.0,0,0.0,0,0.0,0,0.0
353504,135964764,136368,6,946,0.07,62,0.84,37,2.96,24,6.17,1,0.12,0,0.0,0,0.0,0,0.0,0,0.0
353729,136309239,52321,9,887,0.08,74,1.0,47,3.16,26,5.6,1,0.1,0,0.0,0,0.0,0,0.0,0,0.0
353312,135784351,117869,8,718,0.1,74,1.0,47,3.16,26,5.6,1,0.1,0,0.0,0,0.0,0,0.0,0,0.0


In [18]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_70_166_and_13_1696)
count_data

array([(105,  3), (106, 69), (107,  2)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [39]:
count_data = an.sa_123_1561.unique_snps_per_allele(am_70_166_and_13_1696)
count_data

array([(107,  8), (108, 50), (109, 15), (110,  1)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>209_56</h3>
<div style="width:700px">
<p>
This plot shows data for another 56 chromosome samples that express 117_1685, 123_1561,
and 62_1265.  This hierarchy has been selected by the 209 SNP series 209_56.
The strong correlations with additional series in the upper part of the region
suggests that the initial selection series included 14_48 and 80_38.  The series
26_1414 and 10_2206 are parts of the EUR tree root.  The appearance of a
different association of a number of series in a different context adds evidence
that these series have more substance than a chance association preserved
by linkage disequilibrium.
</div>

In [20]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_209_56], min_match=0.5)
plt = plt_obj.do_plot()
am_209_56 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [21]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
353921,136501840,53819,10,2206,0.02,51,0.91,36,3.51,10,3.13,5,0.71,0,0.0,0,0.0,0,0.0,0,0.0
353244,135758231,618284,117,1685,0.03,56,1.0,40,3.55,11,3.13,5,0.64,0,0.0,0,0.0,0,0.0,0,0.0
353478,135933921,434642,123,1561,0.04,56,1.0,40,3.55,11,3.13,5,0.64,0,0.0,0,0.0,0,0.0,0,0.0
354130,136653925,107928,24,1504,0.02,37,0.66,26,3.49,6,2.59,5,0.98,0,0.0,0,0.0,0,0.0,0,0.0
353797,136398174,75924,26,1414,0.04,51,0.91,37,3.6,10,3.13,4,0.57,0,0.0,0,0.0,0,0.0,0,0.0
353269,135766890,509095,62,1265,0.04,56,1.0,40,3.55,11,3.13,5,0.64,0,0.0,0,0.0,0,0.0,0,0.0
353729,136309239,52321,9,887,0.06,56,1.0,40,3.55,11,3.13,5,0.64,0,0.0,0,0.0,0,0.0,0,0.0
353764,136364916,22977,5,588,0.1,56,1.0,40,3.55,11,3.13,5,0.64,0,0.0,0,0.0,0,0.0,0,0.0
353349,135810535,87488,9,545,0.1,56,1.0,40,3.55,11,3.13,5,0.64,0,0.0,0,0.0,0,0.0,0,0.0
354189,136704466,27748,5,212,0.17,36,0.64,26,3.59,6,2.66,4,0.8,0,0.0,0,0.0,0,0.0,0,0.0


In [22]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_209_56)
count_data

array([(116, 16), (117, 40)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [36]:
count_data = an.sa_123_1561.unique_snps_per_allele(am_209_56)
count_data

array([(121,  7), (122, 49)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [37]:
count_data = an.sa_62_1265.unique_snps_per_allele(am_209_56)
count_data

array([(61,  2), (62, 54)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>290_16</h3>
<div style="width:700px">
<p>
This plot shows data for the 16 chromosome samples that express the 290 SNP series
290_16.  All 16 of these series express all 117 SNPs of the series 117_1685.
All of these chromosome samples also express a 95 or 96 SNP fragment of 123_1561.
</div>

In [23]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_290_16], min_match=0.5)
plt = plt_obj.do_plot()
am_290_16 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [24]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
353244,135758231,618284,117,1685,0.01,16,1.0,14,4.35,1,1.0,1,0.45,0,0.0,0,0.0,0,0.0,0,0.0
353906,136496493,57432,9,1170,0.01,10,0.62,9,4.47,0,0.0,1,0.72,0,0.0,0,0.0,0,0.0,0,0.0
353907,136496805,55824,9,1023,0.01,10,0.62,9,4.47,0,0.0,1,0.72,0,0.0,0,0.0,0,0.0,0,0.0
353984,136556805,190280,39,1014,0.01,10,0.62,9,4.47,0,0.0,1,0.72,0,0.0,0,0.0,0,0.0,0,0.0
353935,136511874,21321,5,976,0.01,10,0.62,9,4.47,0,0.0,1,0.72,0,0.0,0,0.0,0,0.0,0,0.0
353729,136309239,52321,9,887,0.02,16,1.0,14,4.35,1,1.0,1,0.45,0,0.0,0,0.0,0,0.0,0,0.0
353938,136514709,28438,6,820,0.01,10,0.62,9,4.47,0,0.0,1,0.72,0,0.0,0,0.0,0,0.0,0,0.0
353312,135784351,117869,8,718,0.02,16,1.0,14,4.35,1,1.0,1,0.45,0,0.0,0,0.0,0,0.0,0,0.0
353764,136364916,22977,5,588,0.03,16,1.0,14,4.35,1,1.0,1,0.45,0,0.0,0,0.0,0,0.0,0,0.0
353349,135810535,87488,9,545,0.03,16,1.0,14,4.35,1,1.0,1,0.45,0,0.0,0,0.0,0,0.0,0,0.0


In [25]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_290_16)
count_data

array([(117, 16)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [26]:
count_data = an.sa_123_1561.unique_snps_per_allele(am_290_16)
count_data

array([(95,  2), (96, 14)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [38]:
count_data = an.sa_62_1265.unique_snps_per_allele(am_290_16)
count_data

array([(40, 16)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>9_39</h3>
<div style="width:700px">
<p>
This plot shows data for the 39 chromosome samples that express the series
9_39.  All of these chromosome samples express an 111 or 110 SNP fragment
of 117_1685 that is large enough to be counted as an instance of that
series.  The chromosome samples also express a 95 or 96 SNP fragment of 
123_1561 that is not large enough to be counted as an instance of it.
<p>
In the case of 9_39, the history that lead to this hierarchy can
be reconstructed.  The origin of this hierarchy is in the part of
the SAS tree selected by the series 8_267.  A recombination event associated
the upper part of the the EAS tree root that included series 32_1361 and 81_857
with an instance of that hierarchy.  As a consequence of the recombination
some higher positioned SNPs of 117_1685 and 123_1561 were lost.
The appearance of the series 9_39 selected the association of series
that resulted from this recombination event and resulted in their
overexpression.
</div>

In [27]:
plt_obj = dm.superset_yes_no([dm.di_117_1685, dm.di_9_39], min_match=0.5)
plt = plt_obj.do_plot()
am_9_39 = plt_obj.plot_context.yes_allele_mask
show(plt)

In [28]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
354033,136588031,5647,7,1760,0.02,32,0.82,1,0.16,0,0.0,9,2.03,0,0.0,19,2.96,0,0.0,3,0.76
353244,135758231,618284,117,1685,0.02,39,1.0,1,0.13,0,0.0,13,2.41,0,0.0,22,2.81,0,0.0,3,0.63
354130,136653925,107928,24,1504,0.02,29,0.74,1,0.17,0,0.0,9,2.24,0,0.0,16,2.75,0,0.0,3,0.84
353925,136506375,32564,4,1442,0.02,32,0.82,1,0.16,0,0.0,9,2.03,0,0.0,19,2.96,0,0.0,3,0.76
353791,136393658,92684,32,1361,0.03,39,1.0,1,0.13,0,0.0,13,2.41,0,0.0,22,2.81,0,0.0,3,0.63
353958,136535876,19014,7,1303,0.02,32,0.82,1,0.16,0,0.0,9,2.03,0,0.0,19,2.96,0,0.0,3,0.76
354129,136652953,108222,5,1296,0.02,29,0.74,1,0.17,0,0.0,9,2.24,0,0.0,16,2.75,0,0.0,3,0.84
353269,135766890,509095,62,1265,0.03,39,1.0,1,0.13,0,0.0,13,2.41,0,0.0,22,2.81,0,0.0,3,0.63
353919,136500475,42085,13,1227,0.03,32,0.82,1,0.16,0,0.0,9,2.03,0,0.0,19,2.96,0,0.0,3,0.76
354127,136652491,80281,6,1114,0.03,29,0.74,1,0.17,0,0.0,9,2.24,0,0.0,16,2.75,0,0.0,3,0.84


In [29]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_9_39)
count_data

array([(110,  1), (111, 38)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [30]:
count_data = an.sa_123_1561.unique_snps_per_allele(am_290_16)
count_data

array([(95,  2), (96, 14)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

<h3>Noise</h3>
<div style="width:700px">
<p>
This plot shows data for the 42 remaining chromosomes that express 117_1685
without any association with an obvious selector series.  The 117_1685 and
123_1561 SNP counts for these chromosomes show more variation
then is generally observed when the series are associated with some
selector series.
</div>

In [31]:
plt_obj = dm.superset_yes_no([dm.di_117_1685], 
                             [dm.di_9_39, dm.di_290_16, dm.di_209_56, dm.di_70_166, dm.di_74_210, 
                              dm.di_67_329, dm.di_193_843], min_match=0.5)
plt = plt_obj.do_plot()
am_rest = plt_obj.plot_context.yes_allele_mask
show(plt)

In [32]:
HTML(plt_obj.get_html())

index,first,length,snps,alleles,alleles.1,matches,matches.1,afr,afr.1,afx,afx.1,amr,amr.1,eas,eas.1,eur,eur.1,sas,sas.1,sax,sax.1
353244,135758231,618284,117,1685,0.02,42,1.0,28,4.29,6,2.95,0,0.0,1,0.1,5,0.49,1,0.35,1,0.22
353729,136309239,52321,9,887,0.02,21,0.5,19,5.83,2,1.97,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0
353312,135784351,117869,8,718,0.05,33,0.79,25,4.88,5,3.13,0,0.0,1,0.13,0,0.0,1,0.44,1,0.29


In [33]:
count_data = an.sa_117_1685.unique_snps_per_allele(am_rest)
count_data

array([(105, 3), (106, 1), (107, 3), (108, 6), (111, 3), (112, 1),
       (113, 2), (114, 2), (115, 5), (116, 9), (117, 7)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])

In [34]:
count_data = an.sa_123_1561.unique_snps_per_allele(am_rest)
count_data

array([(  0, 5), (  1, 1), ( 17, 6), ( 51, 1), ( 52, 2), ( 66, 1),
       ( 90, 1), ( 91, 5), (107, 3), (109, 6), (110, 1), (119, 1),
       (121, 1), (122, 4), (123, 4)], 
      dtype=[('count', '<u2'), ('snps', '<u2')])