## NBX: ...

In [267]:
import toytree
import ipcoal
import gzip
import numpy as np
import pandas as pd

## Generate 100Kb sequence from a known ARG

In [274]:
sptree = toytree.rtree.imbtree(2, treeheight=5e5)
sptree = sptree.set_node_data("Ne", {0: 4e5, 1: 2e5, 2: 3e5})
sptree = sptree.set_node_data("name", [0, 1, 2])
sptree.draw('p', scale_bar=1e5);

In [254]:
model = ipcoal.Model(
    sptree, 
    nsamples=4,
    mut=2e-8,
    recomb=2e-9,
    discrete_genome=False, 
    record_full_arg=True,
    ancestry_model="smc_prime", 
    store_tree_sequences=True,
    seed_trees=123,
    seed_mutations=123,
)

In [255]:
model.sim_loci(1, 1e5)

In [256]:
print("N ARG intervals =", model.df.shape[0])

N ARG intervals = 939


### Create ARGweaver input files

In [257]:
model.write_sites_file("test", "/tmp", )
! head -n 10 /tmp/test.sites

NAMES	0_0	0_1	0_2	0_3	1_0	1_1	1_2	1_3
REGION	chr	1	100000
24	TTTTTATA
57	GGTGGGGG
59	GGGGTTTT
60	CCCCACCC
83	CGCCCCCC
90	AAAATTTT
135	AGAAAAAA
140	AATTAAAA


In [258]:
model.write_popfile("test", "/tmp", invert=True)
! cat /tmp/test.popfile.tsv

0_0	0
0_1	0
0_2	0
0_3	0
1_0	1
1_1	1
1_2	1
1_3	1

In [259]:
with open("/tmp/test.poptree", 'w') as out:
    out.write("npop 2\n")
    out.write("div 500000 1 0")
#     out.write("div 500000 1 2")

! cat /tmp/test.poptree

npop 2
div 500000 1 0

In [260]:
with open("/tmp/test.popsize", 'w') as out:
    out.write("# time, popsize, pop\n")
    out.write("0\t400000\t0\n")
    out.write("0\t200000\t1\n")
    out.write("500000\t300000\t0\n")
! cat /tmp/test.popsize

# time, popsize, pop
0	400000	0
0	200000	1
500000	300000	0


In [261]:
# ! arg-sample -h

In [None]:
%%bash --err /dev/null 
NAME="ARG-423-N1e5"
arg-sample \
    -m 2e-8 \
    -r 2e-9 \
    -n 100000 \
    -x 123 \
    --overwrite \
    --smc-prime \
    -s /tmp/test.sites \
    --pop-tree-file /tmp/test.poptree \
    --pop-file /tmp/test.popfile.tsv \
    --popsize-file /tmp/test.popsize \
    -c 1 \
    --ntimes 35 \
    --no-resample-mig \
    --maxtime 5000000 \
    -V 1 \
    -o /tmp/$NAME
    #--invisible-recombs \


prior:      -11287.949405
likelihood: -175974.681043
joint:      -187262.630448
nrecombs:   771
noncompats: 28
arglen:     1444453402419.999023
max memory: 24.9 MB

sample 460
forward (302 states,    700 blocks):   1.5 s
trace:                               541.6 ms
add thread:                          534.0 ms
resample_arg_leaf: accept=1.000000
sample time:   2.5 s

prior:      -11287.472621
likelihood: -175963.920578
joint:      -187251.393198
nrecombs:   771
noncompats: 29
arglen:     1447199034196.821777
max memory: 24.9 MB

sample 461
forward (306 states,    719 blocks):   1.3 s
trace:                               481.0 ms
add thread:                          480.6 ms
resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11294.277607
likelihood: -175930.167622
joint:      -187224.445229
nrecombs:   772
noncompats: 25
arglen:     1444883844578.670166
max memory: 24.9 MB

sample 462
forward (295 states,    713 blocks):   1.3 s
trace:                               4

trace:                               542.2 ms
add thread:                          545.8 ms
resample_arg_leaf: accept=1.000000
sample time:   2.5 s

prior:      -11495.091067
likelihood: -175920.886893
joint:      -187415.977960
nrecombs:   801
noncompats: 17
arglen:     1445935456733.998535
max memory: 24.9 MB

sample 487
resample_arg_regions: accept=0.250000
sample time:  13.1 s

prior:      -11514.377752
likelihood: -175912.106048
joint:      -187426.483799
nrecombs:   804
noncompats: 18
arglen:     1449796048184.077637
max memory: 24.9 MB

sample 488
forward (305 states,    744 blocks):   1.4 s
trace:                               506.1 ms
add thread:                          623.2 ms
resample_arg_leaf: accept=1.000000
sample time:   2.5 s

prior:      -11710.876957
likelihood: -175903.048629
joint:      -187613.925586
nrecombs:   819
noncompats: 18
arglen:     1447750765529.561768
max memory: 24.9 MB

sample 489
resample_arg_regions: accept=0.300000
sample time:   9.6 s

prior:   

resample_arg_leaf: accept=1.000000
sample time:   2.4 s

prior:      -11228.592941
likelihood: -175965.385264
joint:      -187193.978205
nrecombs:   774
noncompats: 25
arglen:     1442805734361.736816
max memory: 24.9 MB

sample 514
forward (301 states,    675 blocks):   1.2 s
trace:                               422.5 ms
add thread:                          425.6 ms
resample_arg_leaf: accept=1.000000
sample time:   2.0 s

prior:      -11043.269911
likelihood: -175990.919509
joint:      -187034.189420
nrecombs:   752
noncompats: 27
arglen:     1447828006082.998291
max memory: 24.9 MB

sample 515
forward (296 states,    688 blocks):   1.2 s
trace:                               444.0 ms
add thread:                          442.0 ms
resample_arg_leaf: accept=1.000000
sample time:   2.1 s

prior:      -10986.590622
likelihood: -175992.896949
joint:      -186979.487571
nrecombs:   747
noncompats: 27
arglen:     1444856130782.014648
max memory: 24.9 MB

sample 516
forward (305 states,    681


prior:      -11080.773489
likelihood: -175942.090139
joint:      -187022.863628
nrecombs:   761
noncompats: 29
arglen:     1445048029278.349365
max memory: 24.9 MB

sample 540
forward (291 states,    688 blocks):   1.3 s
trace:                               471.8 ms
add thread:                          590.8 ms
resample_arg_leaf: accept=1.000000
sample time:   2.4 s

prior:      -11102.489822
likelihood: -175935.964189
joint:      -187038.454011
nrecombs:   760
noncompats: 30
arglen:     1449050256464.356201
max memory: 24.9 MB

sample 541
resample_arg_regions: accept=0.150000
sample time:  11.0 s

prior:      -11166.602922
likelihood: -175937.422531
joint:      -187104.025453
nrecombs:   767
noncompats: 30
arglen:     1443961463505.668213
max memory: 24.9 MB

sample 542
forward (304 states,    703 blocks):   1.3 s
trace:                               495.5 ms
add thread:                          502.7 ms
resample_arg_leaf: accept=1.000000
sample time:   2.3 s

prior:      -11125.5382

nrecombs:   789
noncompats: 26
arglen:     1466571765147.069092
max memory: 24.9 MB

sample 567
forward (310 states,    726 blocks):   1.3 s
trace:                               495.1 ms
add thread:                          482.1 ms
resample_arg_leaf: accept=1.000000
sample time:   2.3 s

prior:      -11482.954263
likelihood: -175966.559125
joint:      -187449.513388
nrecombs:   791
noncompats: 27
arglen:     1464897898676.551514
max memory: 24.9 MB

sample 568
forward (310 states,    721 blocks):   1.3 s
trace:                               507.4 ms
add thread:                          522.3 ms
resample_arg_leaf: accept=1.000000
sample time:   2.4 s

prior:      -11569.548835
likelihood: -175979.105814
joint:      -187548.654649
nrecombs:   801
noncompats: 30
arglen:     1466120117216.131592
max memory: 24.9 MB

sample 569
resample_arg_regions: accept=0.450000
sample time:  11.9 s

prior:      -11798.562207
likelihood: -175955.847666
joint:      -187754.409873
nrecombs:   818
noncompa


prior:      -10855.844315
likelihood: -175923.739558
joint:      -186779.583873
nrecombs:   727
noncompats: 24
arglen:     1457559638328.119141
max memory: 24.9 MB

sample 597
resample_arg_regions: accept=0.200000
sample time:   7.3 s

prior:      -10995.217147
likelihood: -175935.963762
joint:      -186931.180909
nrecombs:   745
noncompats: 23
arglen:     1449701978103.394043
max memory: 24.9 MB

sample 598
resample_arg_regions: accept=0.100000
sample time:   8.1 s

prior:      -11077.249558
likelihood: -175917.322681
joint:      -186994.572239
nrecombs:   755
noncompats: 24
arglen:     1446204800052.919434
max memory: 24.9 MB

sample 599
forward (299 states,    690 blocks):   1.2 s
trace:                               435.4 ms
add thread:                          436.0 ms
resample_arg_leaf: accept=1.000000
sample time:   2.1 s

prior:      -11035.999852
likelihood: -175932.744279
joint:      -186968.744132
nrecombs:   751
noncompats: 26
arglen:     1445716140822.834473
max memory: 2

sample time:   2.3 s

prior:      -11646.735306
likelihood: -175901.889934
joint:      -187548.625240
nrecombs:   802
noncompats: 21
arglen:     1456327462354.427002
max memory: 24.9 MB

sample 624
forward (306 states,    726 blocks):   1.4 s
trace:                               553.9 ms
add thread:                          542.0 ms
resample_arg_leaf: accept=1.000000
sample time:   2.5 s

prior:      -11400.925618
likelihood: -175930.928457
joint:      -187331.854075
nrecombs:   781
noncompats: 25
arglen:     1459909857539.551025
max memory: 24.9 MB

sample 625
resample_arg_regions: accept=0.700000
sample time:   9.1 s

prior:      -11436.122793
likelihood: -175999.520073
joint:      -187435.642865
nrecombs:   789
noncompats: 29
arglen:     1450747823893.230225
max memory: 24.9 MB

sample 626
resample_arg_regions: accept=0.600000
sample time:  12.0 s

prior:      -11401.745375
likelihood: -175963.049551
joint:      -187364.794926
nrecombs:   781
noncompats: 24
arglen:     1452132959845


prior:      -11489.117849
likelihood: -175984.461735
joint:      -187473.579584
nrecombs:   807
noncompats: 30
arglen:     1436891727795.894531
max memory: 24.9 MB

sample 653
forward (303 states,    742 blocks):   1.4 s
trace:                               500.0 ms
add thread:                          495.9 ms
resample_arg_leaf: accept=1.000000
sample time:   2.4 s

prior:      -11437.396280
likelihood: -175993.348364
joint:      -187430.744644
nrecombs:   799
noncompats: 32
arglen:     1439828826068.677002
max memory: 24.9 MB

sample 654
resample_arg_regions: accept=0.100000
sample time:   7.6 s

prior:      -11395.303158
likelihood: -176000.915845
joint:      -187396.219003
nrecombs:   795
noncompats: 34
arglen:     1436879225128.760986
max memory: 24.9 MB

sample 655
resample_arg_regions: accept=0.300000
sample time:   8.2 s

prior:      -11232.293698
likelihood: -175992.353516
joint:      -187224.647214
nrecombs:   778
noncompats: 32
arglen:     1440827588335.756836
max memory: 2

nrecombs:   769
noncompats: 22
arglen:     1481446269048.680420
max memory: 24.9 MB

sample 680
forward (308 states,    690 blocks):   1.4 s
trace:                               544.5 ms
add thread:                          549.4 ms
resample_arg_leaf: accept=1.000000
sample time:   2.5 s

prior:      -11323.431324
likelihood: -175933.996382
joint:      -187257.427706
nrecombs:   768
noncompats: 24
arglen:     1478973661242.065918
max memory: 24.9 MB

sample 681
forward (296 states,    687 blocks):   1.3 s
trace:                               451.5 ms
add thread:                          451.9 ms
resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11325.125561
likelihood: -175926.018537
joint:      -187251.144098
nrecombs:   769
noncompats: 24
arglen:     1478650606206.371826
max memory: 24.9 MB

sample 682
resample_arg_regions: accept=0.400000
sample time:  11.6 s

prior:      -11305.963414
likelihood: -175970.528028
joint:      -187276.491442
nrecombs:   771
noncompa

sample time:   8.3 s

prior:      -11304.241128
likelihood: -175988.200336
joint:      -187292.441465
nrecombs:   778
noncompats: 28
arglen:     1436682917265.383789
max memory: 24.9 MB

sample 705
resample_arg_regions: accept=0.000000
sample time:   8.2 s

prior:      -11304.241128
likelihood: -175988.200336
joint:      -187292.441465
nrecombs:   778
noncompats: 28
arglen:     1436682917265.383789
max memory: 24.9 MB

sample 706
forward (302 states,    717 blocks):   1.3 s
trace:                               484.7 ms
add thread:                          481.6 ms
resample_arg_leaf: accept=1.000000
sample time:   2.3 s

prior:      -11440.614832
likelihood: -175988.169862
joint:      -187428.784694
nrecombs:   788
noncompats: 28
arglen:     1440572938812.305176
max memory: 24.9 MB

sample 707
resample_arg_regions: accept=0.400000
sample time:  10.9 s

prior:      -11904.307988
likelihood: -175985.617046
joint:      -187889.925034
nrecombs:   832
noncompats: 28
arglen:     1444138215994

add thread:                          493.0 ms
resample_arg_leaf: accept=1.000000
sample time:   2.3 s

prior:      -10834.778013
likelihood: -175963.583463
joint:      -186798.361476
nrecombs:   741
noncompats: 17
arglen:     1442177148771.851562
max memory: 24.9 MB

sample 731
resample_arg_regions: accept=0.200000
sample time:   8.0 s

prior:      -10955.546680
likelihood: -175977.418587
joint:      -186932.965267
nrecombs:   752
noncompats: 18
arglen:     1441844792535.250244
max memory: 24.9 MB

sample 732
resample_arg_regions: accept=0.200000
sample time:   7.7 s

prior:      -10976.350287
likelihood: -175995.288384
joint:      -186971.638671
nrecombs:   754
noncompats: 20
arglen:     1445212426863.215332
max memory: 24.9 MB

sample 733
resample_arg_regions: accept=0.400000
sample time:   7.6 s

prior:      -11203.333361
likelihood: -176000.508300
joint:      -187203.841661
nrecombs:   778
noncompats: 22
arglen:     1433580049165.292969
max memory: 24.9 MB

sample 734
resample_arg_

sample time:  12.0 s

prior:      -11389.894208
likelihood: -175968.502996
joint:      -187358.397204
nrecombs:   782
noncompats: 24
arglen:     1435156744560.849121
max memory: 24.9 MB

sample 758
forward (304 states,    738 blocks):   1.4 s
trace:                               490.2 ms
add thread:                          513.6 ms
resample_arg_leaf: accept=1.000000
sample time:   2.4 s

prior:      -11204.685413
likelihood: -175958.191572
joint:      -187162.876985
nrecombs:   768
noncompats: 23
arglen:     1435787267672.939209
max memory: 24.9 MB

sample 759
resample_arg_regions: accept=0.000000
sample time:   8.7 s

prior:      -11204.685413
likelihood: -175958.191572
joint:      -187162.876985
nrecombs:   768
noncompats: 23
arglen:     1435787267672.939209
max memory: 24.9 MB

sample 760
resample_arg_regions: accept=0.200000
sample time:   9.0 s

prior:      -11301.607323
likelihood: -175949.899625
joint:      -187251.506948
nrecombs:   778
noncompats: 24
arglen:     1443691719803

nrecombs:   717
noncompats: 26
arglen:     1451903786893.232910
max memory: 24.9 MB

sample 785
resample_arg_regions: accept=0.200000
sample time:  11.0 s

prior:      -10767.440196
likelihood: -175947.312131
joint:      -186714.752327
nrecombs:   721
noncompats: 27
arglen:     1452272943878.887695
max memory: 24.9 MB

sample 786
forward (310 states,    648 blocks):   1.3 s
trace:                               509.3 ms
add thread:                          487.6 ms
resample_arg_leaf: accept=1.000000
sample time:   2.3 s

prior:      -10715.624108
likelihood: -175945.203556
joint:      -186660.827664
nrecombs:   716
noncompats: 28
arglen:     1462718481112.814209
max memory: 24.9 MB

sample 787
resample_arg_regions: accept=0.300000
sample time:   7.9 s

prior:      -10727.627453
likelihood: -175953.182701
joint:      -186680.810153
nrecombs:   717
noncompats: 28
arglen:     1445142981269.559814
max memory: 24.9 MB

sample 788
resample_arg_regions: accept=0.300000
sample time:  10.8 s

pr


sample 812
forward (312 states,    678 blocks):   1.2 s
trace:                               445.4 ms
add thread:                          445.3 ms
resample_arg_leaf: accept=1.000000
sample time:   2.1 s

prior:      -11140.434215
likelihood: -175900.147404
joint:      -187040.581620
nrecombs:   744
noncompats: 22
arglen:     1452940676701.045166
max memory: 24.9 MB

sample 813
forward (308 states,    666 blocks):   1.2 s
trace:                               464.1 ms
add thread:                          474.7 ms
resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11064.155521
likelihood: -175903.559511
joint:      -186967.715032
nrecombs:   740
noncompats: 21
arglen:     1454912469129.352539
max memory: 24.9 MB

sample 814
forward (301 states,    665 blocks):   1.2 s
trace:                               435.1 ms
add thread:                          447.9 ms
resample_arg_leaf: accept=1.000000
sample time:   2.1 s

prior:      -11234.933935
likelihood: -175866.788035
j

sample time:  11.8 s

prior:      -11408.819827
likelihood: -175933.942549
joint:      -187342.762376
nrecombs:   784
noncompats: 21
arglen:     1432862265334.756592
max memory: 24.9 MB

sample 838
forward (290 states,    712 blocks):   1.3 s
trace:                               457.7 ms
add thread:                          466.6 ms
resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11473.043201
likelihood: -175921.029850
joint:      -187394.073051
nrecombs:   792
noncompats: 20
arglen:     1439738424491.972412
max memory: 24.9 MB

sample 839
resample_arg_regions: accept=0.350000
sample time:  12.1 s

prior:      -11506.171420
likelihood: -175934.994581
joint:      -187441.166001
nrecombs:   794
noncompats: 24
arglen:     1443196108780.629639
max memory: 24.9 MB

sample 840
resample_arg_regions: accept=0.350000
sample time:  11.9 s

prior:      -11502.820910
likelihood: -175952.993005
joint:      -187455.813915
nrecombs:   800
noncompats: 24
arglen:     1443717021565

resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11322.426480
likelihood: -175876.551244
joint:      -187198.977724
nrecombs:   774
noncompats: 18
arglen:     1458426741033.646729
max memory: 24.9 MB

sample 865
resample_arg_regions: accept=0.200000
sample time:   8.1 s

prior:      -11514.644989
likelihood: -175883.641133
joint:      -187398.286121
nrecombs:   802
noncompats: 16
arglen:     1440755708630.451904
max memory: 24.9 MB

sample 866
resample_arg_regions: accept=0.500000
sample time:   8.0 s

prior:      -11346.284300
likelihood: -175888.662719
joint:      -187234.947019
nrecombs:   783
noncompats: 16
arglen:     1445324313569.956543
max memory: 24.9 MB

sample 867
resample_arg_regions: accept=0.600000
sample time:   7.6 s

prior:      -11400.772349
likelihood: -175884.968816
joint:      -187285.741164
nrecombs:   789
noncompats: 19
arglen:     1451734658594.986816
max memory: 24.9 MB

sample 868
forward (304 states,    707 blocks):   1.2 s
trace:        

resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11026.918101
likelihood: -175962.772862
joint:      -186989.690963
nrecombs:   754
noncompats: 25
arglen:     1436834830119.576172
max memory: 24.9 MB

sample 893
forward (299 states,    687 blocks):   1.2 s
trace:                               434.4 ms
add thread:                          409.8 ms
resample_arg_leaf: accept=1.000000
sample time:   2.0 s

prior:      -11147.591461
likelihood: -175959.223883
joint:      -187106.815344
nrecombs:   766
noncompats: 25
arglen:     1439644183567.772949
max memory: 24.9 MB

sample 894
forward (289 states,    708 blocks):   1.2 s
trace:                               444.3 ms
add thread:                          430.3 ms
resample_arg_leaf: accept=1.000000
sample time:   2.1 s

prior:      -11178.578744
likelihood: -175940.643272
joint:      -187119.222016
nrecombs:   770
noncompats: 24
arglen:     1442375419044.132812
max memory: 24.9 MB

sample 895
forward (291 states,    691


sample 920
forward (299 states,    681 blocks):   1.2 s
trace:                               414.9 ms
add thread:                          424.7 ms
resample_arg_leaf: accept=1.000000
sample time:   2.1 s

prior:      -10633.074969
likelihood: -175982.229196
joint:      -186615.304166
nrecombs:   719
noncompats: 22
arglen:     1411305288134.831787
max memory: 24.9 MB

sample 921
resample_arg_regions: accept=0.150000
sample time:  10.6 s

prior:      -10837.348150
likelihood: -175957.423506
joint:      -186794.771657
nrecombs:   735
noncompats: 21
arglen:     1420244132621.589111
max memory: 24.9 MB

sample 922
resample_arg_regions: accept=0.250000
sample time:  11.5 s

prior:      -10790.374632
likelihood: -175970.038264
joint:      -186760.412895
nrecombs:   731
noncompats: 22
arglen:     1426696996492.501953
max memory: 24.9 MB

sample 923
resample_arg_regions: accept=0.600000
sample time:  10.5 s

prior:      -10917.482187
likelihood: -175964.062789
joint:      -186881.544976
nrecom

likelihood: -176015.283193
joint:      -187319.536012
nrecombs:   781
noncompats: 23
arglen:     1421857974528.931396
max memory: 24.9 MB

sample 948
forward (301 states,    733 blocks):   1.3 s
trace:                               453.3 ms
add thread:                          450.6 ms
resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11266.797677
likelihood: -176030.052084
joint:      -187296.849761
nrecombs:   780
noncompats: 24
arglen:     1417792560533.276855
max memory: 24.9 MB

sample 949
resample_arg_regions: accept=0.000000
sample time:   7.8 s

prior:      -11266.797677
likelihood: -176030.052084
joint:      -187296.849761
nrecombs:   780
noncompats: 24
arglen:     1417792560533.276855
max memory: 24.9 MB

sample 950
resample_arg_regions: accept=0.200000
sample time:  11.3 s

prior:      -11225.389938
likelihood: -175991.554340
joint:      -187216.944278
nrecombs:   775
noncompats: 24
arglen:     1425558997022.806641
max memory: 24.9 MB

sample 951
forward 

sample time:   2.4 s

prior:      -11469.871036
likelihood: -175985.812433
joint:      -187455.683469
nrecombs:   793
noncompats: 26
arglen:     1438462004648.127441
max memory: 24.9 MB

sample 975
forward (310 states,    722 blocks):   1.5 s
trace:                               512.6 ms
add thread:                          513.5 ms
resample_arg_leaf: accept=1.000000
sample time:   2.5 s

prior:      -11407.564993
likelihood: -175984.723474
joint:      -187392.288467
nrecombs:   785
noncompats: 26
arglen:     1441220157159.307129
max memory: 24.9 MB

sample 976
forward (307 states,    713 blocks):   1.3 s
trace:                               462.2 ms
add thread:                          465.5 ms
resample_arg_leaf: accept=1.000000
sample time:   2.2 s

prior:      -11493.444022
likelihood: -175970.730668
joint:      -187464.174690
nrecombs:   797
noncompats: 24
arglen:     1440029437961.145996
max memory: 24.9 MB

sample 977
forward (309 states,    738 blocks):   1.4 s
trace:           



sampling time:    1.8 h
max memory usage: 24.9 MB
FINISH


In [265]:
! zcat /tmp/ARG-423-N1e5.700.smc.gz | head -n 9

NAMES	1_2	1_1	1_3	0_1	0_2	1_0	0_3	0_0
REGION	chr	1	100000
TREE	1	8	(((7:285088.093426[&&NHX:age=0.000000:pop_path=0],3:285088.093426[&&NHX:age=0.000000:pop_path=0])13:253753.746871[&&NHX:age=285088.093426:pop_path=0],(5:207356.064150[&&NHX:age=0.000000:pop_path=1],(0:207356.064150[&&NHX:age=0.000000:pop_path=1],(1:8507.431562[&&NHX:age=0.000000:pop_path=1],2:8507.431562[&&NHX:age=0.000000:pop_path=1])9:198848.632588[&&NHX:age=8507.431562:pop_path=1])10:0.000000[&&NHX:age=207356.064150:pop_path=1])11:331485.776148[&&NHX:age=207356.064150:pop_path=1])12:861153.685538[&&NHX:age=538841.840298:pop_path=0],(6:391945.655380[&&NHX:age=0.000000:pop_path=0],4:391945.655380[&&NHX:age=0.000000:pop_path=0])8:1008049.870456[&&NHX:age=391945.655380:pop_path=0])14[&&NHX:age=1399995.525835:pop_path=0];
SPR	8	11	538841.840298	8	740778.780922	0
TREE	9	54	((7:285088.093426[&&NHX:age=0.000000:pop_path=0],3:285088.093426[&&NHX:age=0.000000:pop_path=0])13:1114907.432409[&&NHX:age=285088.093426:pop_path=0],((

In [187]:
def smc_relabeled_tree_generator(smc_file: str):
    """Return a generator of relabeled trees from a SMC.gz file."""
    with gzip.open(smc_file, 'rb') as idata:
        # get int to tip label translation dict
        trans = idata.readline().decode().strip().split()[1:]
        trans = {str(i): j for (i, j) in enumerate(trans)}

        for line in idata:
            line = line.decode()
            if line.startswith("TREE"):
                _, start, stop, nhx = line.split()
                tree = toytree.tree(nhx, feature_prefix="&&NHX:", feature_delim=":")
                tree.set_node_data("name", {i: trans[i.name] for i in tree[:tree.ntips]}, inplace=True)
                yield int(start) - 1, int(stop) - 1, tree                

In [188]:
igen = smc_relabeled_tree_generator("/tmp/arg-423.100.smc.gz")

In [189]:
print(next(igen))

(0, 97, <toytree.ToyTree at 0x7f44a17b6fe0>)


In [219]:
start, stop, tree = next(igen)
tree.draw('c');

In [117]:
NHX1 = "(7:4009316.431213[&&NHX:age=0.000000:pop_path=0],(5:2577919.020739[&&NHX:age=0.000000:pop_path=1],((0:440562.326462[&&NHX:age=0.000000:pop_path=1],(1:38735.880475[&&NHX:age=0.000000:pop_path=1],2:38735.880475[&&NHX:age=0.000000:pop_path=1])13:401826.445987[&&NHX:age=38735.880475:pop_path=1])8:1216980.920458[&&NHX:age=440562.326462:pop_path=1],(3:1329109.830062[&&NHX:age=0.000000:pop_path=0],(4:440562.326462[&&NHX:age=0.000000:pop_path=0],6:440562.326462[&&NHX:age=0.000000:pop_path=0])14:888547.503600[&&NHX:age=440562.326462:pop_path=0])9:328433.416859[&&NHX:age=1329109.830062:pop_path=0])11:920375.773818[&&NHX:age=1657543.246920:pop_path=0])12:1431397.410474[&&NHX:age=2577919.020739:pop_path=0])10[&&NHX:age=4009316.431213:pop_path=0];"

In [None]:
NHX1

In [118]:
t = toytree.tree(NHX1, feature_prefix="&&NHX:", feature_delim=":")
translate = {'0': '1_2', '1': '1_1', '2': '1_3', '3': '0_1', '4': '0_2', '5': '1_0', '6': '0_3', '7': '0_0'}
t = t.set_node_data("name", {i: translate[i.name] for i in t[:t.ntips]})
t.get_node_data()
t.draw('c');

In [121]:
#model.df.head(14)

In [1]:
import toytree

In [36]:
from loguru import logger

In [126]:
# toytree.tree(model.df.genealogy[14]).draw('c');

In [53]:
NHX = "(((7:6960.863443[&&NHX:age=0.000000:pop_path=0],4:6960.863443[&&NHX:age=0.000000:pop_path=0])14:491497.062101[&&NHX:age=6960.863443:pop_path=0],((6:46735.201770[&&NHX:age=0.000000:pop_path=0],3:46735.201770[&&NHX:age=0.000000:pop_path=0])12:451722.723774[&&NHX:age=46735.201770:pop_path=0],1:498457.925544[&&NHX:age=0.000000:pop_path=1])11:0.000000[&&NHX:age=498457.925544:pop_path=1])8:0.000000[&&NHX:age=498457.925544:pop_path=1],(2:498457.925544[&&NHX:age=0.000000:pop_path=1],(5:120522.878240[&&NHX:age=0.000000:pop_path=1],0:120522.878240[&&NHX:age=0.000000:pop_path=1])10:377935.047304[&&NHX:age=120522.878240:pop_path=1])13:0.000000[&&NHX:age=498457.925544:pop_path=1])9[&&NHX:age=498457.925544:pop_path=1];"
NHX = "(3:498457.925544[&&NHX:age=0.000000:pop_path=0],(((6:75062.469627[&&NHX:age=0.000000:pop_path=0],(7:6960.863443[&&NHX:age=0.000000:pop_path=0],4:6960.863443[&&NHX:age=0.000000:pop_path=0])14:68101.606184[&&NHX:age=6960.863443:pop_path=0])12:423395.455917[&&NHX:age=75062.469627:pop_path=0],((5:75062.469627[&&NHX:age=0.000000:pop_path=1],2:75062.469627[&&NHX:age=0.000000:pop_path=1])13:235498.704130[&&NHX:age=75062.469627:pop_path=1],1:310561.173756[&&NHX:age=0.000000:pop_path=1])8:187896.751787[&&NHX:age=310561.173756:pop_path=1])11:0.000000[&&NHX:age=498457.925544:pop_path=1],0:498457.925544[&&NHX:age=0.000000:pop_path=1])9:0.000000[&&NHX:age=498457.925544:pop_path=1])10[&&NHX:age=498457.925544:pop_path=1];"

NHX = "(((2:86757.518190[&&NHX:age=0.000000:pop_path=1],1:86757.518190[&&NHX:age=0.000000:pop_path=1])13:65796.427444[&&NHX:age=86757.518190:pop_path=1],0:152553.945635[&&NHX:age=0.000000:pop_path=1])12:676070.000307[&&NHX:age=152553.945635:pop_path=1],((4:568940.017807[&&NHX:age=0.000000:pop_path=0],5:568940.017807[&&NHX:age=0.000000:pop_path=1])10:259683.928134[&&NHX:age=568940.017807:pop_path=1],(7:86757.518190[&&NHX:age=0.000000:pop_path=0],(3:59540.371127[&&NHX:age=0.000000:pop_path=0],6:59540.371127[&&NHX:age=0.000000:pop_path=0])14:27217.147063[&&NHX:age=59540.371127:pop_path=0])11:741866.427751[&&NHX:age=86757.518190:pop_path=0])9:0.000000[&&NHX:age=828623.945941:pop_path=0])8[&&NHX:age=828623.945941:pop_path=0];"
t = toytree.tree(NHX, feature_prefix="&&NHX:", feature_delim=":")
translate = {'0': '1_2', '1': '1_1', '2': '1_3', '3': '0_1', '4': '0_2', '5': '1_0', '6': '0_3', '7': '0_0'}
t = t.set_node_data("name", {i: translate[i.name] for i in t[:t.ntips]})
t.get_node_data()

Unnamed: 0,idx,name,height,dist,support,age,pop_path
0,0,1_3,1e-06,86757.51819,,0.0,1.0
1,1,1_1,1e-06,86757.51819,,0.0,1.0
2,2,1_2,0.0,152553.945635,,0.0,1.0
3,3,0_2,1e-06,568940.017807,,0.0,0.0
4,4,1_0,1e-06,568940.017807,,0.0,1.0
5,5,0_0,1e-06,86757.51819,,0.0,0.0
6,6,0_1,1e-06,59540.371127,,0.0,0.0
7,7,0_3,1e-06,59540.371127,,0.0,0.0
8,8,,86757.518191,65796.427444,13.0,86757.51819,1.0
9,9,,152553.945635,676070.000307,12.0,152553.945635,1.0


In [54]:
t.draw('c', height=400);

In [46]:
# argtreever

In [16]:
model.write_vcf(outdir="/tmp", name="test", )

In [45]:
import msprime
msprime.log_arg_likelihood??
#(ts, recombination_rate=0, Ne=1)

In [23]:
model.seqs

array([[[0, 0, 3, ..., 0, 1, 2],
        [0, 0, 3, ..., 0, 1, 2],
        [1, 0, 3, ..., 0, 1, 2],
        ...,
        [0, 0, 3, ..., 0, 1, 2],
        [0, 0, 3, ..., 0, 1, 2],
        [0, 0, 3, ..., 0, 1, 2]]], dtype=uint8)

In [26]:
vdf = model.write_vcf()

In [45]:
vdf.head()

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,r0_0,...,r1_0,r1_1,r1_2,r1_3,r1_4,r1_5,r1_6,r1_7,r1_8,r1_9
0,1,1,.,A,C,99,PASS,.,GT,0|0,...,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0
1,1,15,.,G,C,99,PASS,.,GT,0|0,...,1|1,1|1,1|1,1|1,1|1,1|1,1|1,1|1,1|1,1|1
2,1,32,.,T,C,99,PASS,.,GT,0|0,...,0|0,0|0,0|0,0|0,0|0,0|0,0|0,1|1,0|0,1|1
3,1,49,.,A,C,99,PASS,.,GT,0|0,...,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0
4,1,51,.,G,T,99,PASS,.,GT,1|1,...,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0,0|0


In [None]:
model.w

In [46]:
model.seqs.shape

(1, 20, 100000)

In [43]:
def get_variants(model, i):
    return b"".join(ipcoal.io.transformer.convert_intarr_to_bytearr(model.seqs[:, :, i - 1])).decode()

In [44]:
print("NAMES\t" + "\t".join(model.alpha_ordered_names))
print("REGION\tchr\t1\t1000000")

for idx in vdf.index:
    pos = vdf.POS[idx]
    print(f"{pos}\t{get_variants(model, pos)}")

NAMES	r0_0	r0_1	r0_2	r0_3	r0_4	r0_5	r0_6	r0_7	r0_8	r0_9	r1_0	r1_1	r1_2	r1_3	r1_4	r1_5	r1_6	r1_7	r1_8	r1_9
REGION	chr	1	1000000
1	AACAAAAAAAAAAAAAAAAA
15	GGGGGGGGGGCCCCCCCCCC
32	TTTTTTTTTTTTTTTTTCTC
49	AACAAAAAAAAAAAAAAAAA
51	TGGGTGGTGGGGGGGGGGGG
61	AAAAAAAATAAAAAAAAAAA
78	TTTTTTTTTTCTTCCCTCCC
91	AAAATAAAAAAAAAAAAAAA
112	CTTTCTTCTTTTTTTTTTTT
116	ACCCACCACCCCCCCCCCCC
147	CCCCCCCCCCTTTTTTTTTT
160	GGGGGGGGGGCCCCCCCCCC
161	TTATTTTTTTTTTTTTTTTT
179	GTGGGGGGGGGGGGGGGGGG
190	CCGGCCCCGCGGGGGGGGGG
202	CGGGCGGCGGGGGGGGGGGG
208	GGGGGGGGGGCCCCCCCCCC
224	TTTGTTTTGTTTTTTTTTTT
234	AAAAAAAAAATTTTTTTTTT
247	CCCCCCACCCCCCCCCCCCC
251	TTATTTTTTTTTTTTTTTTT
265	GTGCGTGGCTGGGGGGGGGG
268	GGGGGGGGGGGGGGGGTGGG
296	GGAGGGGGGGGGGGGGGGGG
312	CCGCCCCCCCGGGGGGGGGG
321	CCCGCCCCGCCCCCCCCCCC
364	TTTTTTTTTTTGGTTTGTTT
368	GAGGGAGGGAGGGGGGGGGG
394	TTGTTTTTTTGGGGGGGGGG
409	GGGGGGGGGGGGGGGGGTGT
421	ATTTATTATTTTTTTTTTTT
444	AATTAAAATATTTTTTTTTT
469	GGGGGGGGGGAGGAAAGAAA
474	TTCTTTTTTTTTTTTTTTTT
489	CCCCCCCCCCCAACCCCCCC
492	AAA

In [17]:
sts = ts.simplify(filter_sites=False)

In [21]:
ts.num_trees, sts.num_trees

(29, 24)

In [23]:
t0 = ts.first(sample_lists=True)
for tree in ts.trees(sample_lists=True):
    print(t0.kc_distance(tree, lambda_=1), t0.kc_distance(tree, lambda_=0))
    t0 = tree.copy()

LibraryError: Unsimplified trees with unary nodes are not supported. (TSK_ERR_UNARY_NODES)

In [67]:
imap = model.get_imap_dict()

In [68]:
gtrees = toytree.mtree(model.df.genealogy)

In [69]:
gtrees.draw();

In [70]:
model.df.nbps

0     177.852871
1     113.481644
2     221.924836
3     184.679031
4      71.007865
         ...    
83    299.716833
84     80.974174
85     49.583483
86    125.140237
87     35.415211
Name: nbps, Length: 88, dtype: float64

In [71]:
model.df

Unnamed: 0,locus,start,end,nbps,nsnps,tidx,genealogy
0,0,0.000000,177.852871,177.852871,0,0,((((((((((((((r0_1:19561...
1,0,177.852871,291.334515,113.481644,0,1,((((((((((((((r0_1:19561...
2,0,291.334515,513.259351,221.924836,0,2,((((((((((((((r0_1:19561...
3,0,513.259351,697.938382,184.679031,0,3,((((((((((((((r0_1:19561...
4,0,697.938382,768.946247,71.007865,0,4,((((((((((((((r0_1:19561...
...,...,...,...,...,...,...,...
83,0,9409.170060,9708.886894,299.716833,0,83,((((((((((((((((((r2_0:3...
84,0,9708.886894,9789.861068,80.974174,0,84,(((((((((((r1_0:10612.79...
85,0,9789.861068,9839.444552,49.583483,0,85,(((((((((((r1_0:10612.79...
86,0,9839.444552,9964.584789,125.140237,0,86,(((((((((((r1_0:10612.79...


In [7]:
from ipcoal.msc.embedding import get_genealogy_embedding_table

In [45]:
garr = table[table[:, 6] == 0]
np.nonzero(garr[:, 7 + 5])[0]
np.nonzero((garr[:, 0] <= 100) & (garr[:, 1] >= 100))[0]


array([1])

In [None]:
garr

In [15]:
table = get_genealogy_embedding_table(model.tree, gtrees, imap, encode=True, df=False)

In [32]:
from ipcoal.smc.likelihood.embedding import get_fast_genealogy_embedding_table

In [44]:
# %%timeit
aa = get_fast_genealogy_embedding_table(model.tree, gtree, imap, df=True, encode=False)

In [47]:
a

Unnamed: 0,start,stop,st_node,neff,nedges,coal,edges,dist
0,0.0,23045.022173,0,100000.0,2,8.0,"[0, 1]",23045.022173
1,23045.02,500000.0,0,100000.0,1,,[8],476954.977827
2,0.0,31536.172978,1,100000.0,2,9.0,"[2, 3]",31536.172978
3,31536.17,500000.0,1,100000.0,1,,[9],468463.827022
4,0.0,193120.900455,2,100000.0,2,12.0,"[6, 7]",193120.900455
5,193120.9,500000.0,2,100000.0,1,,[12],306879.099545
6,0.0,119467.289232,3,100000.0,2,11.0,"[4, 5]",119467.289232
7,119467.3,500000.0,3,100000.0,1,,[11],380532.710768
8,500000.0,501881.673883,4,100000.0,2,10.0,"[8, 9]",1881.673883
9,501881.7,1000000.0,4,100000.0,1,,[10],498118.326117


In [45]:
aa

Unnamed: 0,start,stop,st_node,neff,nedges,dist,edges
0,0.0,23045.02,0,100000.0,2,23045.02,"[0, 1]"
1,23045.02,500000.0,0,100000.0,1,476955.0,[8]
2,0.0,31536.17,1,100000.0,2,31536.17,"[2, 3]"
3,31536.17,500000.0,1,100000.0,1,468463.8,[9]
4,0.0,193120.9,2,100000.0,2,193120.9,"[6, 7]"
5,193120.9,500000.0,2,100000.0,1,306879.1,[12]
6,0.0,119467.3,3,100000.0,2,119467.3,"[4, 5]"
7,119467.3,500000.0,3,100000.0,1,380532.7,[11]
8,500000.0,501881.7,4,100000.0,2,1881.674,"[8, 9]"
9,501881.7,1000000.0,4,100000.0,1,498118.3,[10]


In [20]:
t = TreeEmbedding(model.tree, genealogies=toytree.mtree(model.df.genealogy), imap=imap)

In [23]:
t.earr

array([[0.00000000e+00, 2.66470739e+05, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.66470739e+05, 5.00000000e+05, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 3.23124180e+05, 1.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [9.34235459e+05, 1.00000000e+06, 5.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       [1.00000000e+06, 1.10174437e+06, 6.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 0.00000000e+00],
       [1.10174437e+06,            inf, 6.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])

In [13]:
x = ipcoal.smc.likelihood.embedding.get_fast_genealogy_embedding_table(
    model.tree, toytree.tree(model.df.genealogy[0]), imap, gidx=99, encode=False, df=False)

In [14]:
pd.DataFrame(x)

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.0,500000.0,0.0,100000.0,2.0,500000.0,99,"[0, 1]"
1,0.0,154685.214895,1.0,100000.0,2.0,154685.214895,99,"[2, 3]"
2,154685.214895,500000.0,1.0,100000.0,1.0,345314.785105,99,[8]
3,0.0,71607.515,2.0,100000.0,2.0,71607.515,99,"[4, 5]"
4,71607.515,500000.0,2.0,100000.0,1.0,428392.485,99,[11]
5,0.0,500000.0,3.0,100000.0,2.0,500000.0,99,"[6, 7]"
6,500000.0,520954.193664,4.0,100000.0,3.0,20954.193664,99,"[0, 1, 8]"
7,520954.193664,671258.37898,4.0,100000.0,2.0,150304.185315,99,"[0, 9]"
8,671258.37898,1000000.0,4.0,100000.0,1.0,328741.62102,99,[10]
9,500000.0,631013.549286,5.0,100000.0,3.0,131013.549286,99,"[6, 7, 11]"


In [8]:
model.sim_loci(nloci=1, nsites=1e4)

In [11]:
model.ts_dict[0].dump("../argweaver/ts.trees")

### write to argweaver .sites format
...

In [13]:
model.write_snps_to_hdf5(name="test", outdir="../argweaver/")

wrote 556 SNPs to /home/deren/Documents/waiting-distances/notebooks/../argweaver/test.snps.hdf5


In [14]:
model.seqs

array([[[1, 2, 2, ..., 0, 1, 3],
        [1, 2, 2, ..., 0, 1, 3],
        [1, 2, 2, ..., 0, 1, 3],
        ...,
        [1, 2, 2, ..., 0, 1, 3],
        [1, 2, 2, ..., 0, 1, 3],
        [0, 2, 2, ..., 0, 1, 3]]], dtype=uint8)