# Notebook 3: Test hybrid origin of Eriosyce chilensis

In [1]:
import ipyrad.analysis as ipa
import pandas as pd

### DATA FILES

In [2]:
# Path to the RAD SNP database (HDF5) written to the Eriosyce-ref-2021 outfiles directory.
SNPS = "../assembly/Eriosyce-ref-2021_outfiles/Eriosyce-ref-2021.snps.hdf5"

# Open the database with the SNP extracter to get the sample names it contains.
# NOTE(review): `info` is not referenced again in the cells visible here.
info = ipa.snps_extracter(SNPS)

### Sample metadata

In [3]:
# Sample metadata: one row per accession with species labels and locality.
# `accession` values are used further down as sample names in the test
# mappings, where names are written as strings (e.g. "1355"), so read the
# column as text explicitly instead of relying on type inference.
df = pd.read_csv(
    "../metadata/pablo-cacti-relabeled-re.csv",
    dtype={"accession": str},
)
df.head(10)

Unnamed: 0,accession,Genus,specie,subspecie,Localidad,Lat,Long,oldname
0,1075,Eriosyce,litoralis,,TotoralilloIVR,-30.068,-71.375,litoralis
1,1083,Eriosyce,litoralis,,TotoralilloIVR,-30.069,-71.376,litoralis
2,1093,Eriosyce,clavata,,Qda.Manqueza,-29.914,-70.943,clavata
3,1288,Eriosyce,subgibbosa,,PeninsuladeHualpenVIIIR,-36.757,-73.176,subgibbosa
4,1290,Eriosyce,subgibbosa,,PeninsuladeHualpenVIIIR,-36.757,-73.176,subgibbosa
5,1332,Eriosyce,castanea,,CerroLaLajuela(StaCruz),-34.664,-71.417,castanea
6,1333,Eriosyce,castanea,,CerroLaLajuela(StaCruz),-34.664,-71.417,castanea
7,1349,Eriosyce,litoralis,,Pichidanguii,-32.156,-71.528,subgibbosa
8,1350,Eriosyce,litoralis,,Pichidanguii,-32.156,-71.528,subgibbosa
9,1351,Eriosyce,litoralis,,Pichidanguii,-32.156,-71.528,subgibbosa


## Setup ABBA-BABA tool

In [4]:
# Build the ABBA-BABA tool on the same SNP database loaded above; reuse the
# SNPS path constant rather than repeating the literal so both always agree.
tool = ipa.baba2(data=SNPS)

In [5]:
# Set the 'cores' entry of the tool's ipcluster settings to 10.
# NOTE(review): presumably the number of CPU cores used for the parallel runs
# below — adjust to the machine this is executed on.
tool.ipcluster['cores'] = 10

## Setup Tests

In [27]:
# Sample IDs for each population in this round of tests, listed once so that
# every test that names a population uses exactly the same individuals.
populations = {
    "lito": ["1355", "1358", "1371", "1383"],
    "mutab": ["1539", "1538", "1536", "1589"],
    "chil": ["1549", "1550", "1548", "1542"],
    "chil-alb": ["1510", "1522", "1585", "1584"],
    "hybrid-HPG": ["HPG1", "HPG2", "HPG4"],            # lit-x-mutab
    "hybrid-SXM": ["SXM1", "SXM2"],                    # lit-x-mutab
}

# (p1, p2, p3) population labels for each four-taxon test; p4 is always BV190.
quartets = [
    ("mutab", "hybrid-HPG", "lito"),
    ("mutab", "hybrid-SXM", "lito"),
    ("chil", "lito", "mutab"),
    ("chil-alb", "lito", "mutab"),
    ("chil-alb", "chil", "mutab"),
    ("chil-alb", "chil", "lito"),
]

# One mapping per test. Each entry gets its own copy of the sample lists so
# the tests do not share list objects.
IMAPS = [
    {
        "p4": ["BV190"],
        "p3": list(populations[p3]),
        "p2": list(populations[p2]),
        "p1": list(populations[p1]),
    }
    for p1, p2, p3 in quartets
]

### Run analyses

In [28]:
# Run every test in IMAPS with 1000 bootstrap replicates. `minmaps` carries
# one dict per test, each assigning 0.75 to p1-p4.
# NOTE(review): 0.75 is presumably the minimum sample coverage required per
# population — confirm against the ipyrad baba2 documentation.
tool.run(
    nboots=1000,
    imaps=IMAPS,
    minmaps=[dict.fromkeys(("p1", "p2", "p3", "p4"), 0.75) for _test in IMAPS],
)

[####################] 100% 0:14:31 | abba-baba tests 


### Summary of results
The bootstrap standard deviation (`bootstd`), and therefore `Z`, will vary slightly between repeated runs because bootstrap resampling is random.

In [32]:
# Show the results table held by the tool after the run above (one row per test in IMAPS).
tool.results_table

Unnamed: 0,D,bootstd,Z,ABBA,BABA,nSNPs,nloci
0,0.323,0.033,9.931,166.706,85.299,9945,1944
1,0.504,0.026,19.649,378.592,124.714,13821,2793
2,-0.074,0.026,2.812,169.659,196.628,18759,3435
3,-0.102,0.021,4.979,216.484,265.733,24901,4525
4,-0.017,0.025,0.657,172.554,178.419,17938,3405
5,-0.075,0.02,3.75,297.469,345.648,21496,4236


### Test for geographically distant ingroup species

In [8]:
# Sample IDs per population for this round, listed once. Here lito is
# represented by accessions 1075 and 1083 only.
populations_distant = {
    "lito": ["1075", "1083"],
    "mutab": ["1539", "1538", "1536", "1589"],
    "chil": ["1549", "1550", "1548", "1542"],
    "chil-alb": ["1510", "1522", "1585", "1584"],
    "hybrid-HPG": ["HPG1", "HPG2", "HPG4"],            # lit-x-mutab
    "hybrid-SXM": ["SXM1", "SXM2"],                    # lit-x-mutab
}

# (p1, p2, p3) population labels for each four-taxon test; p4 is always BV190.
quartets_distant = [
    ("mutab", "hybrid-HPG", "lito"),
    ("mutab", "hybrid-SXM", "lito"),
    ("chil", "lito", "mutab"),
    ("chil-alb", "lito", "mutab"),
    ("chil-alb", "chil", "lito"),
]

# One mapping per test. Each entry gets its own copy of the sample lists so
# the tests do not share list objects.
IMAPS = [
    {
        "p4": ["BV190"],
        "p3": list(populations_distant[p3]),
        "p2": list(populations_distant[p2]),
        "p1": list(populations_distant[p1]),
    }
    for p1, p2, p3 in quartets_distant
]

In [9]:
# Run the tests currently in IMAPS with 1000 bootstrap replicates, giving
# every population (p1-p4) of every test the same minmap value of 0.75.
# NOTE(review): 0.75 is presumably the minimum sample coverage required per
# population — confirm against the ipyrad baba2 documentation.
tool.run(
    nboots=1000,
    imaps=IMAPS,
    minmaps=[dict.fromkeys(("p1", "p2", "p3", "p4"), 0.75) for _test in IMAPS],
)

[####################] 100% 0:09:03 | abba-baba tests 


In [10]:
# Show the results table held by the tool after the run above (one row per test in IMAPS).
tool.results_table

Unnamed: 0,D,bootstd,Z,ABBA,BABA,nSNPs,nloci
0,0.32,0.04,7.951,123.275,63.53,8547,1817
1,0.476,0.033,14.23,264.273,93.747,11424,2533
2,-0.234,0.032,7.386,124.857,201.189,15462,3088
3,-0.252,0.029,8.803,159.043,266.441,19733,3882
4,-0.024,0.03,0.782,206.066,216.113,17040,3685


### Full sampling

In [36]:
# Build the full sample list for each population from the metadata table.
accession = df["accession"]
is_litoralis = df["specie"] == "litoralis"

# litoralis is split by its previous label: `lito` holds the accessions whose
# oldname is "subgibbosa", `lito_allo` holds all other litoralis accessions.
# NOTE(review): "_allo" presumably stands for allopatric — confirm.
lito_allo = accession[is_litoralis & (df["oldname"] != "subgibbosa")].tolist()
lito = accession[is_litoralis & (df["oldname"] == "subgibbosa")].tolist()

# The remaining populations are selected by species label alone.
muta = accession[df["specie"] == "mutabilis"].tolist()
chil = accession[df["specie"] == "chilensis"].tolist()
albi = accession[df["specie"] == "chilensis-albidiflora"].tolist()

In [37]:
def _quartet(p1, p2, p3):
    """Return one test mapping for the given p1-p3 sample lists, with BV190 as p4."""
    return {"p4": ["BV190"], "p3": p3, "p2": p2, "p1": p1}


# Tests using every sample of each population (lists selected from `df`).
IMAPS = [
    # lit-x-mutab samples as p2, between mutab (p1) and lito (p3)
    _quartet(p1=muta, p2=["HPG1", "HPG2", "HPG4"], p3=lito),
    _quartet(p1=muta, p2=["SXM1", "SXM2"], p3=lito),
    # chil (p1) vs lito / lito_allo (p2), with mutab as p3
    _quartet(p1=chil, p2=lito, p3=muta),
    _quartet(p1=chil, p2=lito_allo, p3=muta),
    # chil-alb (p1) vs lito / lito_allo (p2), with mutab as p3
    _quartet(p1=albi, p2=lito, p3=muta),
    _quartet(p1=albi, p2=lito_allo, p3=muta),
    # chil-alb (p1) vs chil (p2), with mutab, lito or lito_allo as p3
    _quartet(p1=albi, p2=chil, p3=muta),
    _quartet(p1=albi, p2=chil, p3=lito),
    _quartet(p1=albi, p2=chil, p3=lito_allo),
]

In [38]:
# Run the full-sampling tests in IMAPS with 1000 bootstrap replicates; each
# test gets its own minmap dict assigning 0.75 to p1-p4.
# NOTE(review): 0.75 is presumably the minimum sample coverage required per
# population — confirm against the ipyrad baba2 documentation.
tool.run(
    nboots=1000,
    imaps=IMAPS,
    minmaps=[dict.fromkeys(("p1", "p2", "p3", "p4"), 0.75) for _test in IMAPS],
)

[####################] 100% 0:19:24 | abba-baba tests 


In [39]:
# Show the results table held by the tool after the run above (one row per test in IMAPS).
tool.results_table

Unnamed: 0,D,bootstd,Z,ABBA,BABA,nSNPs,nloci
0,0.326,0.032,10.194,146.454,74.418,17157,1972
1,0.517,0.027,18.816,325.837,103.862,23805,2793
2,-0.106,0.02,5.235,87.25,107.945,23870,2388
3,-0.245,0.036,6.877,74.642,123.045,18781,2277
4,-0.108,0.015,7.213,138.659,172.13,36569,3763
5,-0.252,0.027,9.414,120.852,202.192,27554,3480
6,-0.023,0.016,1.457,95.018,99.568,22128,2405
7,-0.054,0.015,3.491,142.513,158.737,22814,2589
8,-0.03,0.023,1.315,105.184,111.605,17537,2431


In [40]:
# Repeat the tests in IMAPS with the minmap value set to 0.51 for p1-p4
# (the preceding run used 0.75), again with 1000 bootstrap replicates.
relaxed_minmaps = [
    dict.fromkeys(("p1", "p2", "p3", "p4"), 0.51) for _test in IMAPS
]
tool.run(imaps=IMAPS, minmaps=relaxed_minmaps, nboots=1000)

# Display the table for this run as the cell output.
tool.results_table

[####################] 100% 0:52:43 | abba-baba tests 


Unnamed: 0,D,bootstd,Z,ABBA,BABA,nSNPs,nloci
0,0.334,0.022,15.195,395.163,197.183,35881,3951
1,0.523,0.024,21.739,451.958,141.538,29152,3354
2,-0.091,0.014,6.726,220.792,265.238,48331,4702
3,-0.249,0.026,9.733,162.411,270.28,33827,4100
4,-0.104,0.012,8.935,276.598,340.844,60065,5993
5,-0.263,0.024,10.862,192.534,330.12,39337,4915
6,-0.021,0.011,1.85,232.022,241.982,44347,4667
7,-0.029,0.01,2.854,356.014,377.166,47051,5144
8,-0.018,0.014,1.302,226.014,234.436,31960,4377


In [42]:
# Single test with the two litoralis groups as p1/p2 and mutabilis as p3:
# lito_allo (p1) vs lito (p2), p3 = muta, p4 = BV190.
tool.run(
    imaps=[{
        "p1": lito_allo,
        "p2": lito,
        "p3": muta,
        "p4": ["BV190"],
    }],
    # Exactly one minmap dict for the one test above. It is deliberately not
    # derived from the global IMAPS, which holds the nine full-sampling tests
    # and would give a list whose length does not match `imaps`.
    minmaps=[{pop: 0.51 for pop in ('p1', 'p2', 'p3', 'p4')}],
    nboots=1000,
)

[####################] 100% 0:30:54 | abba-baba tests 


In [43]:
# Show the results table held by the tool after the run above (a single row for the single test).
tool.results_table

Unnamed: 0,D,bootstd,Z,ABBA,BABA,nSNPs,nloci
0,0.163,0.023,7.214,289.815,208.394,45870,5267
