# Run IBD on Punic Sample (including non-located individuals)

In [1]:
import numpy as np
import pandas as pd
import os
import sys as sys
import socket
import matplotlib.pyplot as plt
import multiprocessing as mp
import itertools as it
#from adjustText import adjust_text

import warnings
# Blanket filter: silences every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# Identify the machine by hostname; only hosts whose name starts with
# "compute-" are accepted, anything else aborts the cell.
socket_name = socket.gethostname()
print(socket_name)

if not socket_name.startswith("compute-"):
    raise RuntimeWarning("No compatible machine. Check!!")

print("HSM Computational partition detected.")
path = "/n/groups/reich/hringbauer/git/punic_aDNA/"  # Project root; relative paths below resolve against it

# Work from the project root and report where we ended up.
os.chdir(path)
print(os.getcwd())
print(f"CPU Count: {mp.cpu_count()}")

# Put the development checkout of the package first on sys.path so that the
# imports below pick it up ahead of any other copy.
sys.path.insert(0,"/n/groups/reich/hringbauer/git/hapBLOCK/package/")
from ancIBD.run import hapBLOCK_chroms
from ancIBD.IO.batch_run import get_run_params_from_i, save_ibd_df
from ancIBD.IO.ind_ibd import create_ind_ibd_df, ind_all_ibd_df

compute-e-16-229.o2.rc.hms.harvard.edu
HSM Computational partition detected.
/n/groups/reich/hringbauer/git/punic_aDNA
CPU Count: 28


## 1) Get the individuals to run

In [5]:
# Threshold on the "n_cov_snp" column: only individuals strictly above it are kept.
min_snp = 500000

df_meta = pd.read_csv("/n/groups/reich/hringbauer/Data/v54.1.anno.csv")
print(f"Loaded {len(df_meta)} Indiviuals for meta data")

### Load Punic individuals from the cluster assignment table
df_pun = pd.read_csv("./data/cluster_assignments_punic.v54.1d.tsv", sep="\t")
# na=False: a row with a missing label counts as "not Punic" instead of
# producing a NaN entry in the mask, which cannot be used for boolean indexing.
df_pun = df_pun[df_pun["label"].str.contains("Punic", na=False)].copy()
print(f"\nLoaded {len(df_pun)} Punic Indiviudals")

### Merge meta data into the Punic individuals
# pd.merge defaults to an inner join, so individuals without a matching "iid"
# in the meta table are dropped here; compare the two printed counts.
df_p = pd.merge(df_pun, df_meta, on="iid")
print(f"Merged in meta to {len(df_p)} Punic Indivdiuals")

### Keep only individuals above the coverage threshold
df_ibd_iid = df_p[df_p["n_cov_snp"]>min_snp].reset_index(drop=True)
iids = df_ibd_iid["iid"].values  # array of sample IDs handed to the IBD run
print(f"{len(df_ibd_iid)} Individuals with >{min_snp} 1240k SNPs covered")

Loaded 33967 Indiviuals for meta data

Loaded 144 Punic Indiviudals
Merged in meta to 144 Punic Indivdiuals
91 Individuals with >500000 1240k SNPs covered


In [6]:
# Tally the retained individuals by their "location" value.
location_counts = df_ibd_iid["location"].value_counts()
location_counts

Kerkouene     23
Tharros        9
Birgi          9
Cap Bon        8
Carthage       8
Lilybaeum      6
Motya          6
Eivissa        5
Villaricos     4
Malaga         4
Selinunte      4
Akhziv         3
Cadiz          1
Palermo        1
Name: location, dtype: int64

## 2) Run the ancIBD IBD Inference

### Test Chromosome 20 for full sample

In [7]:
%%time
version = "v54.1"
folder_in =  f"/n/groups/reich/hringbauer/git/hapBLOCK/data/hdf5/1240k_{version}/ch" # for hdf5
ch = 20
path_ibd = f'/n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/{version}.ch{ch}.tsv'
path_ibd

df_ibd = hapBLOCK_chroms(folder_in=folder_in,
                         iids=iids[:], run_iids=[],
                         ch=ch, folder_out="",
                         output=False, prefix_out='', logfile=False,
                         l_model='h5', e_model='haploid_gl2', h_model='FiveStateScaled', 
                         t_model='standard', p_col="variants/AF_ALL",
                         ibd_in=1, ibd_out=10, ibd_jump=400,
                         min_cm=6, cutoff_post=0.99, max_gap=0.0075,
                         processes=1)

save_ibd_df(df_ibd, savepath=path_ibd, create=False)

Saved 43 IBD blocks.
CPU times: user 48.1 s, sys: 1.26 s, total: 49.3 s
Wall time: 51.7 s


### 2a) Run all Chromosomes

In [8]:
# Settings that do not depend on the chromosome are set once, outside the loop.
version = "v54.1"
folder_in =  f"/n/groups/reich/hringbauer/git/hapBLOCK/data/hdf5/1240k_{version}/ch" # for hdf5

# Chromosomes 1..22, one after the other; each gets its own output table.
for ch in range(1,23):
    print(f"Running ch: {ch}")
    path_ibd = f'/n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/{version}.ch{ch}.tsv'

    df_ibd = hapBLOCK_chroms(folder_in=folder_in,
                             iids=iids[:], run_iids=[],
                             ch=ch, folder_out="",
                             output=False, prefix_out='', logfile=False,
                             l_model='h5', e_model='haploid_gl2', h_model='FiveStateScaled', 
                             t_model='standard', p_col="variants/AF_ALL",
                             ibd_in=1, ibd_out=10, ibd_jump=400,
                             min_cm=6, cutoff_post=0.99, max_gap=0.0075,
                             processes=1)

    # Persist this chromosome's IBD table before moving on to the next one.
    save_ibd_df(df_ibd, savepath=path_ibd, create=False)

Running ch: 1
Saved 724 IBD blocks.
Running ch: 2
Saved 133 IBD blocks.
Running ch: 3
Saved 72 IBD blocks.
Running ch: 4
Saved 72 IBD blocks.
Running ch: 5
Saved 58 IBD blocks.
Running ch: 6
Saved 48 IBD blocks.
Running ch: 7
Saved 77 IBD blocks.
Running ch: 8
Saved 92 IBD blocks.
Running ch: 9
Saved 62 IBD blocks.
Running ch: 10
Saved 108 IBD blocks.
Running ch: 11
Saved 42 IBD blocks.
Running ch: 12
Saved 53 IBD blocks.
Running ch: 13
Saved 44 IBD blocks.
Running ch: 14
Saved 104 IBD blocks.
Running ch: 15
Saved 495 IBD blocks.
Running ch: 16
Saved 42 IBD blocks.
Running ch: 17
Saved 73 IBD blocks.
Running ch: 18
Saved 60 IBD blocks.
Running ch: 19
Saved 58 IBD blocks.
Running ch: 20
Saved 43 IBD blocks.
Running ch: 21
Saved 146 IBD blocks.
Running ch: 22
Saved 41 IBD blocks.


### 2b) Post-process IBD Run

In [9]:
from ancIBD.IO.ind_ibd import create_ind_ibd_df, combine_all_chroms

In [10]:
%%time
combine_all_chroms(folder_base=f"/n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/{version}.ch",
                   path_save=f"/n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/{version}.ch_all.tsv")

Chromosome 1; Loaded 724 IBD
Chromosome 2; Loaded 133 IBD
Chromosome 3; Loaded 72 IBD
Chromosome 4; Loaded 72 IBD
Chromosome 5; Loaded 58 IBD
Chromosome 6; Loaded 48 IBD
Chromosome 7; Loaded 77 IBD
Chromosome 8; Loaded 92 IBD
Chromosome 9; Loaded 62 IBD
Chromosome 10; Loaded 108 IBD
Chromosome 11; Loaded 42 IBD
Chromosome 12; Loaded 53 IBD
Chromosome 13; Loaded 44 IBD
Chromosome 14; Loaded 104 IBD
Chromosome 15; Loaded 495 IBD
Chromosome 16; Loaded 42 IBD
Chromosome 17; Loaded 73 IBD
Chromosome 18; Loaded 60 IBD
Chromosome 19; Loaded 58 IBD
Chromosome 20; Loaded 43 IBD
Chromosome 21; Loaded 146 IBD
Chromosome 22; Loaded 41 IBD
Saved 2647 IBD to /n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/v54.1.ch_all.tsv.
CPU times: user 195 ms, sys: 15.6 ms, total: 211 ms
Wall time: 239 ms


In [11]:
%%time

### Takes about 1 min
df_res = create_ind_ibd_df(ibd_data = f"/n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/{version}.ch_all.tsv",
                      min_cms = [8, 12, 16, 20], snp_cm = 220, min_cm = 5, sort_col = 0,
                      savepath = f"/n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/{version}.ibd_ind.d220.tsv")

> 5 cM: 2647/2647
Of these with suff. SNPs per cM> 220:               829/2647
2     94
1     69
3     59
7     52
5     50
4     47
6     47
9     46
10    44
11    42
8     41
12    36
13    31
17    30
14    24
15    23
20    23
18    22
16    21
21    18
22     6
19     4
Name: ch, dtype: int64
Saved 363 individual IBD pairs to: /n/groups/reich/hringbauer/git/punic_aDNA/output/ibd/v54.1.ibd_ind.d220.tsv
CPU times: user 792 ms, sys: 34.6 ms, total: 827 ms
Wall time: 876 ms


## 3) Explore Results

In [12]:
# Show the per-pair IBD summary table assigned to df_res in the previous cell.
df_res

Unnamed: 0,iid1,iid2,max_IBD,sum_IBD>8,n_IBD>8,sum_IBD>12,n_IBD>12,sum_IBD>16,n_IBD>16,sum_IBD>20,n_IBD>20
329,I35351,I35333,282.987110,3332.207697,21.0,3332.207697,21.0,3332.207697,21.0,3332.207697,21.0
217,I24494,I24040,283.652203,3326.638294,20.0,3326.638294,20.0,3326.638294,20.0,3326.638294,20.0
86,I22122,I22118,213.983188,3308.844301,25.0,3308.844301,25.0,3308.844301,25.0,3274.698507,23.0
219,I24494,I24193,283.652203,2048.808393,33.0,2029.570302,31.0,2001.710003,29.0,1966.079616,27.0
85,I22122,I22117,176.386502,1706.853619,24.0,1706.853619,24.0,1706.853619,24.0,1706.853619,24.0
...,...,...,...,...,...,...,...,...,...,...,...
132,I22252,I28504,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
131,I22252,I22260,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
130,I22236,I35334,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
128,I22236,I24045,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0


# Area 51
Try out code here