In [3]:
import numpy as np
import os as os
import sys as sys
import multiprocessing as mp
import pandas as pd
import socket

### Pick the repository root for the machine this notebook runs on (cluster or at home)
socket_name = socket.gethostname()
print(socket_name)
if socket_name == "VioletQueen":
    path = "/home/harald/git/HAPSBURG/"   # The Path on Harald's machine
elif socket_name.startswith("midway2"):
    print("Midway jnovmbre partition detected.")
    path = "/project2/jnovembre/hringbauer/HAPSBURG/"  # The Path on Midway Cluster
else:
    # Unknown host: abort with a genuine error class (RuntimeWarning is a warning
    # category meant for warnings.warn) and report the hostname, so it is obvious
    # which branch has to be added above.
    raise RuntimeError(f"Not compatible machine. Check!! (hostname: {socket_name!r})")

os.chdir(path)  # Every relative path used below is resolved against this folder
print(os.getcwd())  # Show the current working directory. Should be the HAPSBURG root folder set above
print(f"CPU Count: {mp.cpu_count()}")

### The working directory is assumed to be the repository root from here on.
# Add the project's source folders to the module search path (appended in this order).
sys.path.extend([
    "./Python3/",
    "./PackagesSupport/parallel_runs/",
    "./PackagesSupport/",
])

from hmm_inference import HMM_Analyze   # The HMM core object
from helper_functions import prepare_path, multi_run, combine_individual_data
from hapsburg_run import hapsb_chrom, hapsb_ind
from pp_individual_roh_csvs import create_combined_ROH_df, give_iid_paths, pp_individual_roh

midway2-0402.rcc.local
Midway jnovmbre partition detected.
/project2/jnovembre/hringbauer/HAPSBURG
CPU Count: 28


### Test single Individual

In [None]:
# Trial run of hapsb_ind on a single individual: chs 1-22, one process, logfile=False.
# NOTE(review): "El Mirón_d" is not among the Freilich20 iids visible in the summary
# table at the end of this notebook -- confirm this iid exists in path_targets.
hapsb_ind(
    iid="El Mirón_d",
    chs=range(1, 23),
    processes=1,
    path_targets="./Data/Freilich20/AncCroatia1240KallSNPs",
    base_out_folder="./Empirical/Eigenstrat/Freilich20/",
    e_model="haploid",
    p_model="EigenstratUnpacked",
    n_ref=2504,
    destroy_phase=True,
    readcounts=False,
    delete=False,
    logfile=False,
    combine=True,
)

# Run all Individuals

In [5]:
# Read the Freilich20 meta table and keep only the rows whose n_cov_snp exceeds 300000.
meta_path = "./Data/Freilich20/meta_processed.csv"
df = pd.read_csv(meta_path).loc[lambda meta: meta["n_cov_snp"] > 300000]
len(df["iid"])  # number of individuals passing the filter

28

In [None]:
# Run hapsb_ind for every individual kept in `df`, one individual after another.
# The settings shared by all calls are collected once, outside the loop.
run_settings = dict(
    chs=range(1, 23),
    processes=8,
    path_targets="./Data/Freilich20/AncCroatia1240KallSNPs",
    base_out_folder="./Empirical/Eigenstrat/Freilich20/",
    e_model="haploid",
    p_model="EigenstratUnpacked",
    n_ref=2504,
    destroy_phase=True,
    readcounts=False,
    delete=False,
    logfile=True,
    combine=True,
)

for iid in df["iid"].values:
    print(f"Doing Individual: {iid}")
    hapsb_ind(iid=iid, **run_settings)

### Postprocess Freilich20 Individuals into one .csv

In [8]:
# Select the individuals to post-process: rows of the meta table whose n_cov_snp
# exceeds the threshold below.
meta_path = "./Data/Freilich20/meta_processed.csv"
min_cov_snps = 4e5  # one value drives both the filter and the printed message
df_anno = pd.read_csv(meta_path)
df_ana = df_anno[df_anno["n_cov_snp"] > min_cov_snps]
print(f"{len(df_ana)} Individuals with coverage >{min_cov_snps:.0f}")
iids = df_ana["iid"].values  # iids handed to the post-processing step
len(iids)

28 Individuals with coverage >400000


28

In [9]:
%%time
# Post-process the per-individual results of all `iids` into one table; the table
# is returned as df1 and also written to save_path.
df1 = pp_individual_roh(
    iids,
    meta_path="./Data/Freilich20/meta_processed.csv",
    base_folder="./Empirical/Eigenstrat/Freilich20/",
    save_path="./Empirical/Eigenstrat/Freilich20/combined_roh05.csv",
    output=False,
    min_cm=[4, 8, 12],
    snp_cm=50,
    gap=0.5,
)

Loaded 28 / 28 Individuals from Meta
Saved to: ./Empirical/Eigenstrat/Freilich20/combined_roh05.csv
CPU times: user 12.6 s, sys: 0 ns, total: 12.6 s
Wall time: 13.1 s


In [10]:
# Display the table returned by pp_individual_roh in the previous cell.
df1

Unnamed: 0,iid,pop,max_roh,sum_roh>4,n_roh>4,sum_roh>8,n_roh>8,sum_roh>12,n_roh>12,lat,lon,age,age_range,study,clst,mean_cov,n_cov_snp,include_alt
0,ZEM05,Croatia_MN,45.577901,302.770595,15,289.826496,13,263.549485,10,45.747,18.57,,4700-4300 BCE,Freilich20,Croatia_MN,0.658676,790411,1
1,ZEM09,Croatia_MN,25.909401,204.762349,16,174.349139,11,116.882524,5,45.747,18.57,,4700-4300 BCE,Freilich20,Croatia_MN,0.723044,867653,1
2,ZEM02,Croatia_MN,23.0921,64.148899,7,37.982001,2,37.982001,2,45.747,18.57,,4700-4300 BCE,Freilich20,Croatia_MN,0.713307,855968,1
3,ZEM07,Croatia_MN,29.969901,48.308194,3,41.647995,2,29.969901,1,45.747,18.57,,4790-4558 calBCE,Freilich20,Croatia_MN,0.718101,861721,1
4,JAG78,Croatia_Jagodnjak_MBA,12.838701,36.847298,5,12.838701,1,12.838701,1,45.687,18.506,,1800-1600 BCE,Freilich20,Croatia_Jagodnjak_MBA,0.768548,922257,1
5,JAG58,Croatia_Jagodnjak_MBA,12.301404,36.251099,3,36.251099,3,24.487204,2,45.687,18.506,,1800-1600 BCE,Freilich20,Croatia_Jagodnjak_MBA,0.635173,762207,1
6,ZEM33,Croatia_MN,25.842404,33.203006,2,25.842404,1,25.842404,1,45.747,18.57,,4603-4224 calBCE,Freilich20,Croatia_MN,0.591951,710341,1
7,JAG93,Croatia_Jagodnjak_MBA,15.2908,28.0721,3,23.8525,2,15.2908,1,45.687,18.506,,1800-1600 BCE,Freilich20,Croatia_Jagodnjak_MBA,0.672713,807256,1
8,JAG06,Croatia_Jagodnjak_MBA,9.6442,22.9088,3,9.6442,1,0.0,0,45.687,18.506,,1800-1600 BCE,Freilich20,Croatia_Jagodnjak_MBA,0.705704,846845,1
9,ZEM14,Croatia_MN,10.793698,18.431198,2,10.793698,1,0.0,0,45.747,18.57,,4763-4536 calBCE,Freilich20,Croatia_MN,0.761184,913421,1
