# Test to assess whether ROH caller works
Run ROH caller on known test cases to assess whether it behaves correctly

In [6]:
### Some Code to set right paths on Harald's Machine
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os as os
import sys as sys
import multiprocessing as mp
import sys
import socket as socket

# Pick the repository root from the hostname of the machine running the notebook.
# Unknown hosts abort here, so no later cell runs with an unset `path`.
socket_name = socket.gethostname()
print(socket_name)
if socket_name == "VioletQueen":
    path = "/home/harald/git/HAPSBURG/"   # The Path on Harald's machine
elif socket_name.startswith("midway2"):
    print("Midway jnovmbre partition detected.")
    path = "/project2/jnovembre/hringbauer/HAPSBURG/"  # The Path on Midway Cluster
else: 
    raise RuntimeWarning("Not compatible machine. Check!!")
    
# All relative paths ("./Data/...", "./Empirical/...") used by the tests below
# are resolved against this directory.
os.chdir(path)  # Set the right Path (in line with Atom default)

print(os.getcwd()) # Show the current working directory. Should be the HAPSBURG repository root chosen above
print(f"CPU Count: {mp.cpu_count()}")

### If wanting to use local version and not  pip installed version
#sys.path.append("./package/") # Append local Hapsburg Folder
# "./package/" is a relative entry, so it refers to the folder inside the
# directory set by os.chdir above; inserting at index 0 gives it priority
# over any other entry on sys.path.
sys.path.insert(0,"./package/")  # hack to get local package first in path
from hapsburg.PackagesSupport.hapsburg_run import hapsb_ind, hapsb_chrom  # Need this import

midway2-0403.rcc.local
Midway jnovmbre partition detected.
/project2/jnovembre/hringbauer/HAPSBURG
CPU Count: 28


In [11]:
##############################################
### Helper Functions for tests

def check_max_roh(path_roh_file, roh_max=(0.05,0.1)):
    """Tell whether the longest block in a ROH table lies inside an open interval.

    path_roh_file: Path to a CSV file that has a "lengthM" column
    roh_max: Pair (lower, upper) [Morgan]; the check is the strict
    comparison lower < max(lengthM) < upper.
    Return: Truth value of that comparison."""
    roh_table = pd.read_csv(path_roh_file)
    longest = roh_table["lengthM"].max()
    lower, upper = roh_max[0], roh_max[1]
    return lower < longest < upper

##############################################

def test_chromosome_roh():
    """Run hapsb_ind on chromosome 20 of individual I1178 and check the longest ROH.

    The ROH table expected for that chromosome is deleted up front, so the
    check cannot read a table left over from an earlier run. The test passes
    when the maximum of the "lengthM" column lies strictly between 0.14 and
    0.17 (the known ROH is ca. 15 cM).

    Return: 0 after printing a success message.
    Raises AssertionError if the longest ROH is outside that range."""
    path_roh_file = "./Empirical/Eigenstrat/Example/I1178/chr20/roh.csv"

    ### Remove output of a previous run, if there is any
    if os.path.exists(path_roh_file):
        os.remove(path_roh_file)

    ### Input data: target genotypes, reference panel and its meta table
    input_paths = dict(
        path_targets='./Data/ExampleData/Levant_ChL',  # The path before the .ind, .snp, .geno
        h5_path1000g='./Data/1000Genomes/HDF5/1240kHDF5/all1240int8/chr',
        meta_path_ref='./Data/1000Genomes/Individuals/meta_df_all.csv',
    )
    ### Where and how results are written
    output_settings = dict(
        folder_out='./Empirical/Eigenstrat/Example/', prefix_out='',
        delete=False, output=True, save=True, save_fp=False,
        logfile=False, combine=False, file_result='_roh_full.csv',
    )
    ### Model choices and their numerical parameters
    model_settings = dict(
        e_model='haploid', p_model='Eigenstrat', post_model='Standard',
        n_ref=2504, exclude_pops=[], readcounts=True, random_allele=True,
        roh_in=1, roh_out=20, roh_jump=300, e_rate=0.01, e_rate_ref=0.0,
        cutoff_post=0.999, max_gap=0, roh_min_l=0.01,
    )

    hapsb_ind(iid="I1178", chs=range(20, 21), processes=1,
              **input_paths, **output_settings, **model_settings)

    ### Check the freshly written ROH table
    in_range = check_max_roh(path_roh_file, roh_max=(0.14,0.17))
    assert in_range
    print("\nTest Passed!")
    return 0

##############################################
### Test Individual ROH Caller

def test_individual_roh(procs=6):
    """Run hapsb_ind on chromosomes 1-22 of individual I1178 and check the summed ROH.

    Exercises the per-individual run together with multiprocessing and the
    combining of per-chromosome results into one table.

    procs: Number of processes handed to hapsb_ind
    Return: 0 after printing a success message.
    Raises AssertionError unless the sum of all "lengthM" values above 0.04
    in the combined table lies strictly between 6 and 8."""
    path_full = "./Empirical/Eigenstrat/Example/I1178_roh_full.csv"

    ### Collect output of earlier runs: per-chromosome tables first, combined table last
    old_files = [f"./Empirical/Eigenstrat/Example/I1178/chr{ch}/roh.csv"
                 for ch in range(1,23)]
    old_files.append(path_full)

    for old_file in old_files:
        if not os.path.exists(old_file):
            continue
        os.remove(old_file)
        print(f"Deleted {old_file}")

    run_settings = dict(
        iid="I1178", chs=range(1,23), processes=procs,
        path_targets='./Data/ExampleData/Levant_ChL',
        h5_path1000g='./Data/1000Genomes/HDF5/1240kHDF5/all1240int8/chr',
        meta_path_ref='./Data/1000Genomes/Individuals/meta_df_all.csv',
        folder_out='./Empirical/Eigenstrat/Example/', prefix_out='',
        e_model="haploid", p_model="Eigenstrat",
        random_allele=True, readcounts=False,
        delete=False, logfile=True, combine=True,
    )
    hapsb_ind(**run_settings)

    ### Sum up the long blocks of the combined table
    roh_table = pd.read_csv(path_full)
    is_long = roh_table["lengthM"] > 0.04
    s_roh = roh_table.loc[is_long, "lengthM"].sum()
    assert 6 < s_roh < 8
    print("\n Test passed!")
    return 0

##############################################
### Test X Chromosome IBD Caller

def test_X_IBD():
    """Run hapsb_chrom on chromosome 23 for the pair I15965 / I16171 and check the longest block.

    Uses the "IBD_X" posterior model with the "EigenstratX" preprocessing model.
    All inputs and outputs are absolute paths on the Midway cluster, so this
    test presumably only works there (verify before running elsewhere).
    The table expected at .../testx/I15965_I16171/chrX/roh.csv is deleted up
    front, so the check cannot read a table left over from an earlier run.

    Return: 0 after printing a success message.
    Raises AssertionError unless the maximum of the "lengthM" column lies
    strictly between 0.35 and 0.38."""
    folder_out = "/project2/jnovembre/hringbauer/HAPSBURG/Empirical/dumpster/testx/"
    path_roh_file = "/project2/jnovembre/hringbauer/HAPSBURG/Empirical/dumpster/testx/I15965_I16171/chrX/roh.csv"

    ### Remove output of a previous run, if there is any
    if os.path.exists(path_roh_file):
        os.remove(path_roh_file)

    run_settings = dict(
        iid=["I15965","I16171"], ch=23,
        save=True, save_fp=False, logfile=False,
        n_ref=2504, diploid_ref=False, exclude_pops=[],
        e_model='readcount', p_model='EigenstratX', post_model="IBD_X",
        readcounts=True, random_allele=False,
        path_targets="/project2/jnovembre/hringbauer/caribbean_roh/data/eigenstrat/v421_CaribIllu1000GancSam_bySite_PAM",
        h5_path1000g="/project2/jnovembre/hringbauer/HAPSBURG/Data/1000Genomes/HDF5/1240kHDF5/all1240/chr",
        meta_path_ref="/project2/jnovembre/hringbauer/HAPSBURG/Data/1000Genomes/Individuals/meta_df_all_sex.tsv",
        folder_out=folder_out,
    )
    hapsb_chrom(**run_settings)

    ### Check the freshly written table
    in_range = check_max_roh(path_roh_file, roh_max=(0.35,0.38))
    assert in_range
    print("\nTest Passed!")
    return 0

### Run the tests

### Single Chromosome with long ROH

In [None]:
# Single-chromosome check (chr20 of I1178); raises AssertionError if the
# longest ROH is outside the expected range.
test_chromosome_roh()

# Whole individual
Run individual I1178 on chromosomes 1-22

In [5]:
# Whole-individual check on chromosomes 1-22, passing 6 processes to hapsb_ind.
test_individual_roh(procs=6)

Doing Individual I1178...
Running 22 total jobs; 6 in parallel.
Set Output Log path: ./Empirical/Eigenstrat/Example/I1178/chr2/hmm_run_log.txt
Set Output Log path: ./Empirical/Eigenstrat/Example/I1178/chr1/hmm_run_log.txt
Set Output Log path: ./Empirical/Eigenstrat/Example/I1178/chr3/hmm_run_log.txt
Set Output Log path: ./Empirical/Eigenstrat/Example/I1178/chr4/hmm_run_log.txt
Set Output Log path: ./Empirical/Eigenstrat/Example/I1178/chr6/hmm_run_log.txt
Set Output Log path: ./Empirical/Eigenstrat/Example/I1178/chr5/hmm_run_log.txt
Combining Information for 22 Chromosomes...
Run finished successfully!

 Test passed!


0

# X Chromosome
Test IBD Calls between two male X chromosomes

In [None]:
# X chromosome IBD check for the pair I15965 / I16171; reads and writes
# absolute paths under /project2/jnovembre/hringbauer/.
test_X_IBD()