# Test a Full Run on a single CPU
Goal: Run the full HMM analysis for one individual and one chromosome on a single CPU, as a smoke test after code updates or rewrites.

In [1]:
import numpy as np
import os as os
import sys as sys
import multiprocessing as mp
import pandas as pd
import socket

# Select the repository root for the machine this notebook is running on.
socket_name = socket.gethostname()
print(socket_name)
if socket_name == "VioletQueen":
    path = "/home/harald/git/HAPSBURG/"   # The Path on Harald's machine
elif socket_name.startswith("midway2"):
    print("Midway jnovembre partition detected.")
    path = "/project/jnovembre/hringbauer/HAPSBURG/"  # The Path on Midway Cluster
else:
    # An unknown host is a hard error, not a warning: nothing below can work
    # without a valid repository root.
    raise RuntimeError("Not compatible machine. Check!!")

os.chdir(path)  # Work from the repository root (in line with Atom default)

# The entry is relative to the repository root set above. Guard the append so
# that re-running this cell does not keep adding duplicates to sys.path.
if "./Python3/" not in sys.path:
    sys.path.append("./Python3/")
from hmm_inference import HMM_Analyze   # Do not move. Must come after the sys.path update.
#sys.path.append("./Python3/create1000G_Mosaic/")  # Since now we are in the Root Directory
#from createMosaicsMulti import Mosaic_1000G_Multi  # Import the object that can create the Multiruns

print(os.getcwd())  # Show the current working directory. Should be the HAPSBURG root directory
print(f"CPU Count: {mp.cpu_count()}")

VioletQueen
/home/harald/git/HAPSBURG
CPU Count: 4


In [2]:
def analyze_individual(iid, ch=3, n_ref=503, save=True, save_fp=False,
                       path_mosaic="./Simulated/1000G_Mosaic/TSI/ch3_5cm/",
                       exclude_pops=["TSI", ], prefix_out="",
                       roh_in=1, roh_out=10, roh_jump=100, e_rate=0.001,
                       destroy_phase=True, clean_up=False,
                       e_model="haploid", p_model="MosaicHDF5"):
    """Run the full HMM analysis for one individual and one chromosome.

    Thin wrapper around the HMM_Analyze class: it loads the pre-processing
    model and the data, loads the emission/transition objects, sets their
    parameters, then calculates the posterior and runs the post-processing.

    Parameters
    ----------
    iid : individual to analyze (passed to hmm.load_data).
    ch : chromosome to analyze (passed to hmm.load_data).
    n_ref : number of references (passed to hmm.load_data).
    save : passed to HMM_Analyze, calc_posterior and post_processing.
    save_fp : passed to HMM_Analyze.
    path_mosaic : input folder; must already exist. It is handed to the
        pre-processing object via set_folder.
    exclude_pops : populations handed to the pre-processing object as
        `excluded`. A list is copied first, so neither the caller's list nor
        the shared default can be modified by the callee.
    prefix_out : passed to the pre-processing object as `prefix_out_data`.
    roh_in, roh_out, roh_jump : parameters set on the transition object.
    e_rate : parameter set on the emission object.
    destroy_phase : passed to the pre-processing object.
    clean_up : if truthy, call hmm.post_obj.clean_up() at the end
        (deletes all but the ROH csvs).
    e_model, p_model : names of the emission and pre-processing models,
        passed to HMM_Analyze (e.g. e_model="diploid_gt" for diploid data).

    Raises
    ------
    RuntimeError
        If `path_mosaic` does not exist.
    """
    ### Fail early if the input folder is missing (nothing is created here)
    if not os.path.exists(path_mosaic):
        raise RuntimeError(f"Path {path_mosaic} not Found. Check!")

    # Hand over a private copy of the list: the default is a single list object
    # shared by all calls, so an in-place change downstream would otherwise
    # leak into later runs. Non-list values are passed through untouched.
    excluded = list(exclude_pops) if isinstance(exclude_pops, list) else exclude_pops

    ### Do the full HMM Analysis
    hmm = HMM_Analyze(cython=2, p_model=p_model, e_model=e_model,
                      manual_load=True, save=save, save_fp=save_fp)  # diploid_gt for analysis of dpld.

    ### Load and prepare the pre-processing Model
    hmm.load_preprocessing_model()              # Load the preprocessing Model
    hmm.p_obj.set_folder(path_mosaic)           # Set the Folder
    hmm.p_obj.set_params(destroy_phase=destroy_phase, prefix_out_data=prefix_out,
                         excluded=excluded)

    ### Optional override of the reference panel paths.
    ### Keep it disabled when running with the European reference!
    #hmm.p_obj.set_params(h5_path1000g = "./Data/1000Genomes/HDF5/1240kHDF5/all1240/chr",
    #                     meta_path_ref = "./Data/1000Genomes/Individuals/meta_df_all.csv")

    hmm.load_data(iid=iid, ch=ch, n_ref=n_ref)  # Load the actual Data

    ### Emission and Transition Model
    hmm.load_secondary_objects()

    ### Set the Parameters
    hmm.t_obj.set_params(roh_in=roh_in, roh_out=roh_out, roh_jump=roh_jump)
    hmm.e_obj.set_params(e_rate=e_rate)

    #hmm.calc_viterbi_path(save=save)           # Calculate the Viterbi Path.
    hmm.calc_posterior(save=save)              # Calculate the Posterior.
    hmm.post_processing(save=save)             # Do the Post-Processing.

    if clean_up:
        hmm.post_obj.clean_up()   # Delete all but ROH csvs

    print(f"Analysis of {iid} and Chr. {ch} successfully concluded!")

In [6]:
%%time
# Haploid test run (default emission model) on simulated individual "iid0";
# results are written under the "test/" output prefix.
analyze_individual(
    iid="iid0",
    ch=3,
    n_ref=500,
    path_mosaic="./Simulated/1000G_Mosaic/TSI/ch3_5cm/",
    prefix_out="test/",
    exclude_pops=["TSI"],
    # Transition and emission parameters
    roh_in=100,
    roh_out=100,
    roh_jump=300,
    e_rate=0.001,
    # Run options
    destroy_phase=True,
    save=True,
    save_fp=False,
    clean_up=False,
)

Using Low-Mem Cython Linear Speed Up.
Loaded Pre Processing Model: MosaicHDF5
Loading Individual: iid0

Loaded 77652 variants
Loaded 20 individuals
HDF5 loaded from ./Simulated/1000G_Mosaic/TSI/ch3_5cm/data.h5

Loaded 77652 variants
Loaded 503 individuals
HDF5 loaded from ./Data/1000Genomes/HDF5/1240kHDF5/Eur1240chr3.hdf5

Intersection on Positions: 77652
Nr of Matching Refs: 77652 / 77652
Full Intersection Ref/Alt Identical: 77652 / 77652
396 / 503 Individuals included in Reference
Extraction of 792 Haplotypes Complete!
Markers called 77652 / 77652
Successfully saved to: ./Simulated/1000G_Mosaic/TSI/ch3_5cm/output/iid0/chr3/test/
Shuffling phase of target...
Successfully loaded Data from: ./Simulated/1000G_Mosaic/TSI/ch3_5cm/output/iid0/chr3/test/
Loaded Emission Model: haploid
Loaded Transition Model: model
Loaded Post Processing Model: Standard
Minimum Genetic Map: 0.0000
Maximum Genetic Map: 2.2326
Gaps bigger than 0.1 cM: 214
Maximum Gap: 0.2348 cM
Loaded Transition and Emission M

## Run the same test on diploid genotype data

In [3]:
%%time
# Diploid genotype test run on simulated individual "iid0": uses the
# "diploid_gt" emission model and leaves the phase untouched; results are
# written under the "testDIPLOID/" output prefix.
analyze_individual(
    iid="iid0",
    ch=3,
    n_ref=500,
    path_mosaic="./Simulated/1000G_Mosaic/TSI/ch3_5cm/",
    prefix_out="testDIPLOID/",
    exclude_pops=["TSI"],
    e_model="diploid_gt",
    # Transition and emission parameters
    roh_in=100,
    roh_out=100,
    roh_jump=300,
    e_rate=0.001,
    # Run options
    destroy_phase=False,
    save=True,
    save_fp=False,
    clean_up=False,
)

Using Low-Mem Cython Linear Speed Up.
Loaded Pre Processing Model: MosaicHDF5
Loading Individual: iid0

Loaded 77652 variants
Loaded 20 individuals
HDF5 loaded from ./Simulated/1000G_Mosaic/TSI/ch3_5cm/data.h5

Loaded 77652 variants
Loaded 503 individuals
HDF5 loaded from ./Data/1000Genomes/HDF5/1240kHDF5/Eur1240chr3.hdf5

Intersection on Positions: 77652
Nr of Matching Refs: 77652 / 77652
Full Intersection Ref/Alt Identical: 77652 / 77652
396 / 503 Individuals included in Reference
Extraction of 792 Haplotypes Complete!
Markers called 77652 / 77652
Successfully saved to: ./Simulated/1000G_Mosaic/TSI/ch3_5cm/output/iid0/chr3/testDIPLOID/
Successfully loaded Data from: ./Simulated/1000G_Mosaic/TSI/ch3_5cm/output/iid0/chr3/testDIPLOID/
Loaded Emission Model: diploid_gt
Loaded Transition Model: model
Loaded Post Processing Model: Standard
Minimum Genetic Map: 0.0000
Maximum Genetic Map: 2.2326
Gaps bigger than 0.1 cM: 214
Maximum Gap: 0.2348 cM
Loaded Transition and Emission Matrix:
(3, 3