In [12]:
import numpy as np
import os as os
import sys as sys
import multiprocessing as mp
import pandas as pd
import socket
import matplotlib.pyplot as plt

### Use Arial as the default figure font
from matplotlib import rcParams
rcParams.update({
    'font.family': 'sans-serif',   # Set the default
    'font.sans-serif': ['Arial'],  # Make sure to have the font installed (it is on cluster for Harald)
})

### Pick the right path (whether on cluster or at home)
socket_name = socket.gethostname()
print(socket_name)

# (hostname prefix, message printed on match, directory to work in).
# Prefixes are tried in the order listed; the first match wins.
host_table = (
    ("bionc21", "Leipzig head node detected.",
     "/mnt/archgen/users/hringbauer/git/ped-sim/"),
    ("midway2", "Midway jnovmbre partition detected.",
     "/project2/jnovembre/hringbauer/ped-sim/"),
    ("Harald-Laptop", "Harald's new laptop detected!",
     "/home/hringbauer/git/ped-sim/"),
    ("compute-", "HSM Computational partition detected.",
     "/n/groups/reich/hringbauer/git/ped-sim/"),
)

if socket_name == "VioletQueen":
    # Exact hostname match; this branch prints no extra message.
    path = "/home/harald/git/HAPSBURG/"   # The Path on Harald's machine
else:
    for host_prefix, host_message, host_path in host_table:
        if socket_name.startswith(host_prefix):
            print(host_message)
            path = host_path
            break
    else:
        # No known machine matched: stop before changing directory.
        raise RuntimeWarning(f"Not compatible machine: {socket_name}. Check!!")

os.chdir(path)  # Set the right Path (in line with Atom default)

# Must run before the hapsburg imports below: "./package/" is a relative entry,
# so it refers to a directory inside the working directory set by os.chdir(path).
sys.path.append("./package/")  # Go to the hapsburg package directory

from hapsburg.PackagesSupport.pp_individual_roh_csvs import post_process_roh_df, combine_ROH_df, calc_average_roh
from hapsburg.figures.plot_bars import plot_panel_row, prepare_dfs_plot, create_cousins_roh

print(os.getcwd()) # Show the current working directory. Should be the `path` selected above for this machine
print(f"CPU Count: {mp.cpu_count()}")

bionc21
Leipzig head node detected.
/mnt/archgen/users/hringbauer/git/ped-sim
CPU Count: 40


### Installing Pedsim
git clone ...

module load boost

make

### Run Pedsim

In [17]:
def run_pedsim(par_file="par/ibd/gp1.def",
               output="output/ibd/gp1",
               gmap="map/refined_mf.simmap",
               o_file="output/ibd/gp1",
               i_file="interfere/nu_p_campbell.tsv"):
    """Run the ./ped-sim executable found in the current working directory.

    Parameters
    ----------
    par_file : str
        Pedigree definition file, passed to ped-sim as ``-d``.
    output : str
        Output prefix, passed as ``-o``.
    gmap : str
        Map file, passed as ``-m``.
    o_file : str
        Unused. Kept only so that existing calls passing it keep working;
        the output prefix is taken from ``output``.
    i_file : str
        Interference file, passed as ``--intf``.

    Returns
    -------
    None
        Everything ped-sim writes to stdout/stderr is printed once it finishes.

    Raises
    ------
    RuntimeError
        If ped-sim exits with a non-zero status.
    FileNotFoundError
        If ./ped-sim does not exist in the working directory.
    """
    import subprocess  # function-scope import keeps this cell self-contained

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact, unlike interpolating them into a `!` command line.
    cmd = ["./ped-sim",
           "-d", str(par_file),
           "-m", str(gmap),
           "-o", str(output),
           "--intf", str(i_file)]
    completed = subprocess.run(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               text=True)
    # Echo the tool's log into the cell output.
    print(completed.stdout, end="")

    # Fail loudly so that loops over many pedigrees do not continue silently
    # past a broken simulation.
    if completed.returncode != 0:
        raise RuntimeError(
            f"ped-sim exited with status {completed.returncode} "
            f"(def file: {par_file}, output prefix: {output})")

### Run ancestral relationships

In [39]:
# One ped-sim run per definition file in par/ibd/. The commented-out calls are
# the other runs of this group; only the "parent" run is active in this cell.
#run_pedsim(par_file="par/ibd/gp1.def", output="output/ibd/gp1")
#run_pedsim(par_file="par/ibd/gp2.def", output="output/ibd/gp2")
#run_pedsim(par_file="par/ibd/gp3.def", output="output/ibd/gp3")
run_pedsim(
    par_file="par/ibd/parent.def",
    output="output/ibd/parent",
)

Pedigree simulator!  v1.1.16    (Released  8 Feb 2021)

  Def file:		par/ibd/parent.def
  Map file:		refined_mf.simmap
  Input VCF:		[none: no genetic data]
  Output prefix:	output/ibd/parent

  Random seed:		3075670188

  Interference file:	interfere/nu_p_campbell.tsv

Simulating haplotype transmissions... done.
Printing IBD segments... done.

To simulate genetic data, must use an input VCF with 300 founders.


### Avuncular Relationships

In [37]:
# One ped-sim run per avuncular definition file. The commented-out calls are
# the other runs of this group; only the "av5" run is active in this cell.
#run_pedsim(par_file="par/ibd/av1.def", output="output/ibd/av1")
#run_pedsim(par_file="par/ibd/av2.def", output="output/ibd/av2")
#run_pedsim(par_file="par/ibd/av3.def", output="output/ibd/av3")
#run_pedsim(par_file="par/ibd/av4.def", output="output/ibd/av4")
run_pedsim(
    par_file="par/ibd/av5.def",
    output="output/ibd/av5",
)

Pedigree simulator!  v1.1.16    (Released  8 Feb 2021)

  Def file:		par/ibd/av5.def
  Map file:		refined_mf.simmap
  Input VCF:		[none: no genetic data]
  Output prefix:	output/ibd/av5

  Random seed:		1933301268

  Interference file:	interfere/nu_p_campbell.tsv

Simulating haplotype transmissions... done.
Printing IBD segments... done.

To simulate genetic data, must use an input VCF with 700 founders.


In [24]:
### Sib Relationships
# Only the "sib" run is active; the "hsib" run is kept commented out below.
run_pedsim(
    par_file="par/ibd/sib.def",
    output="output/ibd/sib",
)
#run_pedsim(par_file="par/ibd/hsib.def", output="output/ibd/hsib")

Pedigree simulator!  v1.1.16    (Released  8 Feb 2021)

  Def file:		par/ibd/sib.def
  Map file:		refined_mf.simmap
  Input VCF:		[none: no genetic data]
  Output prefix:	output/ibd/sib

  Random seed:		3038461378

  Interference file:	interfere/nu_p_campbell.tsv

Simulating haplotype transmissions... done.
Printing IBD segments... done.

To simulate genetic data, must use an input VCF with 200 founders.


### Test Run

In [21]:
%%time
run_pedsim(par_file="par/ibd_sim/gp2.def", 
           output="output/ibd_test")
#run_pedsim(par_file="par/ibd/hsib.def", output="output/ibd/hsib")

Pedigree simulator!  v1.4.2    (Released 24 Aug 2024)

  Def file:		par/ibd_sim/gp2.def
  Map file:		map/refined_mf.simmap
  Input VCF:		[none: no genetic data]
  Output prefix:	output/ibd_test

  Random seed:		3332550704

  Interference file:	interfere/nu_p_campbell.tsv

Simulating haplotype transmissions... done.
Printing IBD segments... done.

To simulate genetic data, must use an input VCF with 400 founders.
CPU times: user 28.6 ms, sys: 13 ms, total: 41.6 ms
Wall time: 1.33 s


# Run simulations for Ben's IBD classifier

In [20]:
%%time

run_pedsim(par_file="par/ibd_sim/gp2.def", 
           output="output/ibd_test"

test


# Run all GP relatives
GP2 = standard gp (2 Generations apart, 2 degrees)

GP3 = standard gp + 1G (3 Gen. apart, 3 degrees)

...

In [None]:
%%time
## takes 50s
rels = [f"gp{i}" for i in range(1,9)]
for r in rels:
    path_in = f"par/ben10k/{r}.def"
    path_out = f"output/ben10k/{r}"
    run_pedsim(par_file=path_in, output=path_out)

### Run all AV relatives
AV2 = standard avuncular (1 gen apart, 2 degrees)

AV3 = standard av + 1gen (2 gen apart, 3 degrees)

...

In [None]:
%%time
## takes 55s
rels = [f"av{i}" for i in range(2,10)]
for r in rels:
    path_in = f"par/ben10k/{r}.def"
    path_out = f"output/ben10k/{r}"
    run_pedsim(par_file=path_in, output=path_out)