# Class that implements Emission Matrix for IBD detection

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import socket as socket
import os as os
import sys as sys
import multiprocessing as mp
import h5py
import allel

# Work out which machine this runs on: the project path is only known for
# the cluster's compute nodes, so any other host is rejected up front.
socket_name = socket.gethostname()
print(socket_name)

if not socket_name.startswith("compute-"):
    raise RuntimeWarning("Not compatible machine. Check!!")

print("HSM O2 Computational partition detected.")
path = "/n/groups/reich/hringbauer/git/hapBLOCK/"  # The Path on Harvard Cluster

# Switch the working directory to the project root (in line with Atom default),
# then report where we ended up and how many CPUs are visible.
os.chdir(path)
print(os.getcwd())
print(f"CPU Count: {mp.cpu_count()}")

compute-a-17-86.o2.rc.hms.harvard.edu
HSM O2 Computational partition detected.
/n/groups/reich/hringbauer/git/hapBLOCK
CPU Count: 32


In [3]:
"""
Class for calculating Emission Probabilities.
Contains Sub-Classes, as well as factory Method.
@ Author: Harald Ringbauer, 2019, All rights reserved
"""

###############################
###############################


class Emissions:
    """Base class for emission probabilities.

    Defines the interface that concrete emission models implement. All
    ``give_emission_*`` methods raise ``NotImplementedError`` here and are
    meant to be overridden in subclasses; only ``set_params`` is shared.
    """

    def give_emission_matrix(self, remember=False):
        """Return Emission Matrix - for every possible set of states.

        remember: flag accepted for subclasses; unused in this base class.
        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError("Implement This in specific subclass.")

    def give_emission_state(self, ob_stat):
        """Give the emission matrix of a path of states.

        ob_stat: observations handed to the subclass implementation.
        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError("Implement This in specific subclass.")

    # The default used to be ``np.float``. That alias of the builtin ``float``
    # was removed in NumPy 1.24, and because defaults are evaluated when the
    # class body runs, the whole class failed to define on current NumPy.
    # The builtin ``float`` is the exact object the alias pointed to.
    def give_emission_log(self, ob_stat, dtype=float):
        """Return the full emission Probability directly in Log Space.

        ob_stat: Observed Readcounts [2,l] array of 0/1
        dtype: float type for the result (default: builtin ``float``).
        Raises NotImplementedError unless overridden.
        """
        raise NotImplementedError("Implement This in specific subclass.")

    def set_params(self, **kwargs):
        """Set the Parameters.

        Takes keyworded arguments; each ``key=value`` pair is stored as the
        attribute ``key`` on this instance (existing attributes are
        overwritten).
        """
        for key, value in kwargs.items():
            setattr(self, key, value)

In [5]:
### Input: Two genotype probability arrays, in format [l x 3 x 2]
### Output: Emission matrix of form [l x 5]: 1 background state + 4 copying states

In [35]:
### Describe Background State
# Three columns per entry of p: p^2, 2*p*(1-p) and (1-p)^2
# (presumably p holds per-locus allele frequencies -- TODO confirm).
t0 = np.stack([p**2, 2 * p * (1 - p), (1 - p)**2], axis=1)
#m0 = t0 . gt1

In [30]:
# NOTE(review): m0 is not assigned in any visible cell (the only mention is a
# commented-out sketch); the output shown below presumably comes from earlier
# kernel state -- confirm before relying on it.
m0


array([[0.81, 0.81, 0.81, 0.81],
       [0.18, 0.18, 0.18, 0.18],
       [0.01, 0.01, 0.01, 0.01]])

In [15]:
### Describe i/j sharing

In [37]:
np.shape(m0)  # inspect the dimensions of m0 (m0 itself is not assigned in any visible cell)

(3, 4)

In [36]:
np.shape(t0)  # t0 is stacked along axis=1 from three arrays, so this is (len(p), 3)

(4, 3)

### Test data

In [28]:
# Small hand-written fixture: two 4x3 arrays whose rows each contain a single 1
# (presumably one row per locus and one column per genotype class -- TODO confirm).
gt1 = np.array([
    [0, 1, 0],
    [0, 0, 1],
    [1, 0, 0],
    [0, 1, 0],
])
gt2 = np.array([
    [0, 1, 0],
    [0, 0, 1],
    [0, 0, 1],
    [0, 0, 1],
])
gts = np.stack([gt1, gt2], axis=0)  # resulting shape: (2, 4, 3)
p = np.full(4, 0.9)  # same value 0.9 repeated for each of the four rows

In [13]:
### Display the stacked test genotypes (gts)

array([[[0, 1, 0],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0]],

       [[0, 1, 0],
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1]]])