In [111]:
import numpy as np
import pandas as pd
from pomegranate import *
import random

In [112]:
df = pd.read_csv('AdjustedRPKMOutput/RPF_chrE.txt',sep='\t')

In [113]:
df.head(5)

Unnamed: 0,AccNum,GeneName,cdReads0,cdRPKM0,cdReads1,cdRPKM1,cdReads2,cdRPKM2,cdReads3,cdRPKM3,cdReads4,cdRPKM4
0,NM_017847,ODR4,93.0,16.468792,62.0,13.189469,49.0,14.172717,37.0,9.403085,39.0,10.487249
1,NM_001143986,TLE6,4.0,0.562465,2.0,0.337849,2.0,0.45935,1.0,0.201802,1.0,0.213527
2,NM_001003803,ATP5S,81.0,30.21492,83.0,37.193868,71.0,43.258655,46.0,24.625447,26.0,14.727463
3,NM_001003800,BICD2,501.0,47.157866,389.0,43.986848,284.0,43.662942,257.0,34.716778,230.0,32.874747
4,NM_016649,ESF1,69.0,6.525288,52.0,5.907596,41.0,6.333047,26.0,3.528692,26.0,3.733723


### Get first 100 values of RPF data to test

In [123]:
X = np.log2(df[['cdRPKM0','cdRPKM1','cdRPKM2','cdRPKM3','cdRPKM4']].values).astype(int)

In [124]:
print(X.shape)

(11745, 5)


In [125]:
bin_size = 50 #state number of bins here, multiple of 5
step = (np.max(X)-np.min(X))/bin_size
bins = np.arange(np.min(X),np.max(X)+0.1,step)
print(bins)
X = np.digitize(X,bins)

[-5.   -4.64 -4.28 -3.92 -3.56 -3.2  -2.84 -2.48 -2.12 -1.76 -1.4  -1.04
 -0.68 -0.32  0.04  0.4   0.76  1.12  1.48  1.84  2.2   2.56  2.92  3.28
  3.64  4.    4.36  4.72  5.08  5.44  5.8   6.16  6.52  6.88  7.24  7.6
  7.96  8.32  8.68  9.04  9.4   9.76 10.12 10.48 10.84 11.2  11.56 11.92
 12.28 12.64 13.  ]


In [126]:
print(X[:5])

[[25 23 23 23 23]
 [14 12 12  9  9]
 [25 28 28 25 23]
 [28 28 28 28 28]
 [20 20 20 17 17]]


### Sample network

In [127]:
model = BayesianNetwork.from_samples(X,algorithm='chow-liu')

In [128]:
model.structure

((), (0,), (1,), (2,), (3,))

### Initialize 4 networks

In [134]:
K = 4
models = []
x_i = []
for i in range(K):
    j = random.randint(0,10000)
    print(j)
    x_temp = X[j:j+10]
    x_i.append(x_temp)
    model = BayesianNetwork.from_samples(x_temp,algorithm='chow-liu') 
    models.append(model)
for model in models:
    print(model.structure)

9620
3205
3646
881
((), (0,), (0,), (0,), (1,))
((), (0,), (0,), (0,), (2,))
((), (4,), (1,), (0,), (3,))
((), (2,), (0,), (2,), (2,))


#### EM Step

In [138]:
for x in X:
    probs = []
    for idx,model in enumerate(models):
        try:
            prob = model.probability(x)
        except KeyError:
            x_temp = np.vstack((x_i[idx],x))
            x_i[idx]=x_temp
            model = BayesianNetwork.from_samples(x_temp,algorithm='chow-liu') 
            prob = model.probability(x)
        probs.append(prob)
    print(probs)

[0.013667601664133822, 0.03207957033388068, 0.075, 0.0192]
[0.013339920948616609, 0.018048128342246, 0.024302430243024295, 0.021721641190667752]
[0.0009412975629191839, 0.0, 0.0, 0.0]
[0.011444381563227054, 0.017842660178426593, 0.0, 0.02144249512670565]
[0.023357305304566978, 0.03892287727104191, 0.04975332068311194, 0.04507804370447449]
[0.06890448998748434, 0.10000000000000002, 0.19999999999999998, 0.0]
[0.006947637292464881, 0.009249743062692701, 0.010298971307427305, 0.009325265251989383]
[0.07234449760765548, 0.10000000000000002, 0.19999999999999998, 0.0]
[0.0032826841203428964, 0.0, 0.0, 0.0]
[0.008008928571428568, 0.010604926108374381, 0.010227272727272727, 0.0]
[0.029904479507325086, 0.0, 0.05000000000000001, 0.05760000000000002]
[0.025457605722701452, 0.031236177838785933, 0.0, 0.05760000000000002]
[0.008954008954008958, 0.0187793427230047, 0.015873015873015876, 0.01424501424501424]
[0.0008146293860579571, 0.0, 0.0, 0.0]
[0.010929492628664418, 0.0, 0.02200505552044777, 0.0197

[0.01382590252560616, 0.01121775724507812, 0.025912121951963865, 0.022358459512837395]
[0.016828778983512226, 0.0, 0.0, 0.0]
[0.00010453216374268997, 0.0, 0.0, 0.0]
[0.06689412756924505, 0.10000000000000002, 0.19999999999999998, 0.0]
[0.06938461538461539, 0.10000000000000002, 0.19999999999999998, 0.0]
[0.0036922168069709067, 0.011802158552682658, 0.0, 0.01421623643845866]
[0.004441285152830734, 0.01462621885157097, 0.0, 0.017601043024771855]
[0.0002571855026981513, 0.0, 0.0, 0.0]
[0.012807051037291583, 0.01741519556940118, 0.024518762183235863, 0.020937369954223878]
[0.06765162158997773, 0.10000000000000002, 0.19999999999999998, 0.0]
[0.008494501327265834, 0.01157622739018088, 0.01626724763979666, 0.013904407200496583]
[0.2, 0.10000000000000002, 0.0, 0.0]
[0.0005487829780282614, 0.0, 0.0, 0.0]
[0.033831444522448455, 0.04988246617432025, 0.10000000000000002, 0.059858959409184294]
[0.2, 0.05899495281983761, 0.22499999999999995, 0.0]
[0.06840099027599025, 0.10000000000000002, 0.1999999999

KeyboardInterrupt: 