In [15]:
import pandas as pd
import numpy as np
import itertools
from tqdm import tqdm
import random
import functools

import pomegranate
from pomegranate import HiddenMarkovModel ,State
from pomegranate.distributions import IndependentComponentsDistribution
from pomegranate.distributions import NormalDistribution

import pickle as pkl

# build the meta network

In [2]:
#params :
# number of cells (dimensions) in a single chain's binary state vector
n_dim_of_chain=8
# number of chains (networks) that are built and combined
n_of_chains = 5
# candidate values for how many walk targets are drawn per cell in build_IX_mock
possible_number_of_walks = [1,2,3,4]

In [3]:
def generate_all_binarys(n, arr, i):
    """Recursively enumerate every 0/1 assignment of positions i..n-1 of `arr`.

    Each time a full assignment is reached (i == n), the list of indices
    holding a non-zero value is appended to the module-level list
    `all_binarys`. `arr` is scratch space and is overwritten in place.
    Nothing is returned.
    """
    if i != n:
        # Branch on position i: first the 0 branch, then the 1 branch.
        for bit in (0, 1):
            arr[i] = bit
            generate_all_binarys(n, arr, i + 1)
        return

    # Complete assignment: record which positions are switched on.
    active_positions = np.nonzero(arr.copy())[0].tolist()
    all_binarys.append(active_positions)

def build_IX_mock(n) :
    """Build a random mock mapping from each cell index to its walk targets.

    For every cell index in range(n), a number of walks is drawn from the
    module-level `possible_number_of_walks`, then that many distinct target
    indices are drawn (without replacement) from range(n).

    Returns a dict: cell index -> numpy array of target cell indices.
    """
    cell_to_targets = {}

    for cell in range(n):
        how_many = np.random.choice(possible_number_of_walks)
        cell_to_targets[cell] = np.random.choice(range(n), how_many, False)

    return cell_to_targets
        
def build_network_from_IX(n,all_binarys,imx_possible_walks) :
    """Build the walk network and a random pathway attribute for one chain.

    Parameters:
        n: unused by this function (kept for the existing call signature).
        all_binarys: iterable of states, each a list of active cell indices.
        imx_possible_walks: mapping cell index -> iterable of walk targets.

    Returns:
        (network_dic, pathway_network), both keyed by tuple(state):
        - network_dic[state] is a list of sets, one per element of the
          cartesian product of the walk targets of the state's active cells.
        - pathway_network[state] is a random array of values from 1..5 with
          one entry per such set.
    Empty states are skipped.
    """
    walks_by_state = {}      # key: cell vec, value: all connected cell vecs
    pathways_by_state = {}

    for active_cells in tqdm(all_binarys):
        if not len(active_cells):
            continue

        state_key = tuple(active_cells)
        targets_per_cell = [imx_possible_walks[cell] for cell in active_cells]
        # One target is picked per active cell; duplicates collapse inside the set.
        reachable = list(map(set, itertools.product(*targets_per_cell)))

        walks_by_state[state_key] = reachable
        pathways_by_state[state_key] = np.random.choice([1,2,3,4,5], len(reachable))

    return walks_by_state, pathways_by_state

def build_model_networks(n_of_chains,n,all_binarys,imx_possible_walks) :
    """Build one (walk network, pathway attributes) pair per chain.

    Calls build_network_from_IX once per chain index in range(n_of_chains),
    always with the same arguments.

    Returns two dicts keyed by chain index: the walk networks and the
    pathway attribute networks.
    """
    per_chain = [
        build_network_from_IX(n, all_binarys, imx_possible_walks)
        for _ in range(n_of_chains)
    ]
    model_networks = {chain: walks for chain, (walks, _atts) in enumerate(per_chain)}
    pathway_networks = {chain: atts for chain, (_walks, atts) in enumerate(per_chain)}
    return model_networks, pathway_networks

def build_pathways_mock(imx_possible_walks) : 
    """Assign a random mock pathway (1..4) to every entry of `imx_possible_walks`.

    Parameters:
        imx_possible_walks: iterable of entries; iterating a dict yields its
            keys. Each entry may be an iterable of indices or a single
            scalar index.

    Returns:
        dict mapping a tuple key per entry to a randomly chosen pathway id.
        Scalar entries (e.g. the integer cell indices used as keys by
        build_IX_mock) become 1-tuples.
    """
    hard_walk_to_pathways_map = {}
    for vec in imx_possible_walks : 
        path = np.random.choice([1,2,3,4])
        try:
            key = tuple(vec)
        except TypeError:
            # Scalar entry (not iterable): wrap it so every key is a tuple.
            key = (vec,)
        # Write into the dict that is actually returned (the previous code
        # assigned to an undefined name and always raised NameError).
        hard_walk_to_pathways_map[key] = path
    return hard_walk_to_pathways_map

# Global sink that generate_all_binarys appends to; it must exist (and be empty)
# before the call below.
all_binarys = [] 
# Enumerate every binary state of one chain as a list of active cell indices.
generate_all_binarys(n_dim_of_chain,[None]*n_dim_of_chain,0)
# Random mock of which cells each cell can walk to.
imx_possible_walks = build_IX_mock(n_dim_of_chain)
# One walk network and one pathway-attribute network per chain
# (every chain is built from the same imx_possible_walks).
model_networks,pathways_network = build_model_networks(n_of_chains,n_dim_of_chain,all_binarys,imx_possible_walks)

100%|██████████████████████████████████████████████████████████████████████████████| 256/256 [00:00<00:00, 2251.57it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 256/256 [00:00<00:00, 3055.90it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 256/256 [00:00<00:00, 2999.87it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 256/256 [00:00<00:00, 3666.69it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 256/256 [00:00<00:00, 4066.80it/s]


In [4]:
# All the naive combinations of states across chains: one state (a key of that
# chain's network dict) per chain.
all_curr_state_comb_between_networks = itertools.product(*model_networks.values())

# We believe we can filter based on the real data; for now we only cap the count.
_capped_state_combs = itertools.islice(all_curr_state_comb_between_networks,10000000)
# _capped_state_combs = itertools.islice(all_curr_state_comb_between_networks,100)

# The state combinations are consumed twice downstream: once to look up the
# walks (below) and once to be zipped back with those walks. A single one-shot
# iterator shared by both consumers would interleave, pairing each state with
# the walks of a *different* state, so split it into two independent iterators.
# The two branches are consumed in lockstep, so tee only buffers ~1 item.
filtered_curr_state_comb_between_networks, _state_combs_for_walks = itertools.tee(_capped_state_combs)

# Now we need to find all possible combinations of walks:
# we start by building, for every combination of states across chains, the
# list (one entry per chain) of walks available from that chain's state.
all_walks_across_chains = map(
    lambda comb: [model_networks[net_idx][chain_state] for net_idx, chain_state in enumerate(comb)],
    _state_combs_for_walks,
)

In [5]:
def is_walk_in_path(x,_path):
    """Mock pathway-membership test.

    Both arguments are ignored; the result is True when a fresh
    random.random() draw falls below 0.1.
    """
    draw = random.random()
    return draw < 0.1

def return_filtered_walks_per_curr_chain(_walks_per_curr_chain,path):
    """Keep, for every chain, only the walks accepted by is_walk_in_path for `path`.

    Returns a list with one (possibly empty) list of surviving walks per
    entry of `_walks_per_curr_chain`, in the original order.
    """
    return [
        [candidate for candidate in chain_walks if is_walk_in_path(candidate, path)]
        for chain_walks in _walks_per_curr_chain
    ]

In [6]:
# We now build a different stream for each pathway - **only combinations where all the walks are in the same pathway are possible**
def filter_comb_of_walks_across_chains(_walks_per_curr_comb):
    """Build every cross-chain walk combination that stays inside one pathway.

    For each pathway id in 1..5 the per-chain walks are filtered down to that
    pathway; if every chain still has at least one walk, the cartesian product
    of the per-chain walks is added to the result.

    Returns a list of tuples, each holding one walk per chain.
    """
    combos_for_all_paths = []

    for pathway in [1,2,3,4,5]:
        # keep only the walks that belong to this pathway
        walks_in_pathway = return_filtered_walks_per_curr_chain(_walks_per_curr_comb, pathway)

        # a chain with no walk in this pathway makes the whole product empty,
        # so skip it before building the product
        if not all(len(chain_walks) != 0 for chain_walks in walks_in_pathway):
            continue

        combos_for_all_paths.extend(itertools.product(*walks_in_pathway))

    return combos_for_all_paths

# Lazily turn each per-state list of candidate walks into its list of
# pathway-consistent walk combinations.
filt_comb_walks_across_chains = map(filter_comb_of_walks_across_chains, all_walks_across_chains)

# Pair every combination of states with its combinations of walks.
# NOTE(review): this pairing assumes the state iterator consumed here is
# independent of the one feeding `all_walks_across_chains`; if both are the
# same one-shot iterator, states and walks end up misaligned - confirm.
_states_with_walks = zip(filtered_curr_state_comb_between_networks, filt_comb_walks_across_chains)

# Drop the state combinations from which no walk combination is possible.
state_comb_to_walks_comb = (
    states_and_walks
    for states_and_walks in _states_with_walks
    if len(states_and_walks[1]) > 0
)

Explanation of the result `state_comb_to_walks_comb`:

Each item it yields is a pair `(states, walks)`:

`item[0]` : states - the current state of every network (one entry per chain).

`item[1]` : walks - the list of possible walk combinations from those states; every row contains one next state per chain:

`item[1][i][j]` : the state that chain j walks to, starting from its current state `item[0][j]`, in the i-th combination.

# save the results

In [7]:
def build_long_state_vector(set_of_states):
    """Merge per-chain states into one frozenset of global dimension indices.

    The i-th state in `set_of_states` has each of its indices shifted by
    i * n_dim_of_chain (module-level constant), so indices coming from
    different chains cannot collide.
    """
    return frozenset(
        dim + chain_idx * n_dim_of_chain
        for chain_idx, small_state in enumerate(set_of_states)
        for dim in small_state
    )

In [8]:
# final_state_comb_to_walks_comb = {}

# with tqdm(100) as pbar : 
#     for sample in state_comb_to_walks_comb : 
#         curr_state = build_long_state_vector(sample[0])
        
#         next_possible = [build_long_state_vector(_next) for _next in sample[1]]
        
#         final_state_comb_to_walks_comb[curr_state] = next_possible
#         pbar.update(1)

In [9]:
# import pickle as pkl
# with open("markov_network",'wb') as f : 
#     pkl.dump(final_state_comb_to_walks_comb,f)

In [10]:
# import pickle as pkl
# with open("markov_network",'rb') as f : 
#     t = pkl.load(f)

# build the network

## pick the relevant states

In [11]:
def return_relevant_multi_distribution(state_vactor) : 
    """Build the emission distribution for one long state vector.

    A multi-hot column vector with n_of_chains * n_dim_of_chain rows is
    created with ones at the indices in `state_vactor`. Every row becomes
    the mean of a NormalDistribution with standard deviation 1, and the
    components are wrapped in an IndependentComponentsDistribution.
    """
    total_dims = n_of_chains*n_dim_of_chain
    multi_hot_vector_state = np.zeros((total_dims,1))
    multi_hot_vector_state[list(state_vactor)] = 1

    components = []
    for dim in multi_hot_vector_state:
        # dim is a length-1 row of the column vector: 1 if active, else 0
        components.append(NormalDistribution(dim,1))

    return IndependentComponentsDistribution(components)

def return_relevant_state(state_vector) : 
    """Create the State for `state_vector`.

    The emission comes from return_relevant_multi_distribution and the
    state's name is the string form of the sorted indices.
    """
    label = str(sorted(state_vector))
    emission = return_relevant_multi_distribution(state_vector)
    return State(emission, label)

In [20]:
# For now we only take a bounded number of state combinations.
first = True
how_much_to_take = 1000

state_holder = {}          # long state vector (frozenset) -> its State object
markov_model = HiddenMarkovModel('first_try')
next_pomp_state = None     # last "next" state seen; gets the transition to model.end


def _get_or_add_state(long_state):
    """Return the State for `long_state`, creating and registering it on first use."""
    pomp_state = state_holder.get(long_state)
    if pomp_state is None:
        pomp_state = return_relevant_state(long_state)
        markov_model.add_states(pomp_state)
        state_holder[long_state] = pomp_state
    return pomp_state


# total= gives the bar a known length; passing the number positionally makes
# tqdm treat it as the iterable and the bar shows no total.
with tqdm(total=how_much_to_take) as pbar :
    # islice stops after exactly how_much_to_take samples; zip(gen, range(k))
    # would pull (and silently drop) one extra sample from the generator.
    for sample in itertools.islice(state_comb_to_walks_comb, how_much_to_take) :
        curr_state = build_long_state_vector(sample[0])
        curr_pomp_state = _get_or_add_state(curr_state)

        for _next in sample[1] :
            next_possible = build_long_state_vector(_next)
            next_pomp_state = _get_or_add_state(next_possible)

            if first :
                # the very first current state is the single entry point of the model
                markov_model.add_transition(markov_model.start,curr_pomp_state,1)
                first = False
            markov_model.add_transition(curr_pomp_state,next_pomp_state,1)
        pbar.update(1)

# Only the last "next" state is connected to the end state; skip this when no
# sample was produced (next_pomp_state would otherwise be undefined).
if next_pomp_state is not None :
    markov_model.add_transition(next_pomp_state,markov_model.end,1)
        

10000it [03:41, 45.18it/s]


In [21]:
# Finalize the model structure before fitting / decoding.
# NOTE(review): pomegranate's legacy `bake()` appears to work in place and
# return None, which would make `baked_markov_model` None - confirm. The cells
# below keep using `markov_model`, not this name.
baked_markov_model = markov_model.bake()

In [13]:
# Unconditionally raises; presumably a deliberate stop marker so that
# "Run All" halts here and the cells below are only run by hand - confirm.
raise Exception("finished")

Exception: finished

In [45]:
# Load the sampled sequences used below for fitting and decoding.
# SECURITY: unpickling executes arbitrary code from the file; only load
# "sampled_seqs.pkl" if it comes from a trusted source.
with open("sampled_seqs.pkl",'rb') as f : 
    sampled_seqs = pkl.load(f)

In [49]:
# Fit the model on the sampled sequences.
# NOTE(review): `markov_model` is rebound to whatever `fit` returns; this is
# only safe if `fit` returns the model itself - confirm for the installed
# pomegranate version.
markov_model = markov_model.fit(sampled_seqs)

In [53]:
# Decode one sampled sequence with Viterbi.
# The recorded output is (-inf, None); presumably no state path through the
# model can produce this sequence - confirm.
markov_model.viterbi(sampled_seqs[5])

(-inf, None)

In [None]:
# fit the distribution to the real data -
## find close samples - by my own function
## fit by those samples

In [None]:
#predict params

In [None]:
#best trajectory