In [1]:
import pandas as pd
import pickle as pkl
import numpy as np
import json
from operator import itemgetter
from itertools import product
from BF_generators import *

In [2]:
def check_conflicting_constraints (rows, rows_output):
    '''
    Drop the truth table constraints that contradict each other.

    rows: all the truth table rows which are constrained by the attractor
    (one entry per constraint, so the same row may occur several times);
    any 1-D array-like is accepted
    rows_output: all the outputs of the rows of the truth table which
    are constrained by the attractor; rows_output[p] is the output imposed
    on rows[p]
    returns rows and the row outputs (as numpy arrays) with no conflicting
    constraints: a row that is paired with more than one distinct output is
    removed at every position where it occurs. Rows that are repeated with
    the same output are kept as they are.
    A message is printed for every conflicting row.
    '''
    rows = np.asarray(rows)
    rows_output = np.asarray(rows_output)

    # Collect the positions of every row in a single pass instead of
    # rescanning 'rows' once per distinct value.
    positions = {}
    for index, element in enumerate(rows.tolist()):
        positions.setdefault(element, []).append(index)

    remove_indices = []
    for element, indices in positions.items():
        if len(set(rows_output[indices].tolist())) > 1:
            remove_indices.extend(indices)
            print ('There exists a conflict at row :', element)

    if remove_indices:
        # np.delete takes all the positions at once, so there is no need to
        # delete them one by one in descending order.
        rows = np.delete(rows, remove_indices)
        rows_output = np.delete(rows_output, remove_indices)
    return rows, rows_output


def attr_constr_funcs(node_num, in_edges, attr_as_matrix):
    '''
    -node_num: a dictionary where each gene is associated with an integer
    -in_edges: a dictionary where the integer of each gene is associated with
    the integers of its regulatory inputs
    -attr_as_matrix: the attractor specified as a matrix (row = integer of the
    gene, column = attractor state). Every column is used as a fixed point: the
    inputs and the output of a gene are read from the same column.
    NOTE(review): the entries are assumed to be the integers 0/1 - confirm.
    returns a dictionary which associates the integer of each gene in 'node_num'
    with the BF that satisfies the attractor constraints. The BF is a string of
    length 2**(number of inputs) over '0', '1' and 'x', where 'x' marks a row
    that is not constrained.
    NOTE: those rows of the truth table where a function output is replaced
    multiple times with different values have to be excluded from the constraint
    '''
    attr_constr_BFs = dict()
    for i in node_num.values():
        regulators = in_edges[i]
        x = np.array(attr_as_matrix[i], str)
        # One (initially empty) bit string per column; the value of every
        # regulator is appended in turn, so the first regulator is the most
        # significant bit of the row index.
        attr_constr_rows = np.zeros(len(attr_as_matrix.T), str)
        for in_edge in regulators:
            attr_constr_rows = np.char.add(attr_constr_rows, np.array(attr_as_matrix[in_edge], str))
        # A gene without regulators has a truth table with the single row 0;
        # int('', 2) would raise a ValueError there.
        rows = np.array([int(ele, 2) if ele else 0 for ele in attr_constr_rows], dtype=int)
        rows, x = check_conflicting_constraints(rows, x) #Check for conflicting constraints for rows at the node under consideration
        tt = np.array (2**len(regulators)*['x'])
        tt[rows] = x
        attr_constr_BFs[i] = ''.join(tt)
    return attr_constr_BFs


def attr_constr_funcs_with_cycles(node_num, in_edges, attr_as_matrix, cycles):
    """
    Construct Boolean functions (as truth-table strings) that realize all given attractor cycles.

    For every cycle and every state t in it, the values of a node's regulators
    at t select a truth-table row, and the value of the node at the successor
    of t in the cycle (wrapping around at the end) is the output imposed on
    that row.

    Parameters
    ----------
    node_num : dict
        Maps node name to its integer index, e.g. {'v_Bcatenin_H': 0, ...}
    in_edges : dict
        Maps each node index to a list of its regulator indices,
        e.g. {0: [10], 1: [8,11,15], ...}. The first regulator is the most
        significant bit of the row index.
    attr_as_matrix : np.ndarray, shape (n_nodes, T)
        The attractor matrix (rows = nodes, cols = time indices).
        NOTE(review): entries are assumed to be the integers 0/1 - confirm.
    cycles : list of lists
        Each inner list is an ordered cycle of column indices of
        attr_as_matrix, e.g. [[0,1,2], [3,4,5,6], ...]

    Returns
    -------
    attr_constr_BFs : dict
        Maps each node index to a string of length 2^k (k = #regulators) over {'0','1','x'},
        representing its partially specified truth table. Rows that receive
        contradicting outputs are left as 'x' (see check_conflicting_constraints).
    """
    attr_constr_BFs = {}

    for i in node_num.values():
        regulators = in_edges[i]
        rows_all = []
        outs_all = []

        for C in cycles:
            L = len(C)
            for idx, t in enumerate(C):
                t_next = C[(idx + 1) % L]
                s = ''.join(str(attr_as_matrix[j, t]) for j in regulators)
                # A node without regulators has a truth table with the single
                # row 0; int('', 2) would raise a ValueError there.
                rows_all.append(int(s, 2) if s else 0)
                outs_all.append(str(attr_as_matrix[i, t_next]))

        rows_all = np.array(rows_all, dtype=int)
        outs_all = np.array(outs_all, dtype=str)
        rows, x = check_conflicting_constraints(rows_all, outs_all)
        tt = np.array(['x'] * (2**len(regulators)), dtype=str)
        tt[rows] = x
        attr_constr_BFs[i] = ''.join(tt)
    return attr_constr_BFs


def combine_fxd_pt_and_cycle_constraint(fp_constr_dict, cycle_constr_dict):
    '''
    Merge the truth table constraints coming from the fixed points with the
    ones coming from the cycles.

    -fp_constr_dict: a dictionary of partially specified BFs (strings over
    '0', '1' and 'x')
    -cycle_constr_dict: a dictionary with the same kind of values; it is looked
    up with every key of 'fp_constr_dict'
    returns a dictionary with the keys of 'fp_constr_dict' where, position by
    position, an 'x' is replaced by the bit of the other BF
    raises a ValueError if the two BFs of a key differ in length or impose
    different bits on the same position
    '''
    def _merge_bit(key, fp_bit, cycle_bit):
        # the cycle bit wins whenever the fixed point side agrees or is free
        if fp_bit == 'x' or fp_bit == cycle_bit:
            return cycle_bit
        if cycle_bit != 'x':
            raise ValueError(f"Conflict in '{key}': {fp_bit} vs {cycle_bit}")
        return fp_bit

    combined = {}
    for key in fp_constr_dict:
        fp_tt = fp_constr_dict[key]
        cycle_tt = cycle_constr_dict[key]
        if len(fp_tt) != len(cycle_tt):
            raise ValueError(f"Length mismatch for '{key}'")
        combined[key] = ''.join(
            _merge_bit(key, fp_bit, cycle_bit)
            for fp_bit, cycle_bit in zip(fp_tt, cycle_tt)
        )
    return combined


class constrainBF:
    '''
    #functionality
    given a BF constrained at some rows, this class gives the sign conforming
    NCFs (scNCFs) that satisfy those constraints (see 'with_scNCF')
    '''
    # Template of the file that is read for the candidate NCFs with k inputs;
    # every non-empty line is parsed as one function written as a decimal integer.
    NCF_PERMS_PATH = '../../../../computational/NCF_perms/all_perms_NCF{k}.txt'

    def __init__(self, BF, sign):
        '''
        #arguments
        BF: Boolean function as a string of bits '11xx'; x can be 0 or 1
        sign: string of 'a' and 'i'; 'ai' where 'a' is activatory and 'i' is inhibitory

        #attributes
        constr_rows: the positions of BF which are not 'x'
        fixed_bits: the bits of BF at those positions, in the same order
        '''
        self.BF = BF
        self.sign = sign
        self.constr_rows = [ind for ind, bit in enumerate(self.BF) if bit != 'x']
        # A plain comprehension also works when no row (or a single row) is
        # constrained; itemgetter(*rows) raises a TypeError for an empty list.
        self.fixed_bits = [self.BF[ind] for ind in self.constr_rows]

    def with_scNCF (self):
        '''
        returns the list of functions (as integers) which agree with the BF
        on all of its constrained rows.
        - BF without 'x': the BF itself if it conforms to the edge signs and is
          an NCF, otherwise an empty list
        - BF with 'x': every function of the NCF file (after 'swap_rows' with
          the inhibitory edges) whose bits at the constrained rows equal the
          fixed bits; with no constrained row every function is returned
        '''
        k = len(self.sign)

        if 'x' not in self.BF:
            if check_if(k, self.BF).conforms_to_edge_signs(self.sign):
                if check_if(k, self.BF).is_NCF():
                    return [int(self.BF, 2)]
            else:
                print ('Given BF cannot be scNCF')
            return []

        # NOTE(review): presumably 'swap_rows' expects the inputs numbered from
        # k (first character of 'sign') down to 1 - confirm in BF_generators.
        inh_edges = [k-index for index, s in enumerate(self.sign) if s == 'i']

        possible_scNCFs = []
        with open (self.NCF_PERMS_PATH.format(k=k), 'r') as file:
            for line in file:
                line = line.strip()
                if not line:
                    # tolerate blank lines, e.g. at the end of the file
                    continue
                func = bf(k, bin(int(line))[2:].zfill(2**k)).swap_rows(inh_edges)
                # vacuously true when no row is constrained (all 'x')
                if all(func[row] == bit for row, bit in zip(self.constr_rows, self.fixed_bits)):
                    possible_scNCFs.append(int(func, 2))

        return possible_scNCFs

In [3]:
# The models that contain non-NCF functions have the following IDs: 61, 69, 95, 212.
# The biological attractors of the models are the following:
#   (1) 61:  2 fixed points
#   (2) 69:  4 cycles of lengths 9, 2, 9, 2
#   (3) 95:  1 fixed point and one cycle of length 3
#   (4) 212: 6 fixed points

model_num = 69

# NOTE: pickle.load can execute arbitrary code; only load these files from a trusted source.
with open(f'../input/model_{model_num}/node_num_swap.pkl', 'rb') as file1:
    node_num_swap = pkl.load(file1)
with open(f'../input/model_{model_num}/inedges.pkl', 'rb') as file2:
    in_edges = pkl.load(file2)
with open(f'../input/model_{model_num}/int_func.pkl', 'rb') as file3:
    int_func = pkl.load(file3)
with open(f'../input/model_{model_num}/signs.pkl', 'rb') as file4:
    signs = pkl.load(file4)

# node_num_swap is inverted so that node_num can be passed to the constraint functions
node_num = {v: k for k, v in node_num_swap.items()}
N = len(node_num)
func_dict = {i: None for i in range(N)}
func_type_df = pd.read_csv(f'../input/model_{model_num}/func_type_details_{model_num}.tsv', sep = '\t')

# NOTE(review): the index of func_type_df is used as the node index below - confirm
# that the rows of the file are in node order.
node_index_non_NCFs = [i for i in func_type_df.index if func_type_df.loc[i, 'NCF'] == 'No']

# Build the partially specified truth tables (BF_dict) from the biological attractors.
if model_num in [61, 212]:
    # fixed points only
    fxd_pt_attrs = pd.read_csv(f'../input/biological_att_files/attractor_model_{model_num}.tsv', sep = '\t')
    fxd_pt_attr_as_matrix = fxd_pt_attrs.drop('node', axis = 1).to_numpy()
    BF_dict = attr_constr_funcs(node_num, in_edges, fxd_pt_attr_as_matrix)

elif model_num == 95:
    # fixed point and cycle: the two sets of constraints are merged
    fxd_pt_attrs = pd.read_csv(f'../input/biological_att_files/attractor_model_{model_num}.tsv', sep = '\t')
    fxd_pt_attr_as_matrix = fxd_pt_attrs.drop('node', axis = 1).to_numpy()

    cyclic_attrs = pd.read_csv(f'../input/biological_att_files/cyclic_attractor_model_{model_num}.tsv', sep = '\t')
    cyclic_attrs_as_matrix = cyclic_attrs.drop('node', axis = 1).to_numpy()
    # columns of the cyclic attractor matrix, in the order in which the cycle is traversed
    cycles = [[0,1,2]]

    fp_constr_dict = attr_constr_funcs(node_num, in_edges, fxd_pt_attr_as_matrix)
    cycle_constr_dict = attr_constr_funcs_with_cycles(node_num, in_edges, cyclic_attrs_as_matrix, cycles)
    BF_dict = combine_fxd_pt_and_cycle_constraint(fp_constr_dict, cycle_constr_dict)

elif model_num == 69:
    # cycles only
    cyclic_attrs = pd.read_csv(f'../input/biological_att_files/cyclic_attractor_model_{model_num}.tsv', sep = '\t')
    cyclic_attrs_as_matrix = cyclic_attrs.drop('node', axis = 1).to_numpy()
    # columns of the cyclic attractor matrix: four cycles of lengths 9, 2, 9, 2
    cycles = [[0,1,2,3,4,5,6,7,8], [9, 10], [11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21]]
    BF_dict = attr_constr_funcs_with_cycles(node_num, in_edges, cyclic_attrs_as_matrix, cycles)

else:
    # Without this branch BF_dict would stay undefined and the loop below would
    # fail with a NameError that does not point at the real cause.
    raise ValueError(f'No attractor constraints are configured for model {model_num}')

# Candidate functions per node: all the scNCFs that satisfy the attractor constraints
# for the non-NCF nodes, and the original function for every other node.
for i in range(N):
    if i in node_index_non_NCFs:
        func_dict[i] = constrainBF(BF_dict[i], signs[i]).with_scNCF()
    else:
        func_dict[i] = [int_func[i]]

After replacing the non-NCFs with sign-conforming NCFs, the numbers of plausible models that satisfy the attractor constraints (fixed points and/or cycles) are the following: (1) 61 $\rightarrow$ 686244, (2) 69 $\rightarrow$ 96, (3) 95 $\rightarrow$ 2116, (4) 212 $\rightarrow$ 48. We save all the plausible models for the model IDs 69, 95 and 212. For model 61 we instead draw a random sample of 5000 models.

In [4]:
if model_num in (69, 95, 212):
    # Every plausible model is one choice of candidate function per node,
    # with the nodes taken in increasing order of their index.
    all_model_combs = list(product(*(func_dict[key] for key in sorted(func_dict))))
    # Each model is stored as the JSON list of its functions.
    all_model_combs_str = [json.dumps(list(comb)) for comb in all_model_combs]
    df = pd.DataFrame(dict(
        model_no=range(len(all_model_combs_str)),
        func_type='all_scNCF',
        model=all_model_combs_str,
    ))

    #df.to_csv(f"../output/bio_model_{model_num}_non_NCF_replaced_by_NCF.tsv", sep="\t", index=False)

In [5]:
# Show the table of candidate models. At this point `df` exists only when
# model_num is 69, 95 or 212; for model 61 it is built in the sampling cell below.
df

Unnamed: 0,model_no,func_type,model
0,0,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 7, 4, 2, 61491023382737..."
1,1,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 7, 4, 2, 61491023380724..."
2,2,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 7, 4, 2, 61491023380050..."
3,3,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 7, 4, 2, 61491023369315..."
4,4,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 7, 4, 2, 61491023380042..."
...,...,...,...
91,91,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 5701751, 4, 2, 61491023..."
92,92,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 5701751, 4, 2, 61491023..."
93,93,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 5701751, 4, 2, 61963901..."
94,94,all_scNCF,"[2, 2, 2, 1, 1, 2, 42, 5701751, 4, 2, 61851311..."


In [6]:
if model_num == 61:
    # One candidate function per node, nodes in increasing order of their index.
    all_model_combs = list(product(*(func_dict[key] for key in sorted(func_dict))))

    # Model 61 is sampled instead of being stored in full: 5000 distinct
    # models are drawn, with a fixed seed so that every run draws the same ones.
    rng = np.random.default_rng(42)
    idx = rng.choice(len(all_model_combs), size=5000, replace=False)
    sampled = [all_model_combs[pos] for pos in idx]
    all_model_combs_str = [json.dumps(list(comb)) for comb in sampled]

    df = pd.DataFrame(dict(
        model_no=range(len(all_model_combs_str)),
        func_type='all_scNCF',
        model=all_model_combs_str,
    ))

    #df.to_csv(f"../output/bio_model_{model_num}_non_NCF_replaced_by_NCF.tsv", sep="\t", index=False)