In [2]:
!pip install tqdm

Collecting tqdm
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1


In [24]:
import os
import pandas as pd
import numpy as np
from scipy.integrate import solve_ivp
from collections import Counter
from tqdm import trange
import pprint
from IPython.display import display, Math

# SymPy vetting

In [25]:
import sympy as sp

# 1) Declare symbols for genes
A, B, C, D = sp.symbols('A B C D')
genes = [A, B, C, D]

# 2) Declare symbols for all your parameters
#    (example names—replace with your full param_names list)
gA, gB, gC, gD = sp.symbols('Prod_of_A Prod_of_B Prod_of_C Prod_of_D')
dA, dB, dC, dD = sp.symbols('Deg_of_A Deg_of_B Deg_of_C Deg_of_D')
# inhibition params:
lamBA, KBA, nBA = sp.symbols('Inh_of_BToA Trd_of_BToA Num_of_BToA')
lamCA, KCA, nCA = sp.symbols('Inh_of_CToA Trd_of_CToA Num_of_CToA')
lamDA, KDA, nDA = sp.symbols('Inh_of_DToA Trd_of_DToA Num_of_DToA')

lamAB, KAB, nAB = sp.symbols('Inh_of_AToB Trd_of_AToB Num_of_AToB')
lamCB, KCB, nCB = sp.symbols('Inh_of_CToB Trd_of_CToB Num_of_CToB')
lamDB, KDB, nDB = sp.symbols('Inh_of_DToB Trd_of_DToB Num_of_DToB')

lamAC, KAC, nAC = sp.symbols('Inh_of_AToC Trd_of_AToC Num_of_AToC')
lamBC, KBC, nBC = sp.symbols('Inh_of_BToC Trd_of_BToC Num_of_BToC')
lamDC, KDC, nDC = sp.symbols('Inh_of_DToC Trd_of_DToC Num_of_DToC')

lamAD, KAD, nAD = sp.symbols('Inh_of_AToD Trd_of_AToD Num_of_AToD')
lamBD, KBD, nBD = sp.symbols('Inh_of_BToD Trd_of_BToD Num_of_BToD')
lamCD, KCD, nCD = sp.symbols('Inh_of_CToD Trd_of_CToD Num_of_CToD')

# … repeat for all 12 edges …

# 3) Build each dX/dt using the shifted‐Hill 1/(1+(X/K)**n) form
def shifted_inh(H, lam, K, n):
    hill = 1/(1 + (H/K)**n)
    return (1 - lam)*hill + lam

# dA/dt
exprA = gA \
    * shifted_inh(B, lamBA, KBA, nBA) \
    * shifted_inh(C, lamCA, KCA, nCA) \
    * shifted_inh(D, lamDA, KDA, nDA) \
    - dA*A

# Similarly for dB, dC, dD:
exprB = gB \
    * shifted_inh(A, lamAB, KAB, nAB) \
    * shifted_inh(C, lamCB, KCB, nCB) \
    * shifted_inh(D, lamDB, KDB, nDB) \
    - dB*B

exprC = gC \
    * shifted_inh(A, lamAC, KAC, nAC) \
    * shifted_inh(B, lamBC, KBC, nBC) \
    * shifted_inh(D, lamDC, KDC, nDC) \
    - dC*C

exprD = gD \
    * shifted_inh(A, lamAD, KAD, nAD) \
    * shifted_inh(B, lamBD, KBD, nBD) \
    * shifted_inh(C, lamCD, KCD, nCD) \
    - dD*D

# 4) Pack into a list
odes = [exprA, exprB, exprC, exprD]
params = [gA,gB,gC,gD, dA,dB,dC,dD,
          lamBA,KBA,nBA, lamCA,KCA,nCA, lamDA,KDA,nDA,
          lamAB,KAB,nAB, lamCB,KCB,nCB, lamDB,KDB,nDB,
          lamAC,KAC,nAC, lamBC,KBC,nBC, lamDC,KDC,nDC,
          lamAD,KAD,nAD, lamBD,KBD,nBD, lamCD,KCD,nCD]

# 5) Optional: simplify
odes_simpl = [sp.simplify(e) for e in odes]

In [26]:
for equation in odes_simpl:
    display(Math(sp.latex(equation)))

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

<IPython.core.display.Math object>

In [27]:
pt = "/Users/hiteshkandarpa/Desktop/IISC/Summer'25/Code/initial_sims/Toggle_tetrahedron/Hypothesis_test"
params_file = os.path.join(pt, "TS_parameters.dat")
names_file  = os.path.join(pt, "TS.prs")

# Read TS.prs, ignore first header line, take only the first token each line
with open(names_file, 'r') as f:
    lines       = [ln.strip() for ln in f if ln.strip()]
param_names = [ln.split()[0] for ln in lines[1:]]

# Read parameters: col0=S_no, col1=Reported_states, cols2+ = kinetic values

# 1) Read all 10,000 parameter sets
df_all = pd.read_csv(
    params_file,
    sep=r'\s+',
    header=None,
    names=["S_no", "Reported_states"] + param_names,
    nrows=20
)

# 2) Randomly sample 200 distinct rows
#    (set random_state for reproducibility if desired)
pars = df_all#.sample(n=200, random_state=100)#.reset_index(drop=True) (random_states used 42, 10, 100)

# Now `pars` contains 200 randomly chosen parameter sets out of the full 10,000.


f_num = sp.lambdify(
    (A, B, C, D) + tuple(params),
    odes_simpl,
    'numpy'
)

# Parellelised Vectorised RACIPE

In [28]:
from joblib import Parallel, delayed

def integrate_euler_sympy(row, n_ics=100, dt=0.1, n_steps=20000):
    # extract numeric params in correct order
    pvals = [float(row[str(p)]) for p in params]
    # scale for uniform IC
    scale = np.array([row[f"Prod_of_{G}"]/row[f"Deg_of_{G}"] for G in ['A','B','C','D']], float)
    Xs = np.random.uniform(0, 1, size=(n_ics, 4)) * scale[None,:]
    for _ in range(n_steps):
        A_vec, B_vec, C_vec, D_vec = Xs.T
        dA, dB, dC, dD = f_num(A_vec, B_vec, C_vec, D_vec, *pvals)
        Xs += dt * np.vstack((dA, dB, dC, dD)).T
        Xs[Xs<0] = 0
    return Xs
    
# 1) Define a helper to process one parameter set
def process_param(row, tol=1.0, n_ics=100, dt=0.1, n_steps=20000):
    s_no = int(row["S_no"])
    # integrate all ICs vectorized
    Xs = integrate_euler_sympy(row, n_ics=n_ics, dt=dt, n_steps=n_steps)
    # cluster unique finals
    finals = []
    for x in Xs:
        if not any(np.allclose(x, f, atol=tol) for f in finals):
            finals.append(x.copy())
    # log2 transform
    flat = [np.log2(v) if v>0 else -np.inf for state in finals for v in state]
    return [s_no, len(finals)] + flat

# 2) Parallel execution over your pars DataFrame
#    Use all available cores; set n_jobs accordingly
results = Parallel(n_jobs=-1)(
    delayed(process_param)(pars.iloc[i])
    for i in range(len(pars))
)

# 3) Assemble into DataFrame
genes = ['A','B','C','D']
max_states = max((len(r) - 2)//4 for r in results)
cols = ["S_no","n_states"] + [
    f"{g}_ss{s+1}"
    for s in range(max_states)
    for g in genes
]
df_verify_parallel = pd.DataFrame(results, columns=cols)

df_verify_parallel


Unnamed: 0,S_no,n_states,A_ss1,B_ss1,C_ss1,D_ss1,A_ss2,B_ss2,C_ss2,D_ss2,...,C_ss19,D_ss19,A_ss20,B_ss20,C_ss20,D_ss20,A_ss21,B_ss21,C_ss21,D_ss21
0,1,2,0.54957,-1.211174,-6.659352,0.47581,5.106405,2.037353,-6.853657,-6.040165,...,,,,,,,,,,
1,2,1,-7.318248,0.707534,2.100855,-2.847394,,,,,...,,,,,,,,,,
2,3,2,1.610899,-2.7828,0.334529,-5.695817,-3.927558,-7.955583,1.964067,-0.047971,...,,,,,,,,,,
3,4,2,0.766111,-0.02979,-7.851651,-6.870361,0.488925,-4.706726,-3.733918,0.226854,...,,,,,,,,,,
4,5,3,-6.97329,1.944064,-4.252845,0.911907,-7.537327,-1.214237,0.032266,1.998035,...,,,,,,,,,,
5,6,21,-1.27115,4.42811,-7.006648,-0.115713,-4.095543,1.782997,-6.797281,0.661438,...,-6.129505,-0.625688,-0.24705,2.246243,-2.956553,-1.356925,-0.527709,-0.506621,-2.768171,1.214158
6,7,2,-1.595041,0.900125,1.618447,-8.546425,0.834189,3.529702,-2.199929,-9.114134,...,,,,,,,,,,
7,8,2,0.555849,-1.424328,-5.000178,2.399917,-4.678974,3.169363,-4.053261,3.255306,...,,,,,,,,,,
8,9,2,-4.418381,-7.285463,6.066426,3.792055,-0.40566,-1.232729,4.138996,-0.100993,...,,,,,,,,,,
9,10,1,-3.459029,2.930963,1.128937,-5.255404,,,,,...,,,,,,,,,,


# Writing this to output file

In [22]:
# Filter out parameter sets with >10 steady states and write RACIPE‐style output

# genes and DataFrame from above
genes = ['A','B','C','D']

# 1) Drop rows with more than 10 steady states
df_filtered = df_verify_parallel[df_verify_parallel['n_states'] <= 10].copy()

# 2) Write to file in RACIPE format
output_path = os.path.join(pt, "3_random_200_RACIPE_custom_ss_noepigenetic.dat")
with open(output_path, 'w') as fout:
    for _, row in df_filtered.iterrows():
        s_no     = int(row['S_no'])
        n_states = int(row['n_states'])
        # collect only the actual steady‐state columns
        vals = []
        for s in range(n_states):
            for g in genes:
                vals.append(row[f"{g}_ss{s+1}"])
        # compose and write line
        line = [s_no, n_states] + vals
        fout.write("\t".join(f"{v:.6g}" for v in line) + "\n")

print(f"Wrote {len(df_filtered)} parameter sets to {output_path}")


Wrote 197 parameter sets to /Users/hiteshkandarpa/Desktop/IISC/Summer'25/Code/initial_sims/Toggle_tetrahedron/Hypothesis_test/3_random_200_RACIPE_custom_ss_noepigenetic.dat


# Measuring differences Between RACIPE Output and Vectorised Sympy RACIPE 

In [76]:

# Path to RACIPE solutions file
raci_path = "/Users/hiteshkandarpa/Desktop/IISC/Summer'25/Code/initial_sims/Toggle_tetrahedron/Hypothesis_test/combined_solutions2.dat"

# 1) Read just the first two columns: parameter set number and reported number of states
df_raci = pd.read_csv(
    raci_path,
    sep=r'\s+',
    header=None,
    usecols=[0, 1],
    names=['S_no', 'R_states']
)

# 2) Filter to only those parameter sets you simulated (in df_verify)
df_raci_sub = df_raci[df_raci['S_no'].isin(df_verify['S_no'])]

# 3) Merge with your simulated results
df_comp = df_raci_sub.merge(
    df_verify[['S_no', 'n_states']],
    on='S_no',
    how='inner'
)

# 4) Compare
df_comp['match'] = df_comp['R_states'] == df_comp['n_states']
n_total   = len(df_comp)
n_match   = df_comp['match'].sum()
n_diff    = n_total - n_match
diff_sets = df_comp.loc[~df_comp['match'], 'S_no'].tolist()

# 5) Print summary
print(f"Compared {n_total} parameter sets (only those you simulated).")
print(f"{n_match} agree on steady‐state count.")
print(f"{n_diff} differ: {diff_sets}")

# 6) Show comparison details
df_comp


Compared 44 parameter sets (only those you simulated).
23 agree on steady‐state count.
21 differ: [4, 8, 10, 11, 12, 14, 20, 22, 29, 31, 33, 34, 38, 39, 42, 43, 45, 46, 47, 49, 50]


Unnamed: 0,S_no,R_states,n_states,match
0,2,1,1,True
1,4,3,2,False
2,5,3,3,True
3,8,17,2,False
4,9,2,2,True
5,10,3,1,False
6,11,3,2,False
7,12,4,2,False
8,13,2,2,True
9,14,4,2,False
