In [1]:
%run params.ipynb

In [2]:
# Stand-alone defaults: applied only when PARAMS has no 'name' entry yet.
# When 'name' is already present, PARAMS is left untouched.
if 'name' not in PARAMS:
    # Star import kept as-is: presumably it supplies Actin_BR / Actin_ST used
    # below, and possibly other names later cells rely on -- TODO confirm
    # before narrowing it to explicit names.
    from rules.enzymes import *
    PARAMS['name'] = 'v01_oscillating_del_Arp2_3'
    # Each inner list is one group of nodes whose normalized cycles are
    # reported jointly in the "special basins" report.
    PARAMS['report_basins'] = [[Actin_BR, Actin_ST]]

In [7]:
import pandas as pd
import re

In [8]:
# Load every dataset stored in the simulations HDF5 file into one DataFrame.
data_file_name = build_filename(PARAMS, "simulations.h5")

# Open the store once, read-only, and let the context manager close it.
# The default HDFStore mode ('a') would silently create a missing file and
# leave the handle open; with mode="r" a missing file raises instead.
with pd.HDFStore(data_file_name, mode="r") as store:
    frames = [pd.read_hdf(store, key=key) for key in store.keys()]

# DataFrame.append was removed in pandas 2.0; concatenate once instead of
# growing the frame key by key. pd.concat rejects an empty list, so an empty
# store still yields an empty DataFrame.
df = pd.concat(frames) if frames else pd.DataFrame()

df.shape

(100, 53)

In [9]:
def normalize_cycle(raw):
    """Collapse a repeated cycle to its shortest repeating unit.

    The elements of ``raw`` are converted with ``str`` and concatenated. If the
    resulting string consists of one substring repeated two or more times, that
    substring is returned; otherwise the concatenated string is returned as is.

    Examples: ``"0101" -> "01"``, ``[1, 0, 1, 0] -> "10"``, ``"011" -> "011"``.
    """
    joined = ''.join(str(e) for e in raw)
    # Lazy group + backreference: the shortest prefix whose repetition spans
    # the whole string wins.
    return re.sub(r'^(.+?)\1+$', r'\1', joined)


def unique_cycles(ser):
    """Map every distinct cycle in ``ser`` to one representative per cycle class.

    Two cycles belong to the same class when their shortest repeating units
    (see ``normalize_cycle``) are rotations of each other, i.e. they describe
    the same cycle observed with a different phase and/or a different number
    of repetitions. The representative of a class is the normalized unit of
    the first member encountered in ``ser.unique()`` order.

    The earlier implementation searched for nine repetitions of one raw cycle
    inside ten repetitions of another, with the raw cycle interpolated into a
    regular expression. That could merge distinct cycles (``"0"`` absorbed
    ``"0000000001"``), gave order-dependent results for rotations of different
    recorded length, and broke on regex metacharacters. Rotation equivalence
    is now tested exactly and without regular expressions.

    Parameters
    ----------
    ser : pandas.Series
        Series of cycle values accepted by ``normalize_cycle``.

    Returns
    -------
    dict
        ``{original cycle value: representative normalized cycle}`` with one
        entry per distinct value in ``ser``.
    """
    cycles_map = {}
    # Representatives bucketed by unit length: rotations always share a length.
    representatives = {}

    for pattern in ser.unique():
        unit = normalize_cycle(pattern)
        candidates = representatives.setdefault(len(unit), [])

        # A string of length L is a rotation of ``rep`` (also length L)
        # exactly when it occurs inside ``rep + rep``.
        target = next((rep for rep in candidates if unit in rep + rep), None)
        if target is None:
            candidates.append(unit)
            target = unit

        cycles_map[pattern] = target

    return cycles_map

print("Normalizing cycles")

# Raw cycle columns only: names starting with "cycle_" that do not end in
# "norm", "len" or "start".
raw_cycle_columns = df.filter(regex="^cycle_.+(?<!norm)(?<!len)(?<!start)$").columns.values

for node in raw_cycle_columns:
    # Map each distinct cycle of this column to its representative, then
    # store the translated column next to the raw one as "<column>_norm".
    cmap = unique_cycles(df[node])
    normalized_column = "%s_norm" % node
    df[normalized_column] = df[node].apply(lambda cycle: cmap[cycle])
    

Normalizing cycles


In [10]:
print("Reporting simple basins")

# For every normalized cycle column, write a table of how many rows fall on
# each cycle ("count") and which fraction of all rows that is ("pct"), most
# frequent first. The `with` block closes the report even if building or
# formatting a table raises.
with open(build_filename(PARAMS, "simple_basins.txt"), "w") as file:
    for col in df.filter(regex="^cycle_.+_norm$").columns:
        grp = df.groupby(by=[col]).size().sort_values(ascending=False)
        rep = pd.concat([grp.rename("count"), (grp / df.shape[0]).rename('pct')], axis=1)
        file.write(str(rep) + "\n\n\n\n")

Reporting simple basins


In [20]:
if 'report_basins' in PARAMS:
    print("Reporting special basins")

    # One table per configured node group: rows are the observed combinations
    # of the group's normalized cycles, with their row count ("count") and
    # share of all rows ("pct"), most frequent first. The `with` block closes
    # the report even if a configured column is missing and groupby raises.
    with open(build_filename(PARAMS, "special_basins.txt"), "w") as file:
        for cols in PARAMS['report_basins']:
            grp = df.groupby(by=["cycle_{}_norm".format(name) for name in cols]).size().sort_values(ascending=False)
            rep = pd.concat([grp.rename("count"), (grp / df.shape[0]).rename('pct')], axis=1)
            file.write(str(rep) + "\n\n\n\n")

Reporting special basins


In [118]:
print("Reporting weighted activity")

import numpy as np

# Rows of `rep` are the pct cut-offs produced by np.arange(0, 1, 0.1);
# one column is added per normalized cycle column.
rep = pd.DataFrame(index = np.arange(0, 1, 0.1))
n_rows = df.shape[0]

for col in df.filter(regex="^cycle_.+_norm$").columns:
    sizes = df.groupby(by=[col]).size().sort_values(ascending=False)
    grp = pd.concat([sizes.rename("count"), (sizes / n_rows).rename('pct')], axis=1)

    # "active": fraction of characters in the cycle string that are not '0'.
    grp["active"] = [(len(cycle) - cycle.count('0')) / len(cycle) for cycle in grp.index.values]
    # "active_rel": that fraction weighted by the cycle's share of all rows.
    grp["active_rel"] = grp["pct"] * grp["active"]

    # For each cut-off, total the weighted activity of the cycles whose share
    # is strictly above it.
    rep[col] = [grp.loc[grp["pct"] > cut, "active_rel"].sum() for cut in rep.index]


# Written transposed: one row per cycle column, one column per cut-off.
rep.transpose().to_csv(build_filename(PARAMS, "weighted_activity.csv"), float_format='%g')

Reporting weighted activity
