#### Imports

In [1]:
from glob import glob
from itertools import product
import pandas as pd
import numpy as np
from polyphys.manage import organizer
from polyphys.manage.parser import \
    SumRuleCyl, TransFociCyl, TransFociCub, HnsCub, HnsCyl
from polyphys.manage.typer import ParserT
from polyphys.analyze import measurer
import polyphys.api as api
from polyphys.api import PROJECTS_DETAILS as PSD
from polyphys.probe import logger
import warnings
warnings.filterwarnings('ignore')

# Choose these two settings before running the notebook:
#   1. `project`: the key used to look up the project settings.
#   2. `analysis_db`: the directory that holds the analysis outputs; it is
#      used as a string prefix below, so it must end with a path separator.
# The commented-out lines are alternative choices kept for quick switching.
# NOTE(review): the active `project` is 'TransFociCub' while the active
# `analysis_db` points to a 'SumRuleCubHeteroLinear-analysis' directory --
# confirm that this pairing is intended.
#project = 'HnsCub'  # 'SumRuleCyl', 'TransFociCyl'
project = 'TransFociCub'
#project = 'TransFociCyl'
#project = 'SumRuleCyl'
#analysis_db = '/Users/amirhsi_mini/research_data/do_not_delete/'+project+'-analysis/'
#analysis_db = '/Users/amirhsi_mini/research_data/'+project+'-analysis/'
#analysis_db = '/Users/amirhsi_mini/research_data/do_not_delete/analysis/'
#analysis_db = '/Users/amirhsi_mini/research_data/do_not_delete/'+project+'-analysis/'
#analysis_db = "/Users/amirhsi/research_data/TransFociCub-analysis/"
#analysis_db = "/Users/amirhsi/research_data/TransFociCub_InitialCompressed_CollectingEquilibrium_ExactEquilibrium/ns400nl5al5.0ac1.0-analysis/"
analysis_db = "/Users/amirhsi/research_data/SumRuleCubHeteroLinear-analysis/"
#analysis_db = '/Users/amirhsi_mini/research_data/analysis/'
#analysis_db = '../../Datasets/HnsCub-N200epshm29kbmm2ens1_2-analysis/'
# Look up the settings of the chosen project in the PROJECTS_DETAILS table
# (imported from polyphys.api as PSD); the cells below read keys such as
# 'space_pat', 'group', 'hierarchy' and 'parser' from it.
project_details = PSD[project]

  from .autonotebook import tqdm as notebook_tqdm


# allInOne *whole* and *ensAvg* stamps per project

## ensemble-averaged stamps per project (ensAvg phase)

In [2]:
# Merge the stamps file of every '<group>-ensAvg' space directory found under
# analysis_db into a single project-wide CSV.
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_avg_suffix = project_details['group'] + '-ensAvg'
ens_avg_space_dbs = [
    f"{path}/" for path in space_dbs if path.endswith(ens_avg_suffix)
]
allInOne_stamps = []
for space_db in ens_avg_space_dbs:
    print(space_db)
    # Only the first file matching the stamps pattern is read.
    stamp_matches = glob(
        space_db + "/" + project_details['space_hierarchy'] + 'stamps*'
        + '.csv'
    )
    stamp_path = stamp_matches[0]
    space_stamps = pd.read_csv(stamp_path)
    allInOne_stamps.append(space_stamps)
allInOne_stamps = pd.concat(allInOne_stamps, axis=0)
allInOne_stamps = allInOne_stamps.reset_index(drop=True)
output = f"{analysis_db}allInOne-{project}-stamps-ensAvg.csv"
allInOne_stamps.to_csv(output, index=False)


/Users/amirhsi/research_data/SumRuleCubHeteroLinear-analysis/ns400nl5al1ac1-bug-ensAvg/


## whole stamps per project (ens phase)

In [3]:
# Merge the stamps file of every '<group>-ens' space directory found under
# analysis_db into a single project-wide CSV.
# NOTE: the list keeps the name `ens_avg_space_dbs` although it holds the
# '-ens' (not '-ensAvg') directories here.
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_suffix = project_details['group'] + '-ens'
ens_avg_space_dbs = [
    f"{path}/" for path in space_dbs if path.endswith(ens_suffix)
]
allInOne_stamps = []
for space_db in ens_avg_space_dbs:
    # Only the first file matching the stamps pattern is read.
    stamp_matches = glob(
        space_db + "/" + project_details['space_hierarchy'] + 'stamps*'
        + '.csv'
    )
    stamp_path = stamp_matches[0]
    space_stamps = pd.read_csv(stamp_path)
    allInOne_stamps.append(space_stamps)
allInOne_stamps = pd.concat(allInOne_stamps, axis=0)
allInOne_stamps = allInOne_stamps.reset_index(drop=True)
output = f"{analysis_db}allInOne-{project}-stamps-ens.csv"
allInOne_stamps.to_csv(output, index=False)

## Logs per project

In [None]:
# Select the project whose simulation logs are collected below. This rebinds
# `project` only; `project_details` is not recomputed by this cell.
project = 'HnsCub'  # 'SumRuleCyl', 'TransFociCyl'
#project = 'TransFociCub'
#project = 'TransFociCyl'

In [6]:
# Per-project settings for log collection. For each project:
#   'space_pat'   glob pattern of the space directories ('-logs' is appended
#                 when the directories are searched below),
#   'hierarchy'   glob pattern of the log files inside a space directory,
#   'parser'      parser class instantiated for every log path,
#   'group', 'geometry', 'topology'
#                 arguments forwarded to that parser,
#   'product_idx' index forwarded to logger.LammpsLog for regular logs
#                 (presumably the position of the production run in the
#                 log -- TODO confirm against polyphys.probe.logger).
log_details = {
    'SumRuleCyl': {
        'space_pat': 'N*D*ac*',
        'hierarchy': '/N*.log',  # dir/file
        'parser': SumRuleCyl,
        'group': 'bug',
        'geometry': 'cylindrical',
        'topology': 'linear',
        'product_idx': 1
    },
    'TransFociCyl': {
        'space_pat': 'ns*nl*al*D*ac*',
        'hierarchy': '/eps*.log',  # dir/file
        'parser': TransFociCyl,
        'group': 'bug',
        'geometry': 'cylindrical',
        'topology': 'ring',
        'product_idx': 2
    },
    'TransFociCub': {
        'space_pat': 'ns*nl*al*ac*',
        'hierarchy': '/al*.log',  # dir/file
        'parser': TransFociCub,
        'group': 'bug',
        'geometry': 'cubic',
        'topology': 'ring',
        'product_idx': 2
    },
    'HnsCub': {
        'space_pat': 'N*epshm*nh*ac*',
        'hierarchy': '/N*.log',  # dir/file
        'parser': HnsCub,
        'group': 'nucleoid',
        'geometry': 'cubic',
        'topology': 'ring',
        'product_idx': 2
    }
}
# Root directory of the logs of the selected project (alternatives kept
# commented out for quick switching).
#log_db = "/Users/amirhsi_mini/OneDrive - University of Waterloo/PhD Research/Jupyter/Datasets/logs/"
#log_db = '/Users/amirhsi_mini/research_data/TransFociCub/logs/'
log_db = '/Users/amirhsi_mini/research_data/do_not_delete/'+project+'-logs/'
# One '<space>-logs' directory per space, in sorted order.
space_dbs = glob(log_db + log_details[project]['space_pat'] + '-logs')
space_dbs.sort()
# Space directories whose logs are parsed with the 'segment' lineage; every
# other space is parsed with the 'whole' lineage.
space_with_segment_lineage = [
    'N500D10.0ac0.6-logs',
    'N500D10.0ac0.8-logs',
    'N500D10.0ac1.0-logs',
    'N2000D30.0ac4.0-logs',
    'N2000D30.0ac6.0-logs'
]

In [None]:
# Parse every log of every space of the selected project and collect three
# project-wide tables: thermodynamic output ("thermo"), run statistics
# ("runStat") and wall-time statistics ("wallTimeStat"). Each table is
# annotated with attributes parsed from the log file name and written to
# `save_to`.
thermos = []
run_stats = []
wall_times = []
save_to = './'
project_log_details = log_details[project]
# Restart logs do not have a product phase.
restart_suffixes = ('restart.log', 'restart2ndRound.log')
attr_names = ['phi_m_bulk', 'rho_m_bulk', 'phi_c_bulk', 'rho_c_bulk']

for space_db in space_dbs:
    print(space_db)
    space = space_db.split("/")[-1]
    lineage = 'segment' if space in space_with_segment_lineage else 'whole'
    logs = glob(space_db + project_log_details['hierarchy'])
    logs = organizer.sort_filenames(logs, fmts=['.log'])  # sorted
    # sort_filenames yields one sequence per log; keep its first item.
    logs = [log[0] for log in logs]
    for log in logs:
        log_info = project_log_details['parser'](
            log,
            lineage,
            project_log_details['geometry'],
            project_log_details['group'],
            project_log_details['topology']
        )
        # product_idx is 0 for restart logs and for every segment after the
        # first one; otherwise the project default applies. Both exceptions
        # are evaluated together so that neither overrides the other.
        is_restart = log.endswith(restart_suffixes)
        is_later_segment = lineage == 'segment' and log_info.segment_id > 1
        if is_restart or is_later_segment:
            product_idx = 0
        else:
            product_idx = project_log_details['product_idx']
        try:
            log_data = logger.LammpsLog(log, product_idx)
        except (logger.BrokenLogError, IndexError):
            print("broken log: ", log_info.filepath.split("/")[-1])
            # Skip this log; otherwise `log_data` would be undefined or
            # would still refer to the previously parsed log.
            continue
        log_data.extract_thermo()
        log_data.extract_run_stat()
        thermo = log_data.thermo
        run_stat = log_data.run_stat
        wall_time = log_data.wall_time
        # Attributes parsed from the file name are added to `thermo` only.
        for attr_name in log_info._lineage_attributes[lineage]:
            thermo[attr_name] = getattr(log_info, attr_name)
        for attr_name in attr_names:
            thermo[attr_name] = getattr(log_info, attr_name)
        # Lineage names identify the log in all three tables.
        for lineage_name in log_info.genealogy:
            attr_value = getattr(log_info, lineage_name)
            thermo[lineage_name] = attr_value
            run_stat[lineage_name] = attr_value
            wall_time[lineage_name] = attr_value
        thermos.append(thermo)
        run_stats.append(run_stat)
        wall_times.append(wall_time)

output = "-".join(["allInOne", project, "thermo"])
thermos = pd.concat(thermos)
thermos.reset_index(inplace=True, drop=True)
thermos.to_parquet(
    save_to + output + ".parquet.brotli", index=False, compression='brotli'
)
output = "-".join(["allInOne", project, "runStat"])
run_stats = pd.concat(run_stats)
run_stats.reset_index(inplace=True, drop=True)
run_stats.to_csv(save_to + output + ".csv", index=False)
output = "-".join(["allInOne", project, "wallTimeStat"])
wall_times = pd.concat(wall_times)
wall_times.reset_index(inplace=True, drop=True)
wall_times.to_csv(save_to + output + ".csv", index=False)

# ensAvg timeseries and their associated measures

## Measures of chain size timeseries properties per space

### allInOne ensAvg ACFs of the chain-size properties per space

In [4]:
%%time
# Wall time: 60 s for TransFoci
# Wall time: 4 min for SumRule
#
# For every '<group>-ensAvg' space, gather all '<property>-<measure>' series
# reported by organizer.unique_property into one table and store it as
# '<space>-<group>-chainSize-acf.parquet.brotli' in analysis_db.
phase = 'ensAvg'
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_avg_space_dbs = [
    space_db + "/" for space_db in space_dbs if space_db.endswith(
        project_details['group'] + '-' + phase
    )
]
# list of unique property_measures, discovered from the first space only:
filepath = ens_avg_space_dbs[0] + '*' + project_details['hierarchy'] + '.csv'
_, uniq_props_measures = organizer.unique_property(
    filepath, 2, ["-" + phase], drop_properties=['stamps'])
if project == 'TransFociCyl':
    for dropped_measure in (
        'transSizeTMon-acf',
        'transSizeTMon-acfLowerCi',
        'transSizeTMon-acfUpperCi',
    ):
        uniq_props_measures.remove(dropped_measure)
print(uniq_props_measures)
# Rounded phi_c values that are merged into a neighbouring value (Hns
# projects) and values that are dropped altogether.
hns_phi_c_merge = {0.09: 0.08, 0.15: 0.16, 0.21: 0.2, 0.31: 0.32}
hns_phi_c_drop = [0.06, 0.18]
foci_phi_c_drop = [0.025, 0.05, 0.075, 0.125, 0.175]
for ens_avg_space_db in ens_avg_space_dbs:
    ens_avgs = list()
    space = ens_avg_space_db.split('/')[-2].split('-')[0]
    for property_ in uniq_props_measures:
        ens_avg = organizer.space_tseries(
            ens_avg_space_db,
            property_,
            project_details['parser'],
            project_details['hierarchy'],
            project_details['attributes'],
            project_details['group'],
            project_details['geometry'],
            project_details['topology'],
            divisor=project_details['divisor'],
            is_save=False  # if True, save per property per space
        )
        if project in ['HnsCyl', 'HnsCub']:
            # Assign the result back instead of calling replace(inplace=True)
            # on the selected column: the chained in-place form does not
            # update the frame when pandas Copy-on-Write is active.
            ens_avg['phi_c_bulk_round'] = \
                ens_avg['phi_c_bulk_round'].replace(hns_phi_c_merge)
            ens_avg = \
                ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(hns_phi_c_drop), :]
        elif project in ['TransFociCyl', 'TransFociCub']:
            ens_avg = \
                ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(foci_phi_c_drop), :]
        else:
            print("The 'phi_c drop' condition is not defined for "
                  f"'{project}' project.")
        ens_avgs.append(ens_avg)
    ens_avgs = pd.concat(ens_avgs, axis=1)
    # drop duplicated columns:
    ens_avgs = ens_avgs.loc[:, ~ens_avgs.columns.duplicated()]
    output_name = analysis_db + "-".join(
        [space,
         project_details['group'],
         "chainSize-acf.parquet.brotli"
         ]
    )
    ens_avgs.to_parquet(output_name, index=False, compression='brotli')

['asphericityTMon-acf', 'asphericityTMon-acfLowerCi', 'asphericityTMon-acfUpperCi', 'gyrTMon-acf', 'gyrTMon-acfLowerCi', 'gyrTMon-acfUpperCi', 'shapeTMon-acf', 'shapeTMon-acfLowerCi', 'shapeTMon-acfUpperCi']
CPU times: user 154 ms, sys: 25.6 ms, total: 180 ms
Wall time: 292 ms


### **allInOne** the chain-size properties per **space**

In [5]:
%%time
# Wall time: 2 min s for TransFoci
# Wall time: 7 min for SumRule
#
# For every '<group>-ensAvg' space, gather the time series of all chain-size
# properties into one table and store it as
# '<space>-<group>-chainSize.parquet.brotli' in analysis_db.
phase = 'ensAvg'
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_avg_space_dbs = [
    space_db + "/" for space_db in space_dbs if space_db.endswith(
        project_details['group'] + '-' + phase
    )
]
print(ens_avg_space_dbs)
# list of unique property_measures, discovered from the first space only:
filepath = ens_avg_space_dbs[0] + '*' + project_details['hierarchy'] + '.csv'
_, uniq_props_measures = organizer.unique_property(
    filepath, 2, ["-" + phase], drop_properties=['stamps'])
# Strip the '-acf...' measure suffix to recover the property names; sorting
# keeps the property (and hence output column) order reproducible between
# runs, which a bare set does not guarantee.
props_tseries = sorted(
    {prop.split("-acf")[0] for prop in uniq_props_measures}
)

if project == 'TransFociCyl':
    props_tseries.remove('transSizeTMon')
print(props_tseries)

# Rounded phi_c values that are merged into a neighbouring value (Hns
# projects) and values that are dropped altogether.
hns_phi_c_merge = {0.09: 0.08, 0.15: 0.16, 0.21: 0.2, 0.31: 0.32}
hns_phi_c_drop = [0.06, 0.18]
foci_phi_c_drop = [0.025, 0.05, 0.075, 0.125, 0.175]
for ens_avg_space_db in ens_avg_space_dbs:
    ens_avgs = list()
    space = ens_avg_space_db.split('/')[-2].split('-')[0]
    for property_ in props_tseries:
        ens_avg = organizer.space_tseries(
            ens_avg_space_db,
            property_,
            project_details['parser'],
            project_details['hierarchy'],
            project_details['attributes'],
            project_details['group'],
            project_details['geometry'],
            project_details['topology'],
            divisor=project_details['divisor'],
            is_save=False  # if True, save per property per space
        )
        if project in ['HnsCyl', 'HnsCub']:
            # Assign the result back instead of calling replace(inplace=True)
            # on the selected column: the chained in-place form does not
            # update the frame when pandas Copy-on-Write is active.
            ens_avg['phi_c_bulk_round'] = \
                ens_avg['phi_c_bulk_round'].replace(hns_phi_c_merge)
            ens_avg = \
                ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(hns_phi_c_drop), :]
        elif project in ['TransFociCyl', 'TransFociCub']:
            ens_avg = \
                ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(foci_phi_c_drop), :]
        else:
            print("The 'phi_c drop' condition is not defined for "
                  f"'{project}' project.")
        ens_avgs.append(ens_avg)
    ens_avgs = pd.concat(ens_avgs, axis=1)
    # drop duplicated columns:
    ens_avgs = ens_avgs.loc[:, ~ens_avgs.columns.duplicated()]
    output_name = analysis_db + "-".join(
        [space, project_details['group'], "chainSize.parquet.brotli"]
    )
    ens_avgs.to_parquet(output_name, index=False, compression='brotli')

['/Users/amirhsi/research_data/SumRuleCubHeteroLinear-analysis/ns400nl5al1ac1-bug-ensAvg/']
['asphericityTMon', 'gyrTMon', 'shapeTMon']
CPU times: user 243 ms, sys: 43.2 ms, total: 286 ms
Wall time: 317 ms


## TransFoci Project: Pair distance time-series per project

In [6]:
%%time
#analysis_db = '/Users/amirhsi_mini/research_data/analysis/'
#analysis_db = '/Users/amirhsi_mini/research_data/do_not_delete/'+project+'-analysis/'
# Stack the pair-distance time series of all '<group>-ensAvg' spaces into a
# single project-wide parquet file.
phase = 'ensAvg'
space_dbs = glob(analysis_db + project_details['space_pat'])
phase_suffix = project_details['group'] + '-' + phase
ens_avg_space_dbs = [
    f"{path}/" for path in space_dbs if path.endswith(phase_suffix)
]
print(ens_avg_space_dbs)
tseries_foci_props = ['pairDistTFoci']
project_ens_avgs = []
for prop in tseries_foci_props:
    # Rows: one chunk per space for the current property.
    prop_ens_avgs = []
    for ens_avg_space_db in ens_avg_space_dbs:
        space = ens_avg_space_db.split('/')[-2].split('-')[0]
        ens_avg = organizer.space_tseries(
            ens_avg_space_db,
            prop,
            project_details['parser'],
            project_details['hierarchy'],
            project_details['attributes'],
            project_details['group'],
            project_details['geometry'],
            project_details['topology'],
            is_save=False  # if True, save per property per space
        )
        prop_ens_avgs.append(ens_avg)
    prop_ens_avgs = pd.concat(prop_ens_avgs, axis=0)
    # Keep only the first occurrence of every column label.
    unique_cols = ~prop_ens_avgs.columns.duplicated()
    prop_ens_avgs = prop_ens_avgs.loc[:, unique_cols]
    prop_ens_avgs = prop_ens_avgs.reset_index(drop=True)
    project_ens_avgs.append(prop_ens_avgs)
# Columns: the per-property tables side by side, again without duplicates.
project_ens_avgs = pd.concat(project_ens_avgs, axis=1)
unique_cols = ~project_ens_avgs.columns.duplicated()
project_ens_avgs = project_ens_avgs.loc[:, unique_cols]
project_ens_avgs = project_ens_avgs.reset_index(drop=True)
output = (
    f"allInOne-{project}-{project_details['group']}"
    "-pairDistT.parquet.brotli"
)
output = analysis_db + output
project_ens_avgs.to_parquet(output, index=False, compression='brotli')

['/Users/amirhsi/research_data/SumRuleCubHeteroLinear-analysis/ns400nl5al1ac1-bug-ensAvg/']


ValueError: No objects to concatenate

# Equilibrium timeseries properties per space AND per project

## Whole equilibrium properties allInOne

In [7]:
%%time
# Wall time: 23 s for TransFociCyl
# Wall time: 10 min s for SumRule
# Wall time: 5 s for TransFociCub
# Unique space names: the directory name up to its first '-'.
space_dirs = glob(analysis_db + project_details['space_pat'])
spaces = sorted(
    {space_dir.split('/')[-1].split('-')[0] for space_dir in space_dirs}
)
save_space = True
equili_props_wholes = api.all_in_one_equil_tseries(
    project,
    analysis_db,
    project_details['group'],
    spaces,
    project_details['time_varying_props'],
    project_details['equil_measures'],
    save_space=save_space,
    divisor=project_details['divisor'],
    round_to=3,
    kind='dataframe',
    save_to=analysis_db,
)

CPU times: user 153 ms, sys: 33.7 ms, total: 187 ms
Wall time: 231 ms


In [8]:
# Display the column labels of the whole-level equilibrium table.
equili_props_wholes.columns

Index(['lineage_name', 'whole', 'ensemble_long', 'ensemble', 'space',
       'dmon_large', 'nmon_large', 'mmon_large', 'nmon_small', 'dcrowd',
       'ncrowd', 'lcube', 'dt', 'bdump', 'adump', 'ensemble_id', 'dmon_small',
       'mmon_small', 'mcrowd', 'eps_others', 'phi_m_bulk', 'rho_m_bulk',
       'phi_c_bulk', 'rho_c_bulk', 'n_frames', 'asphericityMon-mean',
       'asphericityMon-var', 'asphericityMon-sem', 'gyrMon-mean', 'gyrMon-var',
       'gyrMon-sem', 'shapeMon-mean', 'shapeMon-var', 'shapeMon-sem',
       'phi_c_bulk_round'],
      dtype='object')

In [9]:
import itertools
# Select, for every space, the rows with phi_c_bulk_round == 0 together with
# the '-mean' columns of the equilibrium properties.
properties = project_details['equil_properties']
print(properties)
norm_prop_mean = [prop for prop in properties if prop.endswith('mean')]
norm_props = [mean_col.split('-')[0] for mean_col in norm_prop_mean]
print(norm_props)
spaces = equili_props_wholes['space'].unique()
space_con = equili_props_wholes['space'].isin(spaces)
phi_c_con = equili_props_wholes['phi_c_bulk_round'] == 0
cols = ['space', 'ensemble', 'whole'] + norm_prop_mean
print(equili_props_wholes.columns)
phi_c_counts = equili_props_wholes.loc[space_con & phi_c_con, cols]
# Disabled draft of a per-space, per-property lookup:
#for space, prop in itertools.product(spaces, norm_props):
#    space_con = equili_props_wholes['space'] == space
#    phi_c_con = equili_props_wholes['phi_c_bulk_round'] == 0
#    prop_0 = equili_props_wholes.loc[space_con & phi_c_con, prop + "-mean"]#.to_numpy()[0]  # type: ignore
    


['asphericityMon-mean', 'asphericityMon-var', 'asphericityMon-sem', 'gyrMon-mean', 'gyrMon-var', 'gyrMon-sem', 'shapeMon-mean', 'shapeMon-var', 'shapeMon-sem']
['asphericityMon', 'gyrMon', 'shapeMon']
Index(['lineage_name', 'whole', 'ensemble_long', 'ensemble', 'space',
       'dmon_large', 'nmon_large', 'mmon_large', 'nmon_small', 'dcrowd',
       'ncrowd', 'lcube', 'dt', 'bdump', 'adump', 'ensemble_id', 'dmon_small',
       'mmon_small', 'mcrowd', 'eps_others', 'phi_m_bulk', 'rho_m_bulk',
       'phi_c_bulk', 'rho_c_bulk', 'n_frames', 'asphericityMon-mean',
       'asphericityMon-var', 'asphericityMon-sem', 'gyrMon-mean', 'gyrMon-var',
       'gyrMon-sem', 'shapeMon-mean', 'shapeMon-var', 'shapeMon-sem',
       'phi_c_bulk_round'],
      dtype='object')


In [10]:
# Build the ensemble-averaged equilibrium table from the whole-level table
# via the polyphys API; analysis_db is passed as the `save_to` location.
ens_avg = api.all_in_one_equil_tseries_ens_avg(
    project,
    equili_props_wholes,
    project_details['group'],
    project_details['equil_properties'],
    project_details['equil_attributes'],
    save_to=analysis_db
)

# Distributions

## Persistence lengths: Hns Project

In [None]:
# Collect the 'bondCosine*' histograms of every '<group>-ensAvg' space and
# write them as one project-wide CSV
# ('allInOne-<project>-<group>-bondCosCorrVecMon.csv').
phase = 'ensAvg'
ext = '.csv'
property_ext = phase + '-mean' + ext
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_avg_space_dbs = [
    space_db + "/" for space_db in space_dbs if space_db.endswith(
        project_details['group'] + '-' + phase
    )
]
# Property names are discovered from the first space only.
filepath = ens_avg_space_dbs[0] + project_details['hierarchy'] + ext
uniq_props, _ = organizer.unique_property(
    filepath, 2, ["-" + phase], drop_properties=['stamps'])
# Sorted so that the property (and output column) order is reproducible
# between runs, which a bare set does not guarantee.
bond_props = sorted(
    {prop for prop in uniq_props if prop.startswith("bondCosine")}
)
print(bond_props)
# Rounded phi_c values that are merged into a neighbouring value (Hns
# projects) and values that are dropped altogether.
hns_phi_c_merge = {0.09: 0.08, 0.15: 0.16, 0.21: 0.2, 0.31: 0.32}
hns_phi_c_drop = [0.06, 0.18]
foci_phi_c_drop = [0.025, 0.05, 0.075, 0.125, 0.175]
project_all_in_one_list = list()
polymer_topo = project_details['topology']
for ens_avg_space_db in ens_avg_space_dbs:
    ens_avgs_list = list()
    space = ens_avg_space_db.split('/')[-2].split('-')[0]
    space_info = project_details['parser'](
        space,
        'space',
        project_details['geometry'],
        project_details['group'],
        project_details['topology'],
        ispath=False
    )
    # Bin centers 1..nmon-1 for a linear chain and 1..nmon for a ring.
    bonds_per_topology = {
        'linear': np.arange(1, space_info.nmon, 1),
        'ring': np.arange(1, space_info.nmon+1, 1)
    }
    for property_ in bond_props:
        ens_avg = organizer.space_hists(
            ens_avg_space_db,
            property_,
            project_details['parser'],
            project_details['hierarchy'],
            project_details['attributes'],
            project_details['group'],
            project_details['geometry'],
            project_details['topology'],
            divisor=project_details['divisor'],
            bin_center=bonds_per_topology[polymer_topo],
            normalize=False,
            is_save=False
        )
        if project in ['HnsCyl', 'HnsCub']:
            # Assign the result back instead of calling replace(inplace=True)
            # on the selected column: the chained in-place form does not
            # update the frame when pandas Copy-on-Write is active.
            ens_avg['phi_c_bulk_round'] = \
                ens_avg['phi_c_bulk_round'].replace(hns_phi_c_merge)
            ens_avg = \
                ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(hns_phi_c_drop), :]
        elif project in ['TransFociCyl', 'TransFociCub']:
            ens_avg = \
                ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(foci_phi_c_drop), :]
        else:
            raise ValueError("The 'phi_c drop' condition is not defined for "
                             f"'{project}' project.")
        ens_avgs_list.append(ens_avg)
    ens_avgs = pd.concat(ens_avgs_list, axis=1)
    # drop duplicated columns:
    ens_avgs = ens_avgs.loc[:, ~ens_avgs.columns.duplicated()]
    ens_avgs.reset_index(inplace=True, drop=True)
    project_all_in_one_list.append(ens_avgs)
project_all_in_one = pd.concat(project_all_in_one_list, axis=0)
project_all_in_one = \
    project_all_in_one.loc[:, ~project_all_in_one.columns.duplicated()]
project_all_in_one.reset_index(inplace=True, drop=True)
output = '-'.join(
    ['allInOne', project, project_details['group'], 'bondCosCorrVecMon.csv']
)
output = analysis_db + output
project_all_in_one.to_csv(output, index=False)

## Intra-chain loop length histogram: Hns Project

In [None]:
# Collect the 'loopLengthHistMon' histogram of every '<group>-ensAvg' space
# and write them as one project-wide CSV
# ('allInOne-<project>-<group>-loopLengthHistMon.csv').
phase = 'ensAvg'
ext = '.csv'
property_ext = phase + '-mean' + ext
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_avg_space_dbs = [
    space_db + "/" for space_db in space_dbs if space_db.endswith(
        project_details['group'] + '-' + phase
    )
]
# Property names are discovered from the first space only; they are printed
# for inspection, while the loop below always reads 'loopLengthHistMon'.
filepath = ens_avg_space_dbs[0] + project_details['hierarchy'] + ext
uniq_props, _ = organizer.unique_property(
    filepath, 2, ["-" + phase], drop_properties=['stamps'])
# Sorted so that the printed order is reproducible between runs.
bond_props = sorted(
    {prop for prop in uniq_props if prop.startswith("loopLengthHistMon")}
)
print(bond_props)
# Rounded phi_c values that are merged into a neighbouring value (Hns
# projects) and values that are dropped altogether.
hns_phi_c_merge = {0.09: 0.08, 0.15: 0.16, 0.21: 0.2, 0.31: 0.32}
hns_phi_c_drop = [0.06, 0.18]
foci_phi_c_drop = [0.025, 0.05, 0.075, 0.125, 0.175]
project_all_in_one_list = list()
polymer_topo = project_details['topology']
for ens_avg_space_db in ens_avg_space_dbs:
    space = ens_avg_space_db.split('/')[-2].split('-')[0]
    space_info = project_details['parser'](
        space,
        'space',
        project_details['geometry'],
        project_details['group'],
        project_details['topology'],
        ispath=False
    )
    # Bin centers 0..nmon-1 for a linear chain and 0..nmon//2 for a ring.
    max_loop_per_topology = {
        'linear': np.arange(0, space_info.nmon, 1),
        'ring': np.arange(0, space_info.nmon//2+1, 1)
    }
    ens_avg = organizer.space_hists(
        ens_avg_space_db,
        'loopLengthHistMon',
        project_details['parser'],
        project_details['hierarchy'],
        project_details['attributes'],
        project_details['group'],
        project_details['geometry'],
        project_details['topology'],
        divisor=project_details['divisor'],
        bin_center=max_loop_per_topology[polymer_topo],
        normalize=False,
        is_save=False
    )
    if project in ['HnsCyl', 'HnsCub']:
        # Assign the result back instead of calling replace(inplace=True) on
        # the selected column: the chained in-place form does not update the
        # frame when pandas Copy-on-Write is active.
        ens_avg['phi_c_bulk_round'] = \
            ens_avg['phi_c_bulk_round'].replace(hns_phi_c_merge)
        ens_avg = \
            ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(hns_phi_c_drop), :]
    elif project in ['TransFociCyl', 'TransFociCub']:
        ens_avg = \
            ens_avg.loc[~ens_avg['phi_c_bulk_round'].isin(foci_phi_c_drop), :]
    else:
        raise ValueError("The 'phi_c drop' condition is not defined for "
                         f"'{project}' project.")
    # rename() returns a new frame, so the filtered selection above is never
    # modified in place.
    ens_avg = ens_avg.rename(columns={'bin_center': 'genomic_distance'})
    # drop duplicated columns:
    ens_avg = ens_avg.loc[:, ~ens_avg.columns.duplicated()]
    ens_avg = ens_avg.reset_index(drop=True)
    project_all_in_one_list.append(ens_avg)
project_all_in_one = pd.concat(project_all_in_one_list, axis=0)
project_all_in_one = \
    project_all_in_one.loc[:, ~project_all_in_one.columns.duplicated()]
output = '-'.join(
    ['allInOne', project, project_details['group'], 'loopLengthHistMon.csv']
)
output = analysis_db + output
project_all_in_one.to_csv(output, index=False)


## Clusters and bonds per project: TransFoci

- Applicable to any project in which clustering happens, such as **HnsCub**, **TransFociCub**, and **TransFociCyl**.
- The histograms of **Clusters** and **bonds** can **not** be combined in **one** dataset.
- Since **per project** datasets are small, we create **one** per project dataset for each property.

In [11]:
# Build one project-wide histogram file per foci property (bonds and
# clusters are stored separately) from all '<group>-ensAvg' spaces.
phase = 'ensAvg'
space_dbs = glob(analysis_db + project_details['space_pat'])
phase_suffix = project_details['group'] + '-' + phase
ens_avg_space_dbs = [
    f"{path}/" for path in space_dbs if path.endswith(phase_suffix)
]
print(ens_avg_space_dbs)

nmon_large = 5
# Bin centers: 0..nmon_large-1 for bonds and 1..nmon_large for clusters.
hist_t_foci_bin_centers = dict(
    bondsHistFoci=np.arange(nmon_large),
    clustersHistFoci=np.arange(1, nmon_large + 1),
)
# Separate dataset for bonds and clusters per project
for prop, bin_center in hist_t_foci_bin_centers.items():
    ens_avgs = []
    for ens_avg_space_db in ens_avg_space_dbs:
        print(ens_avg_space_db)
        space = ens_avg_space_db.split('/')[-2].split('-')[0]
        ens_avg = organizer.space_hists(
            ens_avg_space_db,
            prop,
            project_details['parser'],
            project_details['hierarchy'],
            project_details['attributes'],
            project_details['group'],
            project_details['geometry'],
            project_details['topology'],
            bin_center=bin_center,
            normalize=True,
            divisor=project_details['divisor'],
            is_save=False
        )
        ens_avgs.append(ens_avg)
    ens_avgs = pd.concat(ens_avgs, axis=0)
    # Keep only the first occurrence of every column label.
    unique_cols = ~ens_avgs.columns.duplicated()
    ens_avgs = ens_avgs.loc[:, unique_cols].reset_index(drop=True)
    output = (
        f"allInOne-{project}-{project_details['group']}"
        f"-{prop}.parquet.brotli"
    )
    output = analysis_db + output
    ens_avgs.to_parquet(output, index=False, compression='brotli')

['/Users/amirhsi/research_data/SumRuleCubHeteroLinear-analysis/ns400nl5al1ac1-bug-ensAvg/']
/Users/amirhsi/research_data/SumRuleCubHeteroLinear-analysis/ns400nl5al1ac1-bug-ensAvg/


ValueError: No objects to concatenate

## Clusters and bonds per project: HnsCyl

- Applicable to any project in which clustering happens, such as **HnsCub**, **TransFociCub**, and **TransFociCyl**.
- The histograms of **Clusters** and **bonds** can **not** be combined in **one** dataset.
- Since **per project** datasets are small, we create **one** per project dataset for each property.

In [None]:
# Build one project-wide histogram file per H-NS core property from all
# '<group>-ensAvg' spaces; bin centers depend on the number of H-NS (nhns)
# parsed from each space name.
# NOTE(review): the space name is always parsed with HnsCub and fixed
# 'cubic'/'nucleoid'/'ring' arguments rather than with project_details --
# confirm this is intended for non-HnsCub projects.
phase = 'ensAvg'
space_dbs = glob(analysis_db + project_details['space_pat'])
phase_suffix = project_details['group'] + '-' + phase
ens_avg_space_dbs = sorted(
    f"{path}/" for path in space_dbs if path.endswith(phase_suffix)
)
#print(ens_avg_space_dbs)
# Bond histograms use centers 0..x-1, cluster histograms use 1..x.
func_bin_center = {}
for variant in ('', 'DirDep', 'Dangle', 'Bridge'):
    func_bin_center[f'bondsHist{variant}HnsCore'] = lambda x: np.arange(x)
    func_bin_center[f'clustersHist{variant}HnsCore'] = \
        lambda x: np.arange(1, x + 1)
# Separate dataset for bonds and clusters per project
for prop, func in func_bin_center.items():
    ens_avgs = []
    for ens_avg_space_db in ens_avg_space_dbs:
        space = ens_avg_space_db.split('/')[-2].split('-')[0]
        space_info = HnsCub(
            space, 'space', 'cubic', 'nucleoid', 'ring', ispath=False
        )
        # The same rule applies whether or not nhns is zero.
        bin_centers = func(space_info.nhns)
        print("ATTENTION: normalized is False.")
        ens_avg = organizer.space_hists(
            ens_avg_space_db,
            prop,
            project_details['parser'],
            project_details['hierarchy'],
            project_details['attributes'],
            project_details['group'],
            project_details['geometry'],
            project_details['topology'],
            bin_center=bin_centers,
            normalize=False,
            divisor=project_details['divisor'],
            is_save=False
        )
        ens_avgs.append(ens_avg)
    ens_avgs = pd.concat(ens_avgs, axis=0)
    # Keep only the first occurrence of every column label.
    unique_cols = ~ens_avgs.columns.duplicated()
    ens_avgs = ens_avgs.loc[:, unique_cols].reset_index(drop=True)
    output = (
        f"allInOne-{project}-{project_details['group']}"
        f"-{prop}.parquet.brotli"
    )
    output = analysis_db + output
    ens_avgs.to_parquet(output, index=False, compression='brotli')

## TransFoci and HnsCub Projects: Pair Distance Statistics per project: **bug** or **nucleoid** groups

- Applicable to any project in which pair distance matters, such as **HnsCub**, **TransFociCub**, and **TransFociCyl**.
- These **properties** can be **combined** in one file per project.
- Since **per project** datasets are small, we create **one** per project dataset for **all** properties.

In [12]:
# Per-project pair-distance statistics: the histogram and RDF properties are
# small and related, so both are merged into a single parquet file.
phase = 'ensAvg'
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_avg_space_dbs = [
    space_db + "/" for space_db in space_dbs if space_db.endswith(
        project_details['group'] + '-' + phase
    )
]
print(ens_avg_space_dbs)
if not ens_avg_space_dbs:
    # Fail early with a readable message instead of letting `pd.concat`
    # raise the opaque "No objects to concatenate" on an empty list.
    raise ValueError(
        f"No '{project_details['group']}-{phase}' space directories were "
        f"found in '{analysis_db}'; there is nothing to concatenate."
    )
hist_foci_props = ['pairDistHistFoci', 'pairDistRdfFoci']
# One per-project database for both properties since they are small and
# related.
project_ens_avgs = []
for prop in hist_foci_props:
    prop_ens_avgs = []
    for ens_avg_space_db in ens_avg_space_dbs:
        ens_avg = organizer.space_hists(
            ens_avg_space_db,
            prop,
            project_details['parser'],
            project_details['hierarchy'],
            project_details['attributes'],
            project_details['group'],
            project_details['geometry'],
            project_details['topology'],
            bin_center=None,
            normalize=False,
            is_save=False
        )
        prop_ens_avgs.append(ens_avg)
    # Stack the per-space frames of this property row-wise.
    prop_ens_avgs = pd.concat(prop_ens_avgs, axis=0)
    # Drop duplicated column labels, keeping the first occurrence of each.
    prop_ens_avgs = prop_ens_avgs.loc[:, ~prop_ens_avgs.columns.duplicated()]
    prop_ens_avgs.reset_index(inplace=True, drop=True)
    project_ens_avgs.append(prop_ens_avgs)
# Place the per-property frames side by side; columns shared by both
# properties show up twice and are dropped right below.
project_ens_avgs = pd.concat(project_ens_avgs, axis=1)
project_ens_avgs = project_ens_avgs.loc[:, ~project_ens_avgs.columns.duplicated()]
project_ens_avgs.reset_index(inplace=True, drop=True)
output = '-'.join(
    ['allInOne', project, project_details['group'], 'pairDistStats.parquet.brotli']
)
output = analysis_db + output
project_ens_avgs.to_parquet(output, index=False, compression='brotli')


['/Users/amirhsi/research_data/SumRuleCubHeteroLinear-analysis/ns400nl5al1ac1-bug-ensAvg/']


ValueError: No objects to concatenate

## Spatial Distributions and the sum rule: **all** group

- Finding the spatial histogram, number density, and local volume fraction in different geometries.

### NOT needed: allInOne Local Distributions: 

- ensAvg of Hists, Rhos, Phis with var and sem per project: there is no need to run this, as the information already exists in the "allInOne Sum-Rule" section.

In [None]:
# Per-direction datasets of the local distributions for the 'all' group:
# every property found for a direction is gathered over all ensemble-averaged
# spaces and the per-property frames are merged into one file per direction.
phase = 'ensAvg'
group = 'all'
space_dbs = glob(analysis_db + project_details['space_pat'])
ens_avg_space_dbs = [
    space_db + "/" for space_db in space_dbs if space_db.endswith(
       group  + '-' + phase
    )
]
print(ens_avg_space_dbs)
if not ens_avg_space_dbs:
    # `ens_avg_space_dbs[0]` below would otherwise fail with a bare
    # IndexError that does not say what is missing.
    raise ValueError(
        f"No '{group}-{phase}' space directories were found in "
        f"'{analysis_db}'."
    )
# list of unique properties and property_measures:
# Local distributions do not have any property_measures:
uniq_props, _ = organizer.unique_property(
    ens_avg_space_dbs[0] + '*' + \
        project_details['hierarchy'] + '.csv',
    2,
    ["-" + phase],
    drop_properties=["stamps"])
print(uniq_props)
# Rounded bulk crowder volume fractions that are merged into a neighbouring
# value (H-NS projects only) and the ones that are dropped altogether.
hns_phi_c_remap = {0.09: 0.08, 0.15: 0.16, 0.21: 0.2, 0.31: 0.32}
hns_phi_c_drop = [0.06, 0.18]
trans_foci_phi_c_drop = [0.025, 0.05, 0.075, 0.125, 0.175]
directions = ['theta', 'z', 'r']
for direction in directions:
    props_by_dir = [prop for prop in uniq_props if prop.startswith(direction)]
    dir_ens_avgs = []
    for prop in props_by_dir:
        prop_ens_avgs = []
        for ens_avg_space_db in ens_avg_space_dbs:
            ens_avg = organizer.space_hists(
                ens_avg_space_db,
                prop,
                project_details['parser'],
                project_details['hierarchy'],
                project_details['attributes'],
                group,
                project_details['geometry'],
                project_details['topology'],
                divisor=project_details['divisor'],
                normalize=True,
                is_save=False
            )
            if project in ['HnsCyl', 'HnsCub']:
                # A chained `df[col].replace(..., inplace=True)` does not
                # update `df` under pandas Copy-on-Write; replace on the
                # frame itself and rebind the result instead.
                ens_avg = ens_avg.replace(
                    {'phi_c_bulk_round': hns_phi_c_remap}
                )
                ens_avg = ens_avg.loc[
                    ~ens_avg['phi_c_bulk_round'].isin(hns_phi_c_drop), :
                ]
            elif project in ['TransFociCyl', 'TransFociCub']:
                ens_avg = ens_avg.loc[
                    ~ens_avg['phi_c_bulk_round'].isin(trans_foci_phi_c_drop), :
                ]
            else:
                raise ValueError("The 'phi_c drop' condition is not defined for "
                                f"'{project}' project.")
            prop_ens_avgs.append(ens_avg)
        # Stack the per-space frames of this property row-wise.
        prop_ens_avgs = pd.concat(prop_ens_avgs, axis=0)
        # Drop duplicated column labels, keeping the first occurrence.
        prop_ens_avgs = \
            prop_ens_avgs.loc[:, ~prop_ens_avgs.columns.duplicated()]
        prop_ens_avgs.reset_index(inplace=True, drop=True)
        dir_ens_avgs.append(prop_ens_avgs)
    # Place the per-property frames of this direction side by side.
    dir_ens_avgs_df = pd.concat(dir_ens_avgs, axis=1)
    # Drop duplicated column labels, keeping the first occurrence.
    dir_ens_avgs_df = \
        dir_ens_avgs_df.loc[:, ~dir_ens_avgs_df.columns.duplicated()]
    dir_ens_avgs_df.reset_index(inplace=True, drop=True)
    # NOTE(review): the file name uses project_details['group'] although the
    # data are collected for `group` ('all') -- confirm this is intended.
    output = analysis_db + "-".join([
        'allInOne', project, project_details['group'], direction + "LocalDist.parquet.brotli"
    ])
    dir_ens_avgs_df.to_parquet(output, index=False, compression='brotli')

### allInOne Sum-Rule:

In [12]:
# Sum-rule setup: locate the ensemble-averaged spaces of the 'all' group and
# read, from the project details, the (species, size attribute) pairs and
# every (property, direction) combination to process.
phase = 'ensAvg'
group = 'all'
space_dbs = glob(analysis_db + project_details['space_pat'])
# Keep only the '<group>-<phase>' directories, with a trailing slash.
ens_avg_space_dbs = [
    f"{db}/" for db in space_dbs if db.endswith(f"{group}-{phase}")
]
print(ens_avg_space_dbs)
species_dict = project_details['rhosPhisNormalizedScaled']
print('species_dict: ', species_dict)
# Cartesian product of properties and directions, property first.
dir_prop_pairs = [
    *product(project_details['props'], project_details['directions'])
]
print('dir_prop_pairs: ', dir_prop_pairs)

['/Users/amirhsi/research_data/analysis/N2000D120.0ac6.0-all-ensAvg/']
species_dict:  [('Mon', 'dmon'), ('Crd', 'dcrowd')]
dir_prop_pairs:  [('Rho', 'r'), ('Rho', 'z'), ('Phi', 'r'), ('Phi', 'z')]


In [14]:
# Build one "allInOne" sum-rule dataset per (property, direction) pair: for
# every species the per-space results are stacked row-wise, then the
# per-species frames are joined column-wise and written to parquet.
if not ens_avg_space_dbs:
    # Fail early with a readable message instead of letting `pd.concat`
    # raise the opaque "No objects to concatenate" on an empty list.
    raise ValueError(
        f"No '{group}' ensemble-averaged space directories were found in "
        f"'{analysis_db}'; there is nothing to concatenate."
    )
# Rounded bulk crowder volume fractions that are merged into a neighbouring
# value (H-NS projects only) and the ones that are dropped altogether.
hns_phi_c_remap = {0.09: 0.08, 0.15: 0.16, 0.21: 0.2, 0.31: 0.32}
hns_phi_c_drop = [0.06, 0.18]
trans_foci_phi_c_drop = [0.025, 0.05, 0.075, 0.125, 0.175]
for (prop, direction) in dir_prop_pairs:
    all_in_one_list = []
    for (species, size_attr) in species_dict:
        per_species_list = []
        for ens_avg_space_db in ens_avg_space_dbs:
            per_space = organizer.space_sum_rule(
                ens_avg_space_db,
                prop,
                project_details['parser'],
                project_details['hierarchy'],
                project_details['attributes'],
                species,
                size_attr,
                group,
                project_details['geometry'],
                project_details['topology'],
                direction,
                divisor=project_details['divisor'],
                is_save=False
            )
            if project in ['HnsCyl', 'HnsCub']:
                # A chained `df[col].replace(..., inplace=True)` does not
                # update `df` under pandas Copy-on-Write; replace on the
                # frame itself and rebind the result instead.
                per_space = per_space.replace(
                    {'phi_c_bulk_round': hns_phi_c_remap}
                )
                per_space = per_space.loc[
                    ~per_space['phi_c_bulk_round'].isin(hns_phi_c_drop), :
                ]
            elif project in ['TransFociCyl', 'TransFociCub']:
                per_space = per_space.loc[
                    ~per_space['phi_c_bulk_round'].isin(trans_foci_phi_c_drop), :
                ]
            else:
                # Other projects are kept unfiltered; only a notice is shown.
                print("The 'phi_c drop' condition is not defined for "
                                f"'{project}' project.")
            per_species_list.append(per_space)
        # Stack the per-space frames of this species row-wise.
        per_species = pd.concat(per_species_list, axis=0)
        # Drop duplicated column labels, keeping the first occurrence.
        per_species = per_species.loc[:, ~per_species.columns.duplicated()]
        per_species.reset_index(inplace=True, drop=True)
        all_in_one_list.append(per_species)
    # Place the per-species frames side by side; columns shared between
    # species show up more than once and are dropped right below.
    all_in_one = pd.concat(all_in_one_list, axis=1)
    all_in_one = all_in_one.loc[:, ~all_in_one.columns.duplicated()]
    all_in_one.reset_index(inplace=True, drop=True)
    output = '-'.join(['allInOne', project, group, direction + prop])
    output += '-NormalizedScaled.parquet.brotli'
    output = analysis_db + output
    all_in_one.to_parquet(output, index=False, compression='brotli')

The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
The 'phi_c drop' condition is not defined for 'SumRuleCyl' project.
