# AllInOne Files

#### Faking nc=0 for some spaces:

Faking is done at the **ens** and **ensAvg** levels (and directories), not at the **segment** and **whole** levels, so the **segment stamps** are not modified but the **whole** and **ens** ones are modified.

#### Naming convention:

This is the pattern of file or directory names:

1. **whole** files: whole-group-property_[-measure][-stage][.ext]
2. **ensemble** files: ensemble-group-property_[-measure][-stage][.ext]
3. **ensemble_long** files: ensemble_long-group-property_[-measure][-stage][.ext]
4. **space** files: space-group-property_[-measure][-stage][.ext]
5. **all in one** files: **allInOne**-group-property_[-measure][-stage][.ext]

[keyword] means that the keyword in the file name is optional. [-measure] is a physical measurement, such as the autocorrelation function (ACF), done on the physical 'property_'.

#### Imports

In [None]:
# settings for testing and running on a PC.
from glob import glob
import pathlib
import pandas as pd
import numpy as np
import math
import re
from polyphys.manage import organizer
from polyphys.manage.parser import SumRule
from polyphys.analyze import analyzer
from polyphys.analyze import distributions
import MDAnalysis as mda
import seaborn as sns
import matplotlib.pyplot as plt

#### Configurations for loading files

In [None]:
# Collect the unique property-measure names found among the ensAvg CSV files.
# A property-measure is the CSV file name without its ".csv" suffix and
# without its first two dash-separated fields.
database = '/Users/amirhsi_mini/analysis/'
csv_paths = glob(database+"/N*-ensAvg"+"/N*.csv")
unique_measures = set()
for csv_path in csv_paths:
    file_name = csv_path.split("/")[-1]
    stem = file_name.split(".csv")[0]
    unique_measures.add("-".join(stem.split("-")[2:]))
bug_property_measures = list(unique_measures)
# The stamps file is not a property-measure; this raises ValueError when no
# stamps file was matched by the glob above.
bug_property_measures.remove("stamps-ensAvg")
bug_property_measures.sort()

## allInOne ensAvg stamps:

In [None]:
# Stack the ensAvg stamps CSV of every matched directory into one dataframe.
database = '/Users/amirhsi_mini/analysis/'
spaces_stamps = glob(database+"/N*-ensAvg"+"/N*-stamps-ensAvg.csv")
# Read each stamps file and concatenate them row-wise (axis=0).
allInOne_stamps = pd.concat(
    [pd.read_csv(stamps_path) for stamps_path in spaces_stamps],
    axis=0
)
# Bare expression: displays the dataframe as the notebook cell output.
allInOne_stamps

## chain-size timeseries and their associated measures

In [None]:
# Split the property-measures into ACF-related measures and the names of the
# time series they were computed from.
# Every property-measure whose name contains "-acf":
bug_property_acfs = sorted(
    measure for measure in bug_property_measures if "-acf" in measure
)
print(bug_property_acfs)
# Chain time series: for each measure containing "-acf-", keep the text before
# the first dash and append the '-ensAvg' suffix.
bug_properties = sorted(
    acf_measure.split("-")[0]+'-ensAvg'
    for acf_measure in bug_property_acfs
    if "-acf-" in acf_measure
)
print(bug_properties)

In [None]:
# Build one wide table of the chain-size time series and save it as a CSV.
group = 'bug'
geometry = 'biaxial'
ensAvg_path = "/Users/amirhsi_mini/analysis/N2000D30.0ac4.0-bug-ensAvg"
# One call to organizer.all_in_one_tseries per time-series property; nothing
# is written by the individual calls since save_to is None.
ensAvgs = pd.concat(
    [
        organizer.all_in_one_tseries(
            ensAvg_path,
            property_measure,
            group=group,
            geometry=geometry,
            save_to=None
        )
        for property_measure in bug_properties
    ],
    axis=1
)
# Keep only the first occurrence of every column label.
is_first_occurrence = ~ensAvgs.columns.duplicated()
ensAvgs = ensAvgs.loc[:, is_first_occurrence]
output_name = database + "allInOne-bug-chainSize.csv"
ensAvgs.to_csv(output_name, index=False)

In [None]:
# Build one wide table of the chain-size ACF measures and save it as a CSV.
group = 'bug'
geometry = 'biaxial'
ensAvg_path = "/Users/amirhsi_mini/analysis/N2000D30.0ac4.0-bug-ensAvg"
# Keyword arguments shared by every organizer.all_in_one_tseries call below.
tseries_options = dict(group=group, geometry=geometry, save_to=None)
acf_frames = [
    organizer.all_in_one_tseries(ensAvg_path, acf_measure, **tseries_options)
    for acf_measure in bug_property_acfs
]
# Join the per-measure results column-wise, then drop any column whose label
# has already appeared (the first occurrence is kept).
ensAvgs = pd.concat(acf_frames, axis=1)
repeated_label = ensAvgs.columns.duplicated()
ensAvgs = ensAvgs.loc[:, ~repeated_label]
output_name = database + "allInOne-bug-chainSize-acf.csv"
ensAvgs.to_csv(output_name, index=False)

In [None]:
# parallel version has memory leak issue.
%%time
# This has memory leaking issue
group = 'bug'
geometry = 'biaxial'
ensAvg_path = "/Users/amirhsi_mini/analysis/N2000D30.0ac4.0-bug-ensAvg"
all_in_one_computed = []
for property_measure in bug_property_measures:
    all_in_one_delayed = delayed(organizer.all_in_one_tseries)(
        ensAvg_path,
        property_measure,
        group = group,
        geometry = geometry,
        save_to = database
    )
    all_in_one_computed.append(all_in_one_delayed)
_ = compute(all_in_one_computed)

## Distributions

In [None]:
# Build 'whole' histograms and bin edges for one simulation directory, then
# pass them to distributions.distributions_generator.
# Alternative data set (nc12012), kept for quick switching:
#hist_paths = glob('/Users/amirhsi_mini/probe/N500D10.0ac0.8-segment/N500epsilon5.0r5.5lz205.5sig0.8nc12012dt0.002bdump1000adump5000ens1/N500epsilon5.0r5.5lz205.5sig0.8nc12012dt0.002bdump1000adump5000ens1*')
hist_paths = glob('/Users/amirhsi_mini/probe/N500D10.0ac0.8-segment/N500epsilon5.0r5.5lz205.5sig0.8nc48047dt0.002bdump1000adump5000ens1/N500epsilon5.0r5.5lz205.5sig0.8nc48047dt0.002bdump1000adump5000ens1*')
species = 'Crd'
direction = 'z'
geometry = 'biaxial'
group = 'all'
# Property names shared by the file-name suffixes and the organizer.whole calls.
hist_property = direction + 'Hist' + species
edge_property = direction + 'Edge' + species
# Select the histogram files and the bin-edge files among the matched paths.
segments = organizer.sort_filenames(
    hist_paths,
    fmts=['-' + hist_property + '.npy']
)
edge_segments = organizer.sort_filenames(
    hist_paths,
    fmts=['-' + edge_property + '.npy']
)
# Combine the segments into wholes; later cells index both results by the
# whole's name. Nothing is saved because save_to is None.
wholes = organizer.whole(
    hist_property,
    segments,
    geometry=geometry,
    group=group,
    relation='histogram',
    save_to=None
)
edge_wholes = organizer.whole(
    edge_property,
    edge_segments,
    geometry=geometry,
    group=group,
    relation='bin_edge',
    save_to=None
)
# The two returned objects are used as number density (rho) and volume
# fraction (phi) by the plotting cell that follows.
rho_wholes, phi_wholes = distributions.distributions_generator(
    wholes,
    edge_wholes,
    group,
    species,
    geometry,
    direction,
    save_to=None,
    normalized=True
)

In [None]:
# Redraw the stored histogram of one whole with matplotlib: the left bin edges
# are the samples, the stored counts are their weights.
whole_name = 'N500epsilon5.0r5.5lz205.5sig0.8nc48047dt0.002bdump1000adump5000ens1'
bin_edges = edge_wholes[whole_name]
plt.hist(
    bin_edges[:-1],
    bin_edges,
    weights=wholes[whole_name],
    histtype='step',
    density=True
)
plt.show()

In [None]:
# Same histogram drawn with seaborn: left bin edges as the data, the full edge
# array as the bins, and the stored counts as the weights.
whole_name = 'N500epsilon5.0r5.5lz205.5sig0.8nc48047dt0.002bdump1000adump5000ens1'
bin_edges = edge_wholes[whole_name]
sns.histplot(
    bin_edges[:-1],
    bins=bin_edges,
    weights=wholes[whole_name]
)
plt.show()

In [None]:
# Plot the histogram of one whole against its bin centers and save the figure
# to 'histogram.pdf'.
whole_name = 'N500epsilon5.0r5.5lz205.5sig0.8nc48047dt0.002bdump1000adump5000ens1'
bin_edges = edge_wholes[whole_name]
# Called as a bare statement, sns.axes_style only returns a parameter dict and
# changes nothing; used as a context manager it applies the "darkgrid" style
# to the axes created inside the block.
with sns.axes_style("darkgrid"):
    fig, axes = plt.subplots(nrows=1,ncols=1,sharex=True,figsize=(8,6))
# Bin centers are the midpoints of consecutive bin edges.
centers = 0.5*(bin_edges[:-1]+bin_edges[1:])
hist_df = pd.DataFrame(wholes)
rho_df = pd.DataFrame(rho_wholes)
phi_df = pd.DataFrame(phi_wholes)
df = pd.concat([hist_df,rho_df,phi_df],axis=1)
# Assumes each of the three frames holds exactly one column (one whole);
# the assignment below fails otherwise.
df.columns = ['histogram','number_density','volume_fraction']
df['center'] = centers
df['fake']= 1
# TODO: 'number_density' and 'volume_fraction' are assembled above but only
# 'histogram' is plotted so far.
sns.lineplot(x='center',y='histogram', data=df,ax=axes)
axes.set_xlabel('z (a.u.)')
axes.set_ylabel('Frequency of type-1 particles')
plt.savefig('histogram.pdf',dpi=200)

In [None]:

# Build a single Distribution object for one whole from its histogram and its
# bin edges.
# NOTE(review): this name carries nc36036, while the distributions cell of this
# notebook loads the nc48047 data set -- confirm that `wholes` and
# `edge_wholes` actually contain this key before running.
name = 'N500epsilon5.0r5.5lz205.5sig0.8nc36036dt0.002bdump1000adump5000ens1'
hist_info = SumRule(name, geometry='biaxial', group='all', lineage='whole')
# The bin edges come from `edge_wholes`, which is built alongside `wholes`;
# the previously used name `edges` is not defined anywhere in the visible
# notebook.
dist_new = distributions.Distribution(
    wholes[name],
    edge_wholes[name],
    hist_info,
    'dcrowd',
    geometry='biaxial',
    direction='z',
    normalized=False)