In [None]:
# Importing necessary packages:
import re
import os
from glob import glob
import math
import numpy as np
import pandas as pd
import datetime as dt
import itertools

import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib as mpl
from matplotlib import rcParams, cycler
import matplotlib.lines as lines
from collections import OrderedDict
from matplotlib.legend import Legend
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
from matplotlib.collections import PolyCollection # for chaing confidence interval color in statsmodels.graphics.tsaplots.plot_acf

import seaborn as sns

import statsmodels.tsa.stattools as tsas
import statsmodels.graphics.tsaplots as tsap

from polyphys.visualize import plots
from polyphys.visualize import tuning
from polyphys.manage.parser import SumRule
from polyphys.manage import organizer

### Chain size vs $\phi_c$: 2021

In [None]:
# Read the ensemble-averaged, normalized properties table into a DataFrame.
parent = '/Users/amirhsi_mini/analysis/'
properties_path = "all_in_one-properties-ens_avg-normalized.csv"
properties_csv = parent + properties_path
properties = pd.read_csv(properties_csv, header=0)

In [None]:
# chain size plot
# Keep only rows whose effective bulk crowder fraction is at most 0.4, then
# call the chain-size plotter once per phi_c column name.
# NOTE(review): `PipeLine` is not imported in this notebook - confirm where it
# comes from before running on a fresh kernel.
fontsize = 20
properties_phi_c_trunc = properties[properties['phi_c_bulk_eff'] <= 0.4]
for phi_c_column in (
    "phi_c_bulk",
    "phi_c_bulk_eff",
    "phi_c_bulk_normalized",
    "phi_c_bulk_eff_normalized",
):
    PipeLine.chainsize_plot(properties_phi_c_trunc, phi_c_column, fontsize=fontsize)

### The local distributions

In [None]:
# Radial local distributions: for every simulation group, draw the normalized
# "_mon_" column (top panel) and "_crd_" column (bottom panel) against the
# normalized radial coordinate, one line per non-zero phi_c_bulk, and save
# each two-panel figure as a PDF in the working directory.
parent = '/Users/amirhsi_mini/analysis/'
simulation_type = 'all' # all or bug
attribute = 'distributions'
direction_name = 'radial'
direction = 'r'
dist_name = 'phi'
y_norm = '_norm'
x_norm_mon = ''#'_mon'
x_norm_crd = ''#'_crd'
csv_name = 'all_in_one-'+simulation_type+'-'+attribute+'-'+direction_name+'-ens_avg.csv'
distributions = pd.read_csv(parent+csv_name, index_col=0)
group_names = list(set(distributions.group_name))
sns.set_context('paper')
sns.set_style("ticks")
# (x column, y column) of the top and bottom panels, respectively.
panel_columns = [
    (direction+'_norm'+x_norm_mon, dist_name+"_mon_"+direction+y_norm),
    (direction+'_norm'+x_norm_crd, dist_name+"_crd_"+direction+y_norm),
]
for group_name in group_names:
    in_group = distributions.group_name == group_name
    has_crowders = distributions.phi_c_bulk != 0
    group = distributions[in_group & has_crowders]
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(16, 12))
    for ax, (x_column, y_column) in zip(axes, panel_columns):
        sns.lineplot(x=x_column, y=y_column, hue='phi_c_bulk', data=group, ax=ax, legend='full')
    fname = group_name+'-'+dist_name+'-'+direction+".pdf"
    plt.savefig(fname, dpi=200)
    plt.close()

### The sum rule

In [None]:
# Radial sum rule: for every simulation group, draw the raw "_sumrule_"
# column (top panel) and its "_norm" counterpart (bottom panel) against the
# normalized radial coordinate, one line per phi_c_bulk, and save each
# two-panel figure as a PDF in the working directory.
parent = '/Users/amirhsi_mini/analysis/'
simulation_type = 'all' # all or bug
attribute = 'distributions'
direction_name = 'radial'
direction = 'r'
dist_name = 'phi'
y_norm = '_norm'
x_norm_mon = ''#'_mon'
x_norm_crd = ''#'_crd'
csv_name = 'all_in_one-'+simulation_type+'-'+attribute+'-'+direction_name+'-ens_avg.csv'
distributions = pd.read_csv(parent+csv_name, index_col=0)
group_names = list(set(distributions.group_name))
sns.set_context('paper')
sns.set_style("ticks")
# (x column, y column) of the top and bottom panels, respectively.
panel_columns = [
    (direction+'_norm'+x_norm_mon, dist_name+"_sumrule_"+direction),
    (direction+'_norm'+x_norm_crd, dist_name+"_sumrule_"+direction+'_norm'),
]
for group_name in group_names:
    group = distributions[distributions.group_name == group_name]
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(16, 12))
    for ax, (x_column, y_column) in zip(axes, panel_columns):
        sns.lineplot(x=x_column, y=y_column, hue='phi_c_bulk', data=group, ax=ax, legend='full')
    fname = group_name+'-sum_rule-'+dist_name+'-'+direction+".pdf"
    plt.savefig(fname, dpi=200)
    plt.close()

In [None]:
# Build truncated copies of seaborn's "flare" and matplotlib's "binary"
# colormaps and register them by name so that `sns.color_palette` can look
# them up later in the notebook.
# The former `truncate_colormap` helper is neither defined nor imported in
# this notebook (NameError on a fresh kernel); `tuning.truncated_colormap`
# from polyphys.visualize is the helper the settings cell further down uses
# for the same purpose, so it is used here with the same keyword arguments.
flar_cmap = mpl.colors.ListedColormap(sns.cm._flare_lut)
flare_cmap_cut = tuning.truncated_colormap(flar_cmap, min_value=0.5, max_value=1.0, ncolors=15)
mpl.cm.register_cmap("flare_cmap_cut", flare_cmap_cut)
binary_cmap = plt.get_cmap('binary')
binary_cmap_cut = tuning.truncated_colormap(binary_cmap, min_value=0.0, max_value=1.0, ncolors=15)
mpl.cm.register_cmap("binary_cmap_cut", binary_cmap_cut)

In [None]:
# Preview 15 colors taken from the registered "flare_cmap_cut" colormap.
sns.color_palette("flare_cmap_cut", 15)

In [None]:
# Per-direction plot settings, keyed by the direction symbol ('r' or 'z'):
#   direction_name : long name of the direction,
#   xlabel         : mathtext label of the normalized coordinate axis,
#   legend_loc     : `loc` handed to the figure legends,
#   legend_nloc    : passed as `ncol` (number of legend columns) to the legends.
sumrule_information = {
    'r' :
    {'direction_name':'radial',
     'xlabel':r'${2r}/{D}$',
     'legend_loc':'lower left',
     'legend_nloc':1},
    'z' : 
    {'direction_name':'longitudinal',
     'xlabel':r'${2z}/{z_{max}}$',
     'legend_loc':'lower left',
     'legend_nloc':1}
}
def plot_type_infomation(plot_type, direction):
    """Return the column-name fragment and y-axis label of a plot type.

    Parameters
    ----------
    plot_type : {'mon', 'crd', 'sumrule'}
        Which quantity is plotted.
    direction : str
        Symbol of the spatial direction (e.g. 'r' or 'z'); it is inserted
        verbatim as the argument of phi in the label.

    Returns
    -------
    dict
        ``{'yname': ..., 'ylabel': ...}`` where 'yname' is the fragment used
        to build the y column name and 'ylabel' is a mathtext string.

    Raises
    ------
    KeyError
        If `plot_type` is not one of the supported keys.
    """
    # The 'mon' and 'crd' labels previously lacked the opening "\frac{", which
    # left an unmatched "}" in the mathtext; they now follow the same
    # \frac{phi(direction)}{a} pattern as the 'sumrule' label.
    plot_type_dict = {
        'mon':{
            'yname':'mon',
            'ylabel':r'$\frac{\phi_m('+direction+')}{a_m}$'
        },
        'crd':{
            'yname':'crd',
            'ylabel':r'$\frac{\phi_c('+direction+')}{a_c}$'
        },
        'sumrule':{
            'yname':'sumrule',
            'ylabel':r'$\sum\frac{\phi_i('+direction+')}{a_i}$'
        }
    }
    return plot_type_dict[plot_type]

In [None]:
# Draw a 4x3 grid of panels, one per simulation group (ordered by the
# compression rate kappa), showing the selected normalized distribution
# against the normalized coordinate, one line per phi_c_bulk, and save the
# figure as a single PDF in the working directory.
direction = 'z'
plot_type = 'sumrule'
dist_name = 'phi'
x_norm_mon = ''#'_mon'
x_norm_crd = ''#'_crd'
plot_type_dict = plot_type_infomation(plot_type, direction)
#plot_type = '_sumrule_' #'_crd_', '_mon_', 'sumrule'
parent = '/Users/amirhsi_mini/analysis/'
simulation_type = 'all' # all or bug
attribute = 'distributions'
# NOTE(review): must be kept in sync with `direction` by hand;
# sumrule_information[direction]['direction_name'] holds the same mapping.
direction_name = 'longitudinal'
y_norm = '_norm'
distributions = pd.read_csv(parent+'all_in_one-'+simulation_type+'-'+attribute+'-'+direction_name+'-ens_avg.csv',index_col=0)
distributions = distributions.round({"phi_c_bulk":3})
group_names = list(set(distributions.group_name))
# kappa = dcrowd / (dcyl - dcrowd), rounded to 2 decimals, for every group.
# NOTE(review): `PipeLine` is not imported in this notebook - confirm its origin.
compression_rates = {}
for group_name in group_names:
    group_attributes = PipeLine.cellAttributes(group_name,'cylindrical',cell_type='group',warning=False)
    compression_rates[group_name]  = round(group_attributes.dcrowd/(group_attributes.dcyl-group_attributes.dcrowd),2)
# Re-order the groups by increasing kappa; this order drives the panel order.
compression_rates = dict(sorted(compression_rates.items(), key=lambda item: item[1]))
compression_rate_colors = sns.color_palette("binary_cmap_cut", len(group_names))#,as_cmap=True)

# NOTE(review): the next assignment is dead - it is overwritten immediately by
# the hard-coded list, which is what the palette size and legend labels use.
phi_c = list(set(distributions.phi_c_bulk))
phi_c = [0.1, 0.15, 0.2, 0.225, 0.25, 0.275, 0.3, 0.325, 0.35, 0.375, 0.4]
#phi_c_colors = sns.color_palette("PuRd", len(phi_c))#,as_cmap=True)
phi_c_colors = sns.color_palette("flare_cmap_cut", len(phi_c))#,as_cmap=True)
sns.set_context('paper')
sns.set_style("white")
# NOTE(review): both branches currently create the same shared-y 4x3 grid.
if plot_type == 'sumrule':
    fig, axes = plt.subplots(nrows=4,ncols=3,figsize=(16,12),sharey=True)
else:
    fig, axes = plt.subplots(nrows=4,ncols=3,figsize=(16,12), sharey=True)#'row')
fontsize = 16
# zip() pairs groups with panels, so at most 12 groups are drawn.
for idx, (group_name, ax) in enumerate(zip(compression_rates.keys(),axes.flat)):
    group_attributes =PipeLine.cellAttributes(group_name,'cylindrical',cell_type='group',warning=False)
    # Rows with crowders are plotted as data; the phi_c_bulk == 0 rows supply
    # the blue reference curve.
    group = distributions[(distributions.group_name==group_name) & (distributions.phi_c_bulk!=0.0)]
    group_sumrule_rhd = distributions[(distributions.group_name==group_name) & (distributions.phi_c_bulk==0.0)]
    #ax.set_facecolor('gainsboro')
    # Panel background encodes kappa (same order as compression_rates).
    ax.set_facecolor(compression_rate_colors[idx])
    #if  plot_type== 'sumrule':
    # The reference curve is drawn for every plot_type (the guard above is
    # commented out).
    ax.plot(group_sumrule_rhd[direction+'_norm'+x_norm_crd], group_sumrule_rhd[dist_name+"_"+direction+'_uniform_sum_norm'],c='blue',ls='-')
    ax.grid(True,ls=':',lw=0.75,c='green')
    ax.tick_params(axis ='both',direction='inout',width=1,labelsize=fontsize-2,color='black')
    # Explicit tick layouts are only applied to the 'mon' and 'crd' plots.
    if plot_type != 'sumrule':
        if direction == 'r':
            #if idx > 5:
             #   PipeLine.yticks(ax,(0.0,2.5,0.25,0.05),code=True,fontsize=fontsize-2)
            #else : 
            if plot_type == 'crd':
                PipeLine.yticks(ax,(0.0,2.5,0.25,0.05),code=True,fontsize=fontsize-2)
            else :
                PipeLine.yticks(ax,(0.0,1.2,0.2,0.04),code=True,fontsize=fontsize-2)
            PipeLine.xticks(ax,(0.0,1,0.2,0.02),code=True,fontsize=fontsize-2)
        if direction == 'z':
            if plot_type == 'crd':
                pass
                #yticks(ax,(0.6,1.0,0.1,0.02),code=True,fontsize=fontsize-2)
            else:
                PipeLine.yticks(ax,(0.0,1.0,0.2,0.04),code=True,fontsize=fontsize-2)
            PipeLine.xticks(ax,(-1.0,1,0.2,0.04),code=True,fontsize=fontsize-2)
    ax.set_title(r"$({}):D={},a_c={},\kappa={}$".format(idx+1, group_attributes.dcyl,group_attributes.dcrowd,compression_rates[group_name]),fontsize=fontsize+2)
   
    # NOTE(review): the palette has len(phi_c) colors; presumably this must
    # equal the number of distinct phi_c_bulk values in `group` - confirm.
    ax = sns.lineplot(x=direction+'_norm'+x_norm_crd, y=dist_name+'_'+plot_type_dict['yname']+'_'+direction+'_norm', hue='phi_c_bulk', data=group,ax=ax,palette=phi_c_colors,legend=False)
    ax.set_ylabel(plot_type_dict['ylabel'],fontsize=fontsize+1)
    ax.set_xlabel(sumrule_information[direction]['xlabel'],fontsize=fontsize)
    
    
# Hand-built legends: line colors (phi_c) next to row 2, background colors
# (kappa) next to row 4, both anchored outside the right-most column.
phi_c_patches = PipeLine.color_patcher(phi_c_colors)
phi_c_legend = Legend(axes[1,2],handles=phi_c_patches,labels= phi_c,title=r'$\phi_c^{(bulk)}$ (line color)',ncol=sumrule_information[direction]['legend_nloc'],
                      title_fontsize=fontsize-2,fontsize=fontsize-4,framealpha=None,frameon=True,loc=sumrule_information[direction]['legend_loc'],bbox_to_anchor=(1.05,-0.02),edgecolor='black')
axes[1,2].add_artist(phi_c_legend)
compression_rates_patches = PipeLine.color_patcher(compression_rate_colors)
compression_rate_legend = Legend(axes[3,2],handles=compression_rates_patches,labels= compression_rates.values(),title=r'$\kappa$ (background color)',ncol=sumrule_information[direction]['legend_nloc'], title_fontsize=fontsize-2,fontsize=fontsize-4,framealpha=None,frameon=True,loc=sumrule_information[direction]['legend_loc'],bbox_to_anchor=(1.05,-0.02),edgecolor='black')
axes[3,2].add_artist(compression_rate_legend)

# Legend entry for the blue reference line, only for the sum-rule plot.
if  plot_type== 'sumrule':
    sumrule_rhs_patches = mlines.Line2D([], [], color='blue', lw=2, ls='-', label=r'$\sum\frac{\phi_i}{a_i}=1$')
    axes[2,2].legend(handles=[sumrule_rhs_patches],fontsize=fontsize,frameon=True,bbox_to_anchor=(1.5,0.92),edgecolor='black')

fname = dist_name+'-'+plot_type+'-'+direction+".pdf"
fig.tight_layout()
plt.savefig(fname,dpi=200,bbox_inches='tight')
plt.close()

### End-to-end distribution

In [None]:
# Load the ensemble-averaged 'rFloryHists' table of the 'bug' group and list
# its distinct simulation groups for the plotting cell that follows.
parent = '/Users/amirhsi_mini/analysis/'
simulation_type = 'bug' # all or bug
attribute = 'rFloryHists'
direction = 'r'
y_norm = '_norm'
csv_name = 'all_in_one-'+simulation_type+'-'+attribute+'-ens_avg.csv'
distributions = pd.read_csv(parent+csv_name, index_col=0)
group_names = list(set(distributions.group_name))
sns.set_context('paper')
sns.set_style("ticks")

In [None]:
# For every group, draw two panels from `distributions` (one line per
# non-zero phi_c_bulk) and save them as a PDF in the working directory.
# NOTE(review): `dist_name`, `x_norm_mon` and `x_norm_crd` are not set in this
# section; they are whatever an earlier cell left in the kernel.
# NOTE(review): the "_mon_"/"_crd_" column names look copied from the
# local-distributions cell - confirm the rFloryHists table has these columns.
# NOTE(review): with dist_name == 'phi' the file name below is the same one the
# local-distributions cell writes, so those PDFs would be overwritten.
for group_name in group_names:
    group = distributions[(distributions.group_name==group_name )& (distributions.phi_c_bulk != 0)]
    fig, axes = plt.subplots(nrows=2,ncols=1,figsize=(16,12))
    sns.lineplot(x=direction+'_norm'+x_norm_mon, y=dist_name+"_mon_"+direction+y_norm, hue='phi_c_bulk', data=group,ax=axes[0],legend='full')
    sns.lineplot(x=direction+'_norm'+x_norm_crd, y=dist_name+"_crd_"+direction+y_norm, hue='phi_c_bulk', data=group,ax=axes[1],legend='full')
    fname = group_name+'-'+dist_name+'-'+direction+".pdf"
    plt.savefig(fname,dpi=200)
    plt.close()

# Investigating size statistics:

### Autocorrelation function (acf):

Below, the several definitions of the autocorrelation function (acf) found in the literature are listed. Assume $A(t)$ is a discrete time-varying property of interest that is collected every $M$ timesteps in a molecular dynamics simulation with time step $\Delta t$ and time unit $\tau$. First, we define the mean (or average) and the unbiased variance of $A(t)$ as follows:

$$\langle A\rangle=\frac{1}{t_{max}}\sum_{t=1}^{t_{max}}A(t)$$
$$\sigma^2_A=\langle A^2 \rangle-\langle A\rangle^2=\frac{1}{t_{max}-1}\sum_{t=1}^{t_{max}}(A(t)-\langle A\rangle)^2$$

where $t_{max}$ is the total number of collected configurations (the size of $A(t)$), so that the total number of simulated timesteps is $t_{max}\times M$. Given these definitions, the acf has been defined in the following ways in the literature:

1. Allen and Tildesley define the *non-normalized* acf in "Computer simulation of liquids - 2017" as

$$c_{AA}(h) = \frac{1}{h_{max}}\sum_{h_0=1}^{h_{max}}A(h_0)A(h_0+h)$$

where $h_{max}=\frac{t_{max}-1}{h}$

2. *1989 - Murat M Grest GS - Structure of a Grafted Polymer Brush A Molecular Dynamics Simulation* defines the acf as

$$c_{AA}(h) =\frac{\langle (A(h)-\langle A \rangle)(A(0)-\langle A \rangle)\rangle}{\sigma^2_A}=\frac{\langle A(h)A(0)\rangle-\langle A \rangle \langle A(0) \rangle}{\sigma^2_A}$$

See the rest of that article for how the correlation time is calculated.

3. In *1989 - Grest GS Kremer K Witten TA et al. - Relaxation of Self-Entangled Many-Arm Star Polymers, 1987 - Grest GS Kremer K Witten TA - Structure of Many-Arm Star Polymers A Molecular Dynamics Simulation, and 1977 - Kranbuehl DE Verdier PH - Relaxation of the aspherical shapes of random-coil polymer chains*, the acf is

$$c_{AA}(h) =\frac{\langle A(h)A(0)\rangle-\langle A \rangle^2}{\sigma^2_A}$$

where the average is performed over the initial time steps $h=h_0=0$ taken every $K$ timesteps. Kranbuehl and Verdier run the whole simulation for several times the relaxation time of the end-to-end length.

4. In *1978 - Rapaport DC - Molecular dynamics simulation of polymer chains with excluded volume, and 1979 - Bishop M Ceperley D Frisch HL - Molecular dynamics of polymeric systems* the acf is given by

$$c_{AA}(h) =\frac{\langle A(h_0)A(h_0+h)\rangle-\langle A(h_0) \rangle^2}{\sigma^2_A(h_0)}$$

where

$$\sigma^2_A(h_0)=\langle A(h_0)^2 \rangle-\langle A(h_0)\rangle^2$$

The averaging should be done over ensembles; however, in practice, the ensemble average is replaced by the time average, provided that the time intervals between successive $h_0$ values are sufficiently large that the configurations are uncorrelated.

5. *1981 - Bruns W Bansal R - Molecular dynamics study of a single polymer chain in solution* defines the acf as

$$c_{\vec{A}\vec{A}}(h) = \frac{\sum_{h_0=1}^{t_{max}-h}\vec{A}(h_0)\cdot\vec{A}(h_0+h)}{\sum_{h_0=1}^{t_{max}-h}\vec{A}^2(h_0)}$$

where $\vec{A}$ is a vector quantity and $h_0$ is summed over $1,1+p,1+2p,\dots$ instead of $h_0=1,2,3,\dots,t_{max}-h$. For a scalar quantity, this definition is used:

$$c_{AA}(h) = \frac{(t_{max}-h)\sum_{h_0=1}^{t_{max}-h}A(h_0)A(h_0+h)-\sum_{h_0=1}^{t_{max}-h}A(h_0)\sum_{h_0=1}^{t_{max}-h}A(h_0+h)}{\sqrt{(t_{max}-h)\sum_{h_0=1}^{t_{max}-h}A^2(h_0)-[\sum_{h_0=1}^{t_{max}-h}A(h_0)]^2}\,\sqrt{(t_{max}-h)\sum_{h_0=1}^{t_{max}-h}A^2(h_0+h)-[\sum_{h_0=1}^{t_{max}-h}A(h_0+h)]^2}}=
\frac{\langle A(h_0)A(h_0+h)\rangle-\langle A(h_0)\rangle\langle A(h_0+h)\rangle}{\sqrt{\langle A^2(h_0)\rangle-\langle A(h_0)\rangle^2}\,\sqrt{\langle A^2(h_0+h)\rangle-\langle A(h_0+h)\rangle^2}}$$

6. *Brockwell PJ Davis RA - 2016 - Introduction to Time Series and Forecasting* defines the acf in the following way

$$c_{AA}(h) = \frac{cov_{A}(h)}{\sigma_A^2} = \frac{1}{\sigma_A^2}\frac{1}{t_{max}}\sum_{h_0=1}^{t_{max}-h} [A(h_0+h)-\langle A\rangle][A(h_0)-\langle A\rangle]$$




## Ensemble-averaged data:

### settings

In [None]:
# Load the ensemble-averaged chain-size timeseries and their ACFs, register
# the colormap used for the phi_c palette, and derive the ordered lists of
# spaces and crowder volume fractions used by the cells below.
database = '/Users/amirhsi_mini/analysis/'
acf = pd.read_csv(database + "allInOne-bug-chainSize-acf.csv", header=0)
chain_size = pd.read_csv(database + "allInOne-bug-chainSize.csv", header=0)
flar_cmap = mpl.colors.ListedColormap(sns.cm._flare_lut)
flare_cmap_cut = tuning.truncated_colormap(flar_cmap,  min_value=0.0, max_value=1.0, ncolors=200)
mpl.cm.register_cmap("flare_cmap_cut", flare_cmap_cut)
spaces = acf.loc[:,'space'].drop_duplicates().sort_values()
spaces = sorted(spaces, key = organizer.sort_by_alphanumeric)
# Snap every phi_c_bulk value to the nearest multiple of 0.025 and keep the
# distinct values. np.unique returns them sorted in ascending order; the
# previous list(set(...)) gave no ordering guarantee although phi_crds is
# later paired positionally with the sorted ensembles and with phi_colors.
phi_c_grid = 0.025
phi_c_steps = np.unique(np.around(acf.loc[:,'phi_c_bulk'].to_numpy() / phi_c_grid, decimals=0))
phi_crds = np.around(phi_c_steps * phi_c_grid, decimals=3)
phi_colors = sns.color_palette("flare_cmap_cut", len(phi_crds))#,as_cmap=True)
# Display metadata per size property: readable name, ACF symbol, line color.
# NOTE(review): this rebinds `properties`, which held the properties
# DataFrame loaded at the top of the notebook.
properties =  {"rfloryTMon":
                    {"name": "the Flory radius", "symbol":r"$C_{R_FR_F}(\hat{t})$", 'color':'firebrick'}, 
             "gyrTMon":
                    {"name": "the radius of gyration", "symbol":r"$C_{R_gR_g}(\hat{t})$",'color':'steelblue'}, 
             "fsdTMon": 
                    {"name": "the furthermost distance", "symbol":r"$C_{LL}(\hat{t})$",'color':'forestgreen'}}

In [None]:
# properties
# Alphabetically sorted names of the chain_size columns containing '-mean'.
size_properties_name = sorted(
    column for column in chain_size.columns if '-mean' in column
)
size_properties_name

### Chain size

In [None]:
# For each space, plot the selected mean size property against time, one line
# per phi_c_bulk, and save the figure as a PDF in the working directory.
sns.set_context('paper')
sns.set_style("ticks")
property_ = size_properties_name[2]
for space in spaces:
    size_space = chain_size.loc[chain_size['space'] == space]
    # `ensembles` is not used in this loop but is left defined because a
    # later curve-fitting cell reads it from the kernel.
    ensembles = sorted(
        size_space['ensemble'].drop_duplicates().sort_values(),
        key=organizer.sort_by_alphanumeric,
    )
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 12))
    lineplot_options = dict(
        x='time', y=property_, hue='phi_c_bulk',
        data=size_space, ax=axes, legend='full',
    )
    sns.lineplot(**lineplot_options)
    fname = property_+'-'+space+".pdf"
    plt.savefig(fname, dpi=200)
    plt.close()

### Correlation functions

#### ACF with CIs for a group of physical properties

In [None]:
# For each space, pass its ACF rows and alphanumerically sorted ensemble
# names to the polyphys ACF-with-confidence-interval plotter.
nlags = 100000
for space in spaces:
    acf_space = acf[acf.space == space]
    ensembles = sorted(
        acf_space['ensemble'].drop_duplicates().sort_values(),
        key=organizer.sort_by_alphanumeric,
    )
    plots.acf_plot_with_ci(
        acf_space, ensembles, space, properties, phi_crds,
        xlimits=(0, nlags, 20000), lags=nlags,
    )

#### ACF of each attribute in all the simulation groups

In [None]:
# Plot the ACF of one size property across all spaces.
# `nlags` comes from the previous cell.
property_ = 'gyrTMon'
legend_anchor = (1.1, 1.02)
acf_group_options = dict(
    xlimits=(0, nlags, 10000),
    nrows=1,
    ncols=1,
    legend_anchor=legend_anchor,
    lags=nlags,
)
plots.acf_plot_group(
    acf, spaces, property_, properties[property_], phi_crds, phi_colors,
    **acf_group_options
)

# Curve fitting

## Ensemble data

### Measuring correlation time

In [None]:
import scipy.optimize
def monoExpTau(x, m, t, b, alpha):
    """Stretched exponential with offset: ``m * exp(-(x / t) ** alpha) + b``.

    Parameters
    ----------
    x : float or array-like
        Independent variable.
    m : float
        Amplitude multiplying the exponential.
    t : float
        Scale that `x` is divided by before exponentiation.
    b : float
        Additive offset.
    alpha : float
        Exponent applied to ``x / t``.
    """
    scaled = (x / t) ** alpha
    return b + m * np.exp(-scaled)
def monoExp(x, m, t, b, alpha):
    """Stretched exponential with offset: ``m * exp(-t * x ** alpha) + b``.

    Unlike `monoExpTau`, `t` multiplies ``x ** alpha`` instead of dividing `x`.

    Parameters
    ----------
    x : float or array-like
        Independent variable.
    m : float
        Amplitude multiplying the exponential.
    t : float
        Rate multiplying ``x ** alpha``.
    b : float
        Additive offset.
    alpha : float
        Exponent applied to `x`.
    """
    exponent = t * x ** alpha
    return b + m * np.exp(-exponent)
def monoExpNoB(x, m, t, alpha):
    """Stretched exponential without offset: ``m * exp(-(x / t) ** alpha)``.

    Parameters
    ----------
    x : float or array-like
        Independent variable.
    m : float
        Amplitude multiplying the exponential.
    t : float
        Scale that `x` is divided by before exponentiation.
    alpha : float
        Exponent applied to ``x / t``.
    """
    scaled = (x / t) ** alpha
    return m * np.exp(-scaled)

In [None]:
# Fit `monoExp` to the mean gyration-radius ACF of every ensemble in the
# first space, store the fitted parameters per ensemble in `params_dict`, and
# draw data and fit in one panel per ensemble.
params_dict = {
    'm' : [],
    't' : [],
    'b' : [],
    'alpha' : []
}
fig, axes = plt.subplots(4,3, figsize=(16,12))
space = spaces[0]
space_df = acf.loc[acf['space']==space, :]
ensembles = space_df['ensemble'].drop_duplicates().sort_values()
ensembles = sorted(ensembles, key = organizer.sort_by_alphanumeric)
axes = axes.flat
func = monoExp
# zip() stops at the shorter of the 12 panels and the available ensembles,
# so a space with fewer than 12 ensembles no longer raises IndexError.
for idx, (ax, ensemble) in enumerate(zip(axes, ensembles)):
    # reset_index() returns a new frame; doing it in place on a .loc slice
    # triggers pandas' SettingWithCopyWarning.
    acf_ens = space_df.loc[space_df['ensemble']==ensemble,:].reset_index(drop=True)
    for col in ['gyrTMon-acf']:#,'fsdTMon-acf-mean', 'rfloryTMon-acf-mean']:
        x = acf_ens.loc[:,'time'].values
        x = x / x.max()
        # Shift by the second sample, presumably to keep x away from 0.
        x = x + x[1]
        y = acf_ens.loc[:,col + '-mean'].values
        #sigma = np.sqrt(acf_ens.loc[:, col + '-var'].values)
        params, cv = scipy.optimize.curve_fit(
            func,
            x,
            y
        )
        # params is ordered like the keys of params_dict: m, t, b, alpha.
        for param_name, param_value in zip(params_dict, params):
            params_dict[param_name].append(param_value)
        ax.plot(x, y, '.', label=ensemble)
        ax.plot(x, func(x, *params), '--', label=ensemble)

        ax.legend()
fig.suptitle("Fitted Exponential Curve")

In [None]:
# Fitted `t` parameter against phi_crds; needs one fit result per phi_crds
# value in params_dict.
plt.plot(phi_crds, params_dict['t'])

In [None]:
# Fit `monoExp` to the standardized (zero mean, unit standard deviation)
# mean gyration-radius timeseries of every ensemble in the first space, store
# the fitted parameters in `params_dict`, and draw data and fit per ensemble.
params_dict = {
    'm' : [],
    't' : [],
    'b' : [],
    'alpha' : []
}
fig, axes = plt.subplots(4,3, figsize=(16,12))
space = spaces[0]
space_df = chain_size.loc[chain_size['space']==space, :]
ensembles = space_df['ensemble'].drop_duplicates().sort_values()
ensembles = sorted(ensembles, key = organizer.sort_by_alphanumeric)
axes = axes.flat
func = monoExp
samples = 300001
# zip() stops at the shorter of the 12 panels and the available ensembles,
# so a space with fewer than 12 ensembles no longer raises IndexError.
for idx, (ax, ensemble) in enumerate(zip(axes, ensembles)):
    # reset_index() returns a new frame; doing it in place on a .loc slice
    # triggers pandas' SettingWithCopyWarning.
    acf_ens = space_df.loc[space_df['ensemble']==ensemble,:].reset_index(drop=True)
    for col in ['gyrTMon']:#,'fsdTMon-acf-mean', 'rfloryTMon-acf-mean']:
        # .loc slicing is label based and inclusive: rows 0..samples.
        x = acf_ens.loc[:samples,'time'].values
        #x =  acf_ens.index.values
        #x = x / x.max()
        # Shift by the second sample, presumably to keep x away from 0.
        x = x + x[1]
        y = acf_ens.loc[:samples,col + '-mean'].values
        y_mean = y.mean()
        y_std = y.std()
        y = (y - y_mean) / y_std
        #sigma = np.sqrt(acf_ens.loc[:, col + '-var'].values)
        params, cv = scipy.optimize.curve_fit(
            func,
            x,
            y,
            maxfev = 5000
        )
        # params is ordered like the keys of params_dict: m, t, b, alpha.
        for param_name, param_value in zip(params_dict, params):
            params_dict[param_name].append(param_value)
        ax.plot(x, y, '.', label=phi_crds[idx])
        ax.plot(x, func(x, *params), '--', label=ensemble)

        ax.legend()
fig.suptitle("Fitted Exponential Curve")

In [None]:
# Show the second element of the last `x` built by the fitting loop, i.e. the
# offset that was added to x there.
# NOTE(review): looks like a leftover debugging cell.
x[1]

In [None]:
# Plot the standardized (zero mean, unit standard deviation) mean
# gyration-radius timeseries of every ensemble in the first space.
# No fit is performed here, so `params_dict` is deliberately NOT reset:
# re-initialising it emptied the fit results that the following cell plots
# against phi_crds, which fails on a top-to-bottom run.
# NOTE(review): the figure title still says "Fitted Exponential Curve"
# although no fit is drawn - consider renaming it.
fig, axes = plt.subplots(4,3, figsize=(16,12))
space = spaces[0]
space_df = chain_size.loc[chain_size['space']==space, :]
ensembles = space_df['ensemble'].drop_duplicates().sort_values()
ensembles = sorted(ensembles, key = organizer.sort_by_alphanumeric)
axes = axes.flat
func = monoExp
samples = 100000
# zip() stops at the shorter of the 12 panels and the available ensembles,
# so a space with fewer than 12 ensembles no longer raises IndexError.
for idx, (ax, ensemble) in enumerate(zip(axes, ensembles)):
    # reset_index() returns a new frame; doing it in place on a .loc slice
    # triggers pandas' SettingWithCopyWarning.
    acf_ens = space_df.loc[space_df['ensemble']==ensemble,:].reset_index(drop=True)
    for col in ['gyrTMon']:#,'fsdTMon-acf-mean', 'rfloryTMon-acf-mean']:
        # .loc slicing is label based and inclusive: rows 0..samples.
        x = acf_ens.loc[:samples,'time'].values
        #x =  acf_ens.index.values
        #x = x / x.max()
        x = x + x[1]
        y = acf_ens.loc[:samples,col + '-mean'].values
        y_mean = y.mean()
        y_std = y.std()
        y = (y - y_mean) / y_std
        #sigma = np.sqrt(acf_ens.loc[:, col + '-var'].values)
        ax.plot(x, y, '.', label=phi_crds[idx])
        ax.legend()
fig.suptitle("Fitted Exponential Curve")

In [None]:
# Fitted `alpha` parameter against phi_crds; needs params_dict to still hold
# one fit result per phi_crds value.
plt.plot(phi_crds, params_dict['alpha'])

In [None]:
def fit_wholes(
    property_path: str,
    property_: str,
    fit_func,
    ftt_name: str,
    fit_params,
    property_pattern: str = 'N*',
    group: str = 'bug',
    geometry: str = 'biaxial',
    save_to: str = None,
    **kwargs
) -> pd.DataFrame:
    """Fit `fit_func` to every timeseries column of a physical property.

    All files matching ``property_path/property_pattern-property_.csv`` are
    read. Each column of each file is treated as the timeseries of one
    'whole' (the text before the first '-' in the column name is the whole's
    name), standardized to zero mean and unit standard deviation, and fitted
    with `scipy.optimize.curve_fit` against ``x = (i + 1) * dt`` for
    ``i = 0, 1, ...``, where ``dt`` is the `dt` attribute of the `SumRule`
    parsed from the whole's name.

    Parameters
    ----------
    property_path : str
        Directory that contains the timeseries csv files.
    property_ : str
        Name of the physical property; files end with '-<property_>.csv'.
    fit_func : callable
        Model ``f(x, *params)`` handed to `scipy.optimize.curve_fit`.
    ftt_name : str
        Name of the fit; only used in the output file name.
    fit_params : sequence of str
        Names of the fit parameters, in the order `fit_func` takes them.
    property_pattern : str, default 'N*'
        Glob pattern the file names start with.
    group : {'bug', 'all'}, default 'bug'
        Particle group, passed to `SumRule`.
    geometry : {'biaxial', 'slit', 'box'}, default 'biaxial'
        Simulation box geometry, passed to `SumRule`.
    save_to : str, default None
        Directory in which the result is also written as
        'allInOne-<group>-<property>-<ftt_name>.csv'; nothing is written
        when None.
    **kwargs
        Forwarded to `scipy.optimize.curve_fit` (e.g. ``p0``, ``maxfev``).

    Returns
    -------
    fit_df : pandas.DataFrame
        One row per successfully fitted whole with the columns 'whole', the
        names in `fit_params`, and '<param>-std' holding the one-standard-
        deviation errors ``sqrt(diag(pcov))``. Wholes whose fit does not
        converge or whose timeseries is constant are reported and skipped.
    """
    property_ext = '-' + property_ + '.csv'
    property_csvs = glob(property_path + '/' + property_pattern + property_ext)
    property_csvs = organizer.sort_filenames(property_csvs, fmts=[property_ext])
    fit_params = list(fit_params)
    params_std = [param + '-std' for param in fit_params]
    cols = ['whole'] + fit_params + params_std
    fit_data = []
    for property_csv in property_csvs:
        # Each sorted entry is indexed with [0] to get the csv path
        # (presumably sort_filenames returns one tuple per file - confirm).
        property_df = pd.read_csv(property_csv[0], header=0)
        for col in property_df.columns:
            # The leading part of the column name identifies the whole.
            whole_name = col.split('-')[0]
            # Pass the caller's geometry/group instead of the previously
            # hard-coded 'biaxial'/'bug' (identical for the defaults).
            whole_info = SumRule(
                whole_name,
                geometry=geometry,
                group=group,
                lineage='whole',
                ispath=False
            )
            y = property_df.loc[:, col].values
            y_std = y.std()
            if y_std == 0:
                # A constant series cannot be standardized (division by 0).
                print("could not fit " + whole_name + " (constant timeseries)")
                continue
            y = (y - y.mean()) / y_std
            x = (np.arange(len(y)) + 1.0) * whole_info.dt
            try:
                params, cov_mat = scipy.optimize.curve_fit(
                    fit_func,
                    x,
                    y,
                    **kwargs
                )
            except RuntimeError:
                # curve_fit raises RuntimeError when the fit fails.
                print("could not fit " + whole_name)
                continue
            # diag(pcov) holds variances; the '-std' columns need their roots.
            params_err = np.sqrt(np.diag(cov_mat))
            fit_data.append([whole_name, *params, *params_err])
    fit_df = pd.DataFrame(data=fit_data, columns=cols)
    if save_to is not None:
        output = "-".join(property_.split("-")[:2])  # dropping "-ensAvg"
        # The fit name is part of the file name so the fit table cannot
        # clash with an 'allInOne-<group>-<property>.csv' timeseries file.
        output = "-".join(["allInOne", group, output, ftt_name])
        # Previously this wrote an undefined `property_db` (NameError).
        fit_df.to_csv(os.path.join(save_to, output + ".csv"), index=False)
    return fit_df

In [None]:
# list of unique property_measures:
# A measure name is everything after the second '-' of a csv file's base
# name (extension removed), taken over all 'N*-ens' directories.
database = '/Users/amirhsi_mini/analysis/'
bug_property_measures = glob(database+"/N*-ens"+"/N*.csv")
measure_names = set()
for property_measure in bug_property_measures:
    file_stem = property_measure.split("/")[-1].split(".csv")[0]
    measure_names.add("-".join(file_stem.split("-")[2:]))
bug_property_measures = list(measure_names)
# "stamps-whole" is excluded from the list; remove() raises ValueError if it
# is absent.
bug_property_measures.remove("stamps-whole")
bug_property_measures.sort()
bug_property_measures

In [None]:
# Fit `monoExp` to the 'gyrTMon' timeseries of every whole in one ensemble
# directory.
# A whole that could not be fitted:
# N2000epsilon5.0r15.5lz379.5sig4.0nc4003dt0.005bdump1000adump5000ens4
group = 'bug'
geometry = 'biaxial'
ens_path = "/Users/amirhsi_mini/analysis/N2000D30.0ac4.0-bug-ens"
fit_df = fit_wholes(
    property_path=ens_path,
    property_='gyrTMon',
    fit_func=monoExp,
    ftt_name='monoExp',
    fit_params=['m', 't', 'b', 'alpha'],
    #property_pattern='N*nc6004*',
    group=group,
    geometry=geometry,
    #p0=[1.2, 400, 1.5, 2],
    maxfev=5000,
)

In [None]:
# Write the fit table to "fit_df.csv" in the working directory, without the
# index column.
fit_df.to_csv("fit_df.csv", index=False)

# Curve-fitting

In [None]:
# Fit the mean gyration-radius timeseries of one ensemble against its row
# index. `ensembles` is whatever an earlier cell left in the kernel.
fig, axes = plt.subplots(1,1)
# reset_index() returns a new frame; doing it in place on a .loc slice
# triggers pandas' SettingWithCopyWarning.
chain_size_ens = chain_size.loc[chain_size['ensemble']==ensembles[2],:].reset_index(drop=True)
#chain_size_ens = chain_size_ens.loc[:50000,:]
for col in ['gyrTMon-mean']:#,'fsdTMon-acf-mean', 'rfloryTMon-acf-mean']:
    x = chain_size_ens.index.values
    y = chain_size_ens.loc[:,col].values
    # `monoExpSize` is not defined anywhere in this notebook (NameError).
    # `monoExp` takes the four parameters unpacked below and is the model the
    # other fitting cells use.
    # NOTE(review): confirm monoExp (rather than monoExpTau) is the intended model.
    params, cv = scipy.optimize.curve_fit(
        monoExp,
        x,
        y)
    m, t, b, alpha = params

#### ACF with fit curves

In [None]:
# For every group, draw the three size ACFs of each ensemble in a 4x3 grid
# together with a `monoExp` fit of each curve, and save one PDF per group.
# NOTE(review): `attrs_dict` and `PipeLine` are not defined or imported in
# this notebook, and the column names used here ('group_name', 'ens_name',
# '*_t-acf_only') differ from those used in the cells above - confirm the
# `acf` table this cell expects.
fontsize = 16
acf_columns = ['gyr_t-acf_only', 'fsd_t-acf_only', 'rFlory_t-acf_only']
for group_name in group_names:
    print(group_name)
    # Keep the rows of THIS group where at least one ACF is still >= 1/e.
    # The three "|" tests are parenthesized together: "&" binds tighter than
    # "|", so without the grouping rows of other groups passed the filter.
    above_cutoff = (
        (acf[acf_columns[0]] >= 1/np.e)
        | (acf[acf_columns[1]] >= 1/np.e)
        | (acf[acf_columns[2]] >= 1/np.e)
    )
    acf_group = acf[(acf.group_name==group_name) & above_cutoff]
    ens_names = acf_group['ens_name'].drop_duplicates().sort_values()
    ens_names = sorted(ens_names, key = PipeLine.sort_by_int) # sorted by number of crowders (volume fraction of crowder)
    #PipeLine.acf_plot_with_ci(acf_group, ens_names, group_name, attrs_dict, phi_crds)
    fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(16,12), sharey=True)
    mpl.rcParams['font.family'] = "Times New Roman"
    mpl.rcParams['mathtext.default'] = "regular"
    # Legend entries depend only on attrs_dict, so build them once per figure;
    # they are then defined even when a group has no ensembles.
    legend_colors = [attr_dict['color'] for attr_dict in attrs_dict.values()]
    legend_labels = [attr_dict['symbol'] for attr_dict in attrs_dict.values()]
    for idx, (ax, ens_name, phi_c, color) in enumerate(zip(axes.flat, ens_names, phi_crds, phi_colors)):
        ax.axhline(y=0, c='black', ls='--', lw=1)
        ax.axhline(y=1/np.e, c='black', ls='-', lw=1,alpha=0.5,label=r'$C_x(t)=e^{-1}$')
        ax.legend()
        # reset_index() returns a new frame (the old index is kept as a
        # column, as before); doing it in place on a slice triggers pandas'
        # SettingWithCopyWarning.
        acf_ens = acf_group[acf_group.ens_name==ens_name].reset_index()
        print(acf_ens['gyr_t-acf_only'].head(3))
        #acf_ens[['gyr_t-acf_only','fsd_t-acf_only', 'rFlory_t-acf_only']].plot(ax=ax, legend=False, color=color)
        ax.grid(True,ls=':', lw=0.75, c='black')
        ax.tick_params(axis ='both', direction='inout', width=1, labelsize=fontsize-4, color='black')
        ax.text(0, 0.1, fr'$\phi_c^{{(bulk)}}={phi_c}$', fontsize=fontsize-2)
        # The fit uses the row position as the lag.
        lags = acf_ens.index.to_numpy(dtype=float)

        for attr, attr_dict in attrs_dict.items():
            acf_ens[attr+'-acf_only'].plot(ax=ax, color=attr_dict['color'], alpha=0.7)
            params, cv = scipy.optimize.curve_fit(monoExp, lags, acf_ens[attr+'-acf_only'].values)
            # monoExp has four parameters (m, t, b, alpha); unpacking three
            # raised ValueError and the call was missing `alpha`.
            ax.plot(lags, monoExp(lags, *params), ':', color=attr_dict['color'])

        #yticks(ax, ylimits, code=True, fontsize=fontsize-6, decimals=3)
        #xticks(ax, (0, lags, 1000), code=True, fontsize=fontsize-6, decimals=3)
        if idx % 3 == 0:
            ax.set_ylabel(r"$C(\hat{t})$", fontsize=fontsize-2)
        if idx >= 9:
            ax.set_xlabel(r"$\hat{t}=lag\times {\Delta t_{sampling}}/{\tau}$", fontsize=fontsize-2)

    phi_c_patches = PipeLine.color_patcher(legend_colors)
    phi_c_legends = Legend(axes[0,2], handles=phi_c_patches, labels=legend_labels, title='Size measures (colors)', title_fontsize=fontsize-2, fontsize=fontsize-4, framealpha=None, frameon=False, bbox_to_anchor=(1.02,1.02))
    axes[0,2].add_artist(phi_c_legends)

    ls_labels = [r"$a \times exp(\frac{\hat{t}}{\tau}) + b$", r'$C_{x}(\hat{t})=e^{-1}$', r"$C_{x}(\hat{t})=0$"]
    ls_c_patches = PipeLine.ls_handler(ls_labels,[':',"-","--"])
    ls_c_legends = Legend(axes[1,2], handles=ls_c_patches, labels=ls_labels, title='Line Style', title_fontsize=fontsize-2, fontsize=fontsize, framealpha=None, frameon=False, bbox_to_anchor=(1.02,1.02))
    axes[1,2].add_artist(ls_c_legends)

    cell_attrs = PipeLine.cellAttributes(group_name, geometry='cylindrical', cell_type=None, warning=False)
    fig.suptitle(fr"the ACF plot and their associated exponential fits for $C_{{x}}(\hat{{t}})<e^{{-1}}$ in a system with $N={cell_attrs.nmon}$, $D={cell_attrs.dcyl}$, $a={cell_attrs.dcrowd}$.",fontsize=fontsize+2)
    fname = "acf-exp_fit-"+group_name+"."+'pdf'
    fig.tight_layout()
    plt.savefig(fname, bbox_inches='tight')
    plt.close()