In [None]:
# Importing necessary packages:
#import re
#import os
from glob import glob
#import math
import numpy as np
import pandas as pd
#import datetime as dt
#import itertools

import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib as mpl
from matplotlib import rcParams, cycler
import matplotlib.lines as lines
from collections import OrderedDict
from matplotlib.legend import Legend
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
from matplotlib.collections import PolyCollection # for chaing confidence interval color in statsmodels.graphics.tsaplots.plot_acf

import seaborn as sns

import statsmodels.tsa.stattools as tsas
import statsmodels.graphics.tsaplots as tsap

from polyphys.visualize import plotter
from polyphys.visualize import tuner
from polyphys.manage.parser import SumRule, TransFoci
from polyphys.manage import organizer

from polyphys.analyze import correlations 
from scipy import optimize

import inspect

### Importing data

- **Caution: project-specific settings**

In [None]:
# loading databases:
# NOTE: `database` is a machine-specific absolute path (it must end with a
# path separator because file names are appended by string concatenation).
database = '/Users/amirhsi_mini/research_data/analysis/'
project_name = 'SumRule'
spaces_to_read = ["N2000D30.0ac4.0","N2000D30.0ac6.0"]
#parser=TransFoci
parser=SumRule
#project_name = 'TransFoci'
#spaces_to_read = ["ns400nl5al5D20ac1"]
group = "bug"
species = "Mon"

# Read one "allInOne" chain-size table per space, then stack them row-wise.
# The per-space frames are collected under their own name so that `chainsize`
# is only ever bound to the final DataFrame.
chainsize_frames = []
for space in spaces_to_read:
    space_middle_name = "-".join([space, group, species])
    space_chainsize = pd.read_parquet(
        database + space_middle_name + "-allInOne-chainSize.parquet.brotli"
    )
    chainsize_frames.append(space_chainsize)
# ignore_index=True already yields a fresh 0..n-1 index, so no extra
# reset_index call is needed afterwards.
chainsize = pd.concat(chainsize_frames, axis=0, ignore_index=True)

### Some explorations:

In [None]:
# Preview the first rows of the combined chain-size table.
chainsize.head()

In [None]:
# List the column labels of the combined chain-size table.
chainsize.columns

In [None]:
# Summarize the combined table (column dtypes, non-null counts, memory usage).
chainsize.info()

### Grouping and filtering

Since the datasets we work with are large, it is a good idea to 
- define some filters to ease subsetting the dataframes.
- define distinguishable colors for the volume fraction of crowders as the changing parameter that defines different ensAvg groups in a space
- round the value of this changing parameter.

- **Caution: project-specific settings**

In [None]:
# droping some of the columns
# Drop the '-var' and '-sem' companion columns, including the '*TFoci' ones.
# errors='ignore' skips labels that are not present, so this cell neither
# fails on datasets without the '*TFoci' columns nor when it is re-run.
chainsize = chainsize.drop(
    columns=[
        'asphericityTMon-var', 'asphericityTMon-sem',
        'bondsTFoci-var', 'bondsTFoci-sem',
        'clustersTFoci-var', 'clustersTFoci-sem',
        'fsdTMon-var', 'fsdTMon-sem',
        'gyrTMon-var', 'gyrTMon-sem',
        'rfloryTMon-var', 'rfloryTMon-sem',
        'shapeTMon-var', 'shapeTMon-sem',
    ],
    errors='ignore',
)

In [None]:
# Drop the '-var' and '-sem' companion columns of the '*TMon' properties.
# errors='ignore' skips labels that are not present, so this cell does not
# raise a KeyError when the columns were already removed (e.g. by a previous
# cell or by an earlier run of this one).
chainsize = chainsize.drop(
    columns=[
        'asphericityTMon-var', 'asphericityTMon-sem',
        'fsdTMon-var', 'fsdTMon-sem',
        'gyrTMon-var', 'gyrTMon-sem',
        'rfloryTMon-var', 'rfloryTMon-sem',
        'shapeTMon-var', 'shapeTMon-sem',
    ],
    errors='ignore',
)

In [None]:
def rounding_func(x, round_to):
    """Round `x` to the nearest multiple of `round_to`, then to 3 decimals.

    Works element-wise on scalars, NumPy arrays and pandas Series.
    """
    return np.round(np.rint(x / round_to) * round_to, 3)


# a sorted list of unique spaces in the dataset
spaces = chainsize.loc[:,'space'].drop_duplicates().sort_values()
spaces = sorted(spaces, key = organizer.sort_by_alphanumeric)
print(spaces)
# rounding phi_c as factors of 0.025 up to 3 decimal digits:
round_to = 0.025
phi_crds = chainsize.loc[:,'phi_c_bulk'].drop_duplicates().values
# np.unique returns its result sorted, so no separate sort is needed.
phi_crds = np.unique(rounding_func(phi_crds, round_to))
print(phi_crds)
print("Number of unique phi_c_bulk:", len(phi_crds))
# setting colors for unique crd_c
flar_cmap = mpl.colors.ListedColormap(sns.cm._flare_lut)
flare_cmap_cut = tuner.truncated_colormap(flar_cmap,  min_value=0.0, max_value=1.0, ncolors=200)
# Register the colormap under a name seaborn can look up. The registry API is
# preferred when available: force=True lets the cell be re-run without a
# "already registered" error, and mpl.cm.register_cmap is deprecated/removed
# in recent matplotlib releases.
if hasattr(mpl, "colormaps"):
    mpl.colormaps.register(flare_cmap_cut, name="flare_cmap_cut", force=True)
else:
    mpl.cm.register_cmap("flare_cmap_cut", flare_cmap_cut)
phi_colors = sns.color_palette("flare_cmap_cut", len(phi_crds))#,as_cmap=True)
# add rounded phi_crds to the dataset (vectorized over the whole column)
chainsize['phi_c_bulk_round'] = rounding_func(chainsize['phi_c_bulk'], round_to)
# Define a list of unique physical properties:
properties = [property_.split('-mean')[0] for property_ in chainsize.columns if '-mean' in property_]
# change the name of cols after finding properties based on "mean" in their name
# NOTE: once the '-mean' suffix is stripped here, re-running this cell finds
# no '-mean' columns and leaves `properties` empty; reload the data first.
new_columns = [property_.split('-mean')[0] for property_ in chainsize.columns]
properties.sort()
chainsize.columns = new_columns
print(properties)
# Per-property plotting specifications, keyed by property name. Each entry
# holds a human-readable 'name', a mathtext 'symbol' and a matplotlib 'color'.
properties_specs = {
    key: {'name': name, 'symbol': symbol, 'color': color}
    for key, name, symbol, color in [
        ('rfloryTMon', 'Flory radius', r'$R_F(\hat{t})$', 'firebrick'),
        ('gyrTMon', 'radius of gyration', r'$R_g(\hat{t})$', 'steelblue'),
        ('fsdTMon', 'furthermost distance', r'$L(\hat{t})$', 'forestgreen'),
        ('asphericityTMon', 'asphericity', r'$\Delta(\hat{t})$', 'goldenrod'),
        ('shapeTMon', 'shape parameter', r'$S(\hat{t})$', 'orchid'),
        ('bondsTFoci', 'bond size', r'$B(\hat{t})$', 'orchid'),
        ('clustersTFoci', 'cluster size', r'$C(\hat{t})$', 'orchid'),
    ]
}
# Mathtext titles keyed by column name. Raw strings keep the LaTeX
# backslashes literal; without the r-prefix "\h" is an invalid escape
# sequence, which newer Python versions warn about.
titles = {
    "phi_c_bulk_round": r"$\phi_c^{{(bulk)}}$",
    "time": r"$\hat{{t}}$",
    "lags": r"$\hat{t}_{lags}$",
}

- **Caution: project-specific settings**
- Ignoring some properties in plotting for **TransFoci** project

In [None]:
# Properties to leave out of the plots; extend the tuple (e.g. with
# 'fsdTMon') when more properties should be ignored.
properties_to_ignore = ('rfloryTMon',)
# Filtering (instead of list.remove / dict.pop without a default) keeps this
# cell safe to re-run and safe when an ignored property is absent.
properties = [name for name in properties if name not in properties_to_ignore]
print(properties)
for ignored_property in properties_to_ignore:
    properties_specs.pop(ignored_property, None)
print(properties_specs)

### Time-series analysis:

#### Normalization or transformation: not needed

In [None]:
def minmax_scaler(data, data_min, data_max):
    """Rescale `data` linearly so that `data_min` maps to 0 and `data_max` to 1.

    Parameters
    ----------
    data
        Value(s) to rescale.
    data_min
        Lower reference value.
    data_max
        Upper reference value.

    Returns
    -------
    ``(data - data_min) / (data_max - data_min)``

    Raises
    ------
    ValueError
        If `data_min` is greater than or equal to `data_max`.
    """
    if data_min >= data_max:
        message = (
            f"In the min-max normalization, the min of data '{data_min}'"
            f" cannot be larger than or equal to '{data_max}'."
        )
        raise ValueError(message)
    shifted = data - data_min
    data_range = data_max - data_min
    return shifted / data_range

def standard_scaler(data, data_mean, data_std):
    """Standardize `data` by subtracting `data_mean` and dividing by `data_std`.

    Returns ``(data - data_mean) / data_std``. No check is made on
    `data_std`; a zero value is handled by whatever the operand types do
    on division by zero.
    """
    centered = data - data_mean
    return centered / data_std

##### Dataset for testing normalization:

In [None]:
# Build a small per-space test frame and add min-max and z-score normalized
# versions of one property, normalizing separately within each hue group.
space = spaces[0]
property_ = 'gyrTMon'
hue_attr = 'phi_c_bulk_round'
s_info = parser(
        space, 
        geometry='biaxial',
        group=group,
        lineage='space',
        ispath=False
    )
space_title =  fr" $N={s_info.nmon}, D={s_info.dcyl}, a_c={s_info.dcrowd}$"
#space_title =  f" $n_s={s_info.nmon_small}, n_l={s_info.nmon_large}, a_l={s_info.dmon_large}, D={s_info.dcyl}, a_c={s_info.dcrowd}$"
# .copy() makes the subset an independent frame, so adding columns below
# does not act on (or warn about) a slice of `chainsize`.
chainsize_space = chainsize.loc[chainsize['space']==space,['time', property_, hue_attr]].copy()
hue_unique_values = chainsize_space[hue_attr].drop_duplicates().values
# Float placeholders: the normalized values are floats, and assigning floats
# into an integer column is an incompatible-dtype assignment in newer pandas.
chainsize_space[property_ + '-minmax'] = 0.0
chainsize_space[property_ + '-zscore'] = 0.0
for hue_unique in hue_unique_values:
    attr_cond = chainsize_space[hue_attr] == hue_unique
    attr_values = chainsize_space.loc[attr_cond, property_]
    attr_min = attr_values.min()
    attr_max = attr_values.max()
    # The scalers are plain arithmetic, so they are applied to the whole
    # group at once instead of element by element.
    chainsize_space.loc[attr_cond, property_ + '-minmax'] = minmax_scaler(attr_values, attr_min, attr_max)
    attr_mean = attr_values.mean()
    attr_std = attr_values.std()
    chainsize_space.loc[attr_cond, property_ + '-zscore'] = standard_scaler(attr_values, attr_mean, attr_std)

##### Visualize normalization

###### z-Score

In [None]:
# Plot the z-score column (`property_ + '-zscore'`) of `chainsize_space` for
# the selected space, passing `hue_attr` and its title from `titles`.
# NOTE(review): the meaning of each positional argument is defined by
# plotter.p_tseries_allInOne_space, which is not visible here -- confirm
# against its signature.
# NOTE(review): the LaTeX label below hard-codes R_g; it matches
# property_ = 'gyrTMon' and presumably needs updating if `property_` changes.
plotter.p_tseries_allInOne_space(
    property_ + '-zscore',
    hue_attr,
    chainsize_space,
    space,
    space_title,
    r"$\frac{R_g(t)-R_g^{(mean)}}{\sigma_{R_g}}$",
    titles[hue_attr],
    figsize=(18,6),
    leg_ncols=2
)

###### MinMax

In [None]:
# Plot the min-max column (`property_ + '-minmax'`) of `chainsize_space` for
# the selected space, passing `hue_attr` and its title from `titles`.
# NOTE(review): the meaning of each positional argument is defined by
# plotter.p_tseries_allInOne_space, which is not visible here -- confirm
# against its signature.
# NOTE(review): the LaTeX label below hard-codes R_g; it matches
# property_ = 'gyrTMon' and presumably needs updating if `property_` changes.
plotter.p_tseries_allInOne_space(
    property_ + '-minmax',
    hue_attr,
    chainsize_space,
    space,
    space_title,
    r"$\frac{R_g(t)-R_g^{(min)}}{R_g^{(max)}-R_g^{(min)}}$",
    titles[hue_attr],
    figsize=(18,6),
    leg_ncols=2
)
)