# The analyze phase:
In this phase, any *segment* files produced in the *probe* phase are first merged into *whole* files. Then, ensemble, ensemble-averaged, and space files are created from the whole files.

### To-do list:

- [x] analyzing *bug* *segment* and *whole* files in both projects in a serial manner.
- [ ] analyzing *bug* *segment* and *whole* files in both projects in a parallel manner with Dask: memory linkage problem
- [ ] analyzing *all* *segment* and *whole* files in both projects in a serial manner.
- [ ] analyzing *all* *segment* and *whole* files in both projects in a parallel manner with Dask

### Naming convention:

This is the pattern of file or directory names:

1. **whole** files: whole-group-property_[-measure][-stage][.ext]
2. **ensemble** files: ensemble-group-property_[-measure][-stage][.ext]
3. **ensemble_long** files: ensemble_long-group-property_[-measure][-stage][.ext]
4. **space** files: space-group-property_[-measure][-stage][.ext]
5. **all in one** files: space-group-**species**-**allInOne**-property-_[-measure][-stage][.ext]

[keyword] means that the keyword in the file name is optional. [-measure] is a physical measurement, such as the autocorrelation function (ACF), done on the physical 'property_'.

# PC: parallel scheme with Dask and serial scheme

Files generated in the *probe* phase can be in *segment* or *whole* version. The analyze phase can be started from the *segments* or *wholes* depending on the type of simulation.

#### Bug

In [1]:
from glob import glob
from polyphys.manage import organizer
from polyphys.manage.parser import SumRuleCyl, TransFociCyl, TransFociCubic
from polyphys.analyze import analyzer
import warnings
warnings.filterwarnings("ignore")
# Settings for analyzing the "bug" group, keyed by project name.
# Each entry stores the dir/file glob hierarchy, the parser class, the geometry
# label, the segment flag, and the property lists for that project.
# Property tuples are (property_, species, group); the tuples under
# 'nonscalar_hist_t_properties' carry a fourth field, avg_axis.


def _bug_mon(*property_names):
    """Return a new list of (property_, 'Mon', 'bug') tuples, in the given order."""
    return [(property_name, 'Mon', 'bug') for property_name in property_names]


def _sum_rule_cyl_bug(is_segment):
    """Return a new SumRuleCyl 'bug' entry; only the 'is_segment' flag varies."""
    return {
        'hierarchy': 'N*/N*',  # dir/file
        'parser': SumRuleCyl,
        'geometry': 'cylindrical',
        'is_segment': is_segment,
        'nonscalar_mat_t_properties': _bug_mon('principalT'),
        'acf_tseries_properties': _bug_mon(
            'fsdT', 'gyrT', 'transSizeT', 'rfloryT', 'shapeT', 'asphericityT'
        ),
        'hist_properties': _bug_mon('rflory'),
    }


def _trans_foci_bug(hierarchy, parser, geometry, acf_names):
    """Return a new TransFoci 'bug' entry (whole files) for the given variant.

    The histogram and matrix property lists are the same for every variant;
    the hierarchy, parser, geometry and ACF property names are supplied by
    the caller.
    """
    return {
        'hierarchy': hierarchy,  # dir/file
        'parser': parser,
        'geometry': geometry,
        'is_segment': False,
        'nonscalar_hist_t_properties': [
            # property_, species, group, avg_axis
            ('bondsHistT', 'Foci', 'bug', 0),
            ('clustersHistT', 'Foci', 'bug', 0),
        ],
        'nonscalar_mat_t_properties': [
            # property_, species, group
            ('distMatT', 'Foci', 'bug'),
            ('principalT', 'Mon', 'bug'),
        ],
        'acf_tseries_properties': _bug_mon(*acf_names),
    }


bug_details = {
    'SumRuleCylSegment': _sum_rule_cyl_bug(is_segment=True),
    'SumRuleCylWhole': _sum_rule_cyl_bug(is_segment=False),
    'TransFociCylWhole': _trans_foci_bug(
        'eps*/eps*',
        TransFociCyl,
        'cylindrical',
        ('fsdT', 'gyrT', 'transSizeT', 'shapeT', 'asphericityT'),
    ),
    'TransFociCubicWhole': _trans_foci_bug(
        'al*/al*',
        TransFociCubic,
        'cubic',
        ('fsdT', 'gyrT', 'shapeT', 'asphericityT'),
    ),
}

In [2]:
%%time
# Run the "bug" analysis serially over every matching probe database.
# Recorded run time: about 10 mins on a MacBook Mini with 7000 lags for
# N2000D30.0ac4.0-probe-bugSegment and N2000D30.0ac6.0-probe-bugSegment.
# Alternative input (segment databases):
# input_databases = glob("/Users/amirhsi_mini/research_data/probe/N*-probe-bugSegment")
input_databases = glob("/Users/amirhsi_mini/research_data/probe/ns*-bugWhole/")
project = 'TransFociCubicWhole'
project_details = bug_details[project]

# Forward only the property lists the selected project actually defines.
# The SumRuleCyl entries have no 'nonscalar_hist_t_properties' key, so indexing
# it unconditionally raised KeyError as soon as `project` was switched to them.
# Assumes analyze_bug treats these keyword arguments as optional - TODO confirm.
# NOTE(review): 'hist_properties' (present in the SumRuleCyl entries) is not
# forwarded, exactly as before - confirm whether analyze_bug accepts it.
bug_property_kwargs = {
    key: project_details[key]
    for key in (
        'nonscalar_hist_t_properties',
        'nonscalar_mat_t_properties',
        'acf_tseries_properties',
    )
    if key in project_details
}

for input_database in input_databases:
    print(input_database)
    analyzer.analyze_bug(
        input_database,
        project_details['hierarchy'],
        project_details['parser'],
        project_details['geometry'],
        project_details['is_segment'],
        # nlags=20000,
        **bug_property_kwargs
    )

/Users/amirhsi_mini/research_data/probe/ns400nl5al5ac1phic0_0_15ens1-bugWhole/
'['segment', 'segment_id']' are not among columns.
/Users/amirhsi_mini/research_data/probe/ns400nl5al3ac1phic0_0_15ens1-bugWhole/
'['segment', 'segment_id']' are not among columns.
/Users/amirhsi_mini/research_data/probe/ns400nl5al1ac1phic0_0_15ens1-bugWhole/
'['segment', 'segment_id']' are not among columns.
CPU times: user 16.7 s, sys: 687 ms, total: 17.4 s
Wall time: 17.9 s


#### all

In [3]:
from glob import glob
from polyphys.manage import organizer
from polyphys.manage.parser import SumRuleCyl, TransFociCyl, TransFociCubic
from polyphys.analyze import analyzer
import warnings
warnings.filterwarnings("ignore")
# Settings for analyzing the "all" group, keyed by project name.
# Each entry stores the dir/file glob hierarchy, the parser class, the geometry
# label, the segment flag, and the histogram property lists for that project.
# Property tuples are (direction, species, group); edge tuples are
# (direction, group).
# NOTE(review): the two '...Whole' keys have 'is_segment' set to True - confirm
# this is intended.


def _all_triples(directions, species):
    """Return a new list of (direction, species, 'all') tuples.

    Directions vary slowest: every species is listed for the first direction
    before moving on to the next direction.
    """
    return [
        (direction, kind, 'all')
        for direction in directions
        for kind in species
    ]


def _all_edges():
    """Return a new list of (direction, 'all') tuples for x, y and z."""
    return [(axis, 'all') for axis in ('x', 'y', 'z')]


_ALL_PLANES = ('xy', 'xz', 'yz')
_ALL_FOCI_SPECIES = ('Crd', 'Mon', 'Dna', 'Foci')

all_details = {
    'SumRuleCylSegment': {
        'hierarchy': 'N*/N*',  # dir/file
        'parser': SumRuleCyl,
        'geometry': 'cylindrical',
        'is_segment': True,
        'rho_phi_hist_properties': _all_triples(('r', 'z'), ('Crd', 'Mon')),
        'hist_properties': _all_triples(('theta',), ('Crd', 'Mon')),
        'hist2d_properties': _all_triples(_ALL_PLANES, ('Crd', 'Mon')),
        'hist2d_edges': _all_edges(),
    },
    'TransFociCylWhole': {
        'hierarchy': 'eps*/eps*',  # dir/file
        'parser': TransFociCyl,
        'geometry': 'cylindrical',
        'is_segment': True,
        'rho_phi_hist_properties': _all_triples(
            ('r', 'z'), ('Crd', 'Mon', 'Foci')
        ),
        # 'Dna' along r and z first, then every species along theta.
        'hist_properties': (
            _all_triples(('r', 'z'), ('Dna',))
            + _all_triples(('theta',), _ALL_FOCI_SPECIES)
        ),
        'hist2d_properties': _all_triples(_ALL_PLANES, _ALL_FOCI_SPECIES),
        'hist2d_edges': _all_edges(),
    },
    'TransFociCubicWhole': {
        'hierarchy': 'al*/al*',  # dir/file
        'parser': TransFociCubic,
        'geometry': 'cubic',
        'is_segment': True,
        'rho_phi_hist_properties': _all_triples(
            ('r',), ('Crd', 'Mon', 'Foci')
        ),
        'hist_properties': _all_triples(('r',), ('Dna',)),
        'hist2d_properties': _all_triples(_ALL_PLANES, _ALL_FOCI_SPECIES),
        'hist2d_edges': _all_edges(),
    },
}

In [4]:
%%time
# Run the "all" analysis serially over every matching probe database.
# Recorded run times (MacBook Mini, 7000 lags):
#   15 mins: N1000D15.0ac1.0-probe-bugWhole, N1000D15.0ac2.0-probe-bugWhole,
#            N2000D20.0ac1.0-probe-bugWhole, N2000D20.0ac2.0-probe-bugWhole
#   12 mins: N2000D25.0ac1.0-probe-bugWhole, N2000D25.0ac2.0-probe-bugWhole,
#            N2000D25.0ac4.0-probe-bugWhole
#    8 mins: N2000D30.0ac4.0-probe-bugSegment, N2000D30.0ac6.0-probe-bugSegment
#    4 mins: N500D10.0ac0.8-probe-bugSegment, N500D10.0ac1.0-probe-bugSegment
#    3 mins: N500D10.0ac0.6-probe-bugSegment
# Alternative input:
# input_databases = glob("/Users/amirhsi_mini/research_data/probe/N1000D15.0ac2.0-bugWhole")
# NOTE(review): the pattern below selects '-bugWhole' directories although this
# cell uses the "all" settings - confirm the intended input databases.
input_databases = glob("/Users/amirhsi_mini/research_data/probe/ns*-bugWhole/")
project = 'TransFociCubicWhole'
project_details = all_details[project]

# Arguments shared by every database; looked up once instead of per iteration.
common_args = tuple(
    project_details[key]
    for key in ('hierarchy', 'parser', 'geometry', 'is_segment')
)
histogram_kwargs = {
    key: project_details[key]
    for key in (
        'hist_properties',
        'hist2d_properties',
        'hist2d_edges',
        # 'rho_phi_hist_properties',
    )
}

for input_database in input_databases:
    print(input_database)
    analyzer.analyze_all(input_database, *common_args, **histogram_kwargs)

/Users/amirhsi_mini/research_data/probe/ns400nl5al5ac1phic0_0_15ens1-bugWhole/
/Users/amirhsi_mini/research_data/probe/ns400nl5al3ac1phic0_0_15ens1-bugWhole/
/Users/amirhsi_mini/research_data/probe/ns400nl5al1ac1phic0_0_15ens1-bugWhole/
CPU times: user 1.48 s, sys: 736 ms, total: 2.22 s
Wall time: 3.76 s
