In [1]:
from glob import glob
from pathlib import Path
from PipeLine import *
import datetime
import numpy as np
import pandas as pd
import MDAnalysis as mda 
from MDAnalysis.coordinates.LAMMPS import DumpReader
from MDAnalysis import transformations as mdatransform
from MDAnalysis.analysis import diffusionmap, align, rms
import matplotlib.pyplot as plt
import seaborn as sns
import polyphys

  MIN_CHEMFILES_VERSION = LooseVersion("0.9")


# Test volume fraction distribution

In [None]:
class Distributions():
    """
    Distributions computes the local number density of any type of particle and the local volume fraction of beads (spheres) in 1D (a cylinder with the periodic boundary condition (PBC) along the longitudinal axis (z axis)), 2D (a slit with the PBC along the x and y axes (radial direction)), and 3D (a cube with the PBC along the x, y, and z axes (spherical)).
        Here it is assumed that particles are hard, so the volumes of their intersections with the bins are calculated numerically in full detail. However, particles can also be soft, so their volume can follow a particle distribution such as the normal distribution. Indeed, the soft-particle approximation is a more reasonable choice for Lennard-Jones particles. If the soft-particle distribution is desired, this class should be extended to accept an arbitrary function as the volume distribution function of the particles. The soft-particle approximation is indeed similar to the methods used in other branches of STEM to smooth a histogram.
    There are three types of bins: concentric bins (and thus concentric edges) along the radial direction, consecutive bins along the longitudinal direction, and cyclic consecutive bins along the polar and azimuthal directions. This class correctly finds the bounds in the first two cases.
    The histogram (frequency of occurrence) and the number density are functions of one bin/bin center/pair of consecutive edges:
    If H_i is the frequency of particles in bin i (or at bin center i) and V_i is the volume of bin i, then rho_i=H_i/V_i is the number density.
    However, the volume fraction is a function of several bins. More precisely, the number of bins is set by the ratio of the bead size (diameter) R to the bin size h: M_j<=R/h is the number of bins with which the bead intersects/overlaps, where j is the index of a bin or bin center. The strict inequality (<) holds when the bead is close to the boundaries (the left and right boundaries along the longitudinal direction or the outermost boundary along the radial direction). In other cases, the equality (=) holds. In other words, the volume of a bead is shared among M_j bins, so it is necessary to find the volume of intersection of the bead and each bin. For the sake of simplicity, it is assumed that all the particles that are in bin i with frequency H_i are located at the bin center, so we have a bin/test bead that is located at the center of the bin. There are N bins or bin centers, bin/test beads, frequencies H_i, number densities rho_i, and volume fractions phi_i, while there are N+1 bin edges.
    The volume fraction phi_i of bin i is the sum over all the shares that bin i has from the volumes of the N bin/test beads; in other words, phi_i = sum_{j=1..P_i} rho_j*v_j, where P_i is the number of shares (the number of terms in the sum) that bin i has from the volumes of the N bin/test beads, v_j is the volume share of bin i from bin/test bead j, and rho_j is the number density of bin j in which bin/test bead j resides.
    Due to the above-described assumption and the one-dimensional nature of the problem (frequencies, densities, and volume fractions are measured along one direction), it is needed to find the volume share v_j once for each bin/test bead.
    The distribution functions (histograms, number densities, and volume fractions) are averaged twice: First, over all the measurements in one simulation (time-averaging); this step results in the distributions of an individual simulation. Then, the time-averaged distribution of interest is averaged over all the thermodynamically-equivalent simulations in an ensemble, and an ensemble-averaged distribution is generated.
    len(histogram.index)=len(bin_size)=len(bin_vols)=len(rho)=len(bin_edges)-1
    
    Issue: Resolved on 20211016
    The problem with some choice of bin size such a 0.2a for monomer size a=1.0 in the local volume fraction of monomers, or 0.4a for crowder size a_c=6.0a with monomer size a=1.0 in the local volume fraction of crowders has not been resolved yet.
    
    Caution:
    A simulation group usually results in a graph or curve for the project and refers to a collection of simulations that all have the same values for one or several input parameters of the project.
    An ensemble is a collection of thermodynamically-equivalent simulations that differ only in their random number seeds, initial conditions, or boundary conditions but have the same input parameters. In the standard statistical-mechanical approach, an ensemble is equivalent to a simulation, but here we use it to refer to all the thermodynamically-equivalent simulations.
    An ensemble-averaged group is an average over all the simulations in an ensemble and usually gives a data point.
    If there are N ensembles, each with M simulations, then there are N ensemble-averaged groups and N*M simulations in the simulation group.
    
    Parameters:
    histogram (pandas dataframe): a dataframe of an ensemble, an ensemble-averaged group, or a simulation group, in which the index is the bin centers, the names of the columns are the names of the simulations in an ensemble, the name of the ensemble-averaged group, or the names of the ensemble-averaged groups in a simulation group, the columns are the frequencies of particles of the type of interest (see r_particle) in each of the bins, and the number of columns is the number of simulations in an ensemble, one in an ensemble-averaged group, or the number of ensembles (=the number of ensemble-averaged groups) in a simulation group.
    properties (pandas dataframe): the properties of each simulation in an ensemble, of the ensemble-averaged simulation in an ensemble-averaged group, or of the ensemble-averaged simulations in a simulation group.
    r_particle (float): the radius (half of the size or diameter) of the particles whose frequencies are counted in histogram. The particle type, and thus the radius, is assumed to be the same for all the columns of histogram.
    geometry (str): the shape of the simulation box
    direction (str): the direction of interest in the geometry of interest.
    """
    # Supported simulation-box geometries: cubic (free space, or periodic boundary
    # condition (PBC) in all three directions), slit (PBC in the x and y directions),
    # and cylindrical (PBC in the z direction).
    _geometries = ["cubic", "slit", "cylindrical"]
    # Directions that are accepted for each geometry; __init__ validates the
    # `direction` argument against the list of the chosen geometry.
    _directions = {
        "cubic": ["radial", "polar", "azimuthal"],
        "slit": ["radial", "polar", "longitudinal"],
        "cylindrical": ["radial", "polar", "longitudinal"]
    }
    # Volume-element integrands per geometry and direction. The first argument of
    # each lambda is the integration variable; the remaining ones are geometric
    # constants (presumably passed through the `args` parameter of
    # scipy.integrate.quad -- the call site is not in this part of the file).
    # NOTE(review): the cubic "polar" lambda names its variable `phi` and the
    # "azimuthal" one `theta`, whereas _short_names maps polar->"theta" and
    # azimuthal->"phi" -- confirm which convention is intended.
    # NOTE(review): for the slit/cylindrical "polar" direction, a cylindrical sector
    # of angle d(theta) has volume lcyl * dcyl**2 / 8 * d(theta); the 0.25 prefactor
    # below gives twice that when integrated over [0, 2*pi] -- verify the range used.
    _integrands = {
        "cubic":{"radial" : lambda r, const: 4 * const * np.pi * r**2, # concentric spherical shells; const is redundant and merely defined to keep the use of extra arguments consistent among integrands.
                 "polar": lambda phi, dcyl: dcyl**3 / 12, # spherical sections of diameter dcyl
                 "azimuthal": lambda theta, dcyl: np.pi * dcyl**3 * np.sin(theta) / 12 # spherical wedges of diameter dcyl
                },
        "slit":{
            "radial" : lambda r, lcyl: 2 * np.pi * lcyl * r, # concentric cylindrical shells with length lcyl
            "polar" : lambda theta, lcyl, dcyl: 0.25 * lcyl * dcyl**2, # cylindrical sectors of length lcyl and diameter dcyl
            "longitudinal": lambda z, dcyl: 0.25 * np.pi * dcyl**2 # disks of diameter dcyl
        },
        "cylindrical":{
            "radial": lambda r, lcyl: 2 * np.pi * lcyl * r, # concentric cylindrical shells with length lcyl
            "polar": lambda theta, lcyl, dcyl: 0.25 * lcyl * dcyl**2, # cylindrical sectors of length lcyl and diameter dcyl
            "longitudinal": lambda z, dcyl: 0.25 * np.pi * dcyl**2 # disks of diameter dcyl
        }
    }
    # Short direction labels; __init__ appends 'Rhos' / 'Phis' to them to build
    # short_name_rho and short_name_phi.
    _short_names = {
        "cubic":{"radial" : "r",
                 "polar": "theta",
                 "azimuthal": "phi"
                },
        "slit":{
            "radial" : "r",
            "polar" : "theta",
            "longitudinal": "z"
        },
        "cylindrical":{
            "radial": "r",
            "polar": "theta",
            "longitudinal": "z"
        }
    }
    
    def __init__(self, histogram, properties, r_particle, geometry, direction, round_to=4, normalize=True):
        if isinstance(histogram, pd.DataFrame):
            self.histogram = histogram
            self.centers = np.around(histogram.index.to_numpy(),decimals=2) # since centers are index it is important to have them round. decimals=2 work well for a_c/a<1 and a_c>a>=1 where a is monomer diamter and a_c is  crowder diamter.
        else:
            raise TypeError(f"'{histogram}' is not a Pandas Dataframe. Currently, Pandas Dataframes are only supported.")    
        if isinstance(properties, pd.DataFrame):
            self.properties = properties
        else:
            raise TypeError(f"'{properties}' is not a Pandas Dataframe. Currently, Pandas Dataframes are only supported.")
        if geometry in self._geometries:
            self.geometry = geometry
        else:
            geomteries_string = "'" + "', '".join(self._geometries) + "'"
            raise ValueError(f"'{geometry}'"
                             " is not a valid geometry for the simulation box. Please select one of "
                             f"{geomteries_string} geometries.")
        if direction in self._directions[self.geometry]:
            self.direction = direction
        else:
            directions_string = "'" + "', '".join(self._directions[self.geometry]) + "'"
            raise ValueError(f"'{direction}'"
                             " is not a valid direction for "
                             f"'{self.geometry}' geometry. Please select one of "
                             f"{directions_string} directions.")
        #self.radius_type = radius_type
        #self.r_particle = 0.5 * self.properties[self.properties.filename==filename][self.radius_type].values[0] # Assumed the sizes of the particle of interest are the same for all the columns.
        #self.r_particle = 0.5 * self.properties[self.radius_type].values[0] # Assumed the sizes of the particle of interest are the same for all the columns.
        self.r_particle = r_particle
        self.normalize = normalize
        self.short_name_rho = self._short_names[self.geometry][self.direction]+'Rhos'
        self.short_name_phi = self._short_names[self.geometry][self.direction]+'Phis'
        self._edges_from_centers()
        self._initiate_distribution()
        self._vol_shares_type()
        self._run()
    
    def _edges_from_centers(self):
        """
        _edges_from_centers creates bin edges from bin centers, assuming:
        1. all the bins have the same size, so the distance between two consecutive edges and two consecutive centers are equal.
        2. The centers linearly increase over range is [A,B]. ] means including B.
        
        Caution:
        len(edges) = len(centers) + 1
        """
        
        self.bin_size = np.around(self.centers[1] - self.centers[0],decimals=round_to) # bin_size: it should be rounded otherwise, the bin centers (the histograms indexes) and bin edges does not have the same number of meaningful digits.
        self.edges = self.centers - 0.5 * self.bin_size # all the edges except the last
        self.edges = np.append(self.edges,self.centers[-1]+ 0.5*self.bin_size) # the last edge      
    
    def _initiate_distribution(self):
        """
        _initiate_distribution creates empty dataframe with the same index and columns as the given one.
    
        """
        self.rho = pd.DataFrame(index=self.histogram.index, columns=self.histogram.columns) # number density
        self.phi = pd.DataFrame(index=self.histogram.index, columns=self.histogram.columns) # local volume fraction
    
    def spherical_segment(self, r, a, b):
        """
        spherical_segment computes the volume of the part of a sphere of radius r that
        lies between two parallel planes located at the signed distances a and b from
        the center of the sphere. This function is written based on equation 3 in the
        following webpage:
        https://mathworld.wolfram.com/SphericalSegment.html

        Caution:
        1. a and b can be negative or positive, and can be given in any order.
        2. A plane that lies outside the sphere (|a| > r or |b| > r) is moved to the
           surface of the sphere, so a spherical cap, or the whole sphere, is returned.
        3. When both planes are on the same side outside the sphere, the volume is 0.

        Parameters:
        r (float): Radius of the sphere
        a (float): Signed distance of the first base from the center of the sphere
        b (float): Signed distance of the second base from the center of the sphere

        Returns:
        V (float): Volume of the spherical segment

        Raises:
        ValueError: if r is not a positive number.

        Requirement:
        Numpy.
        """
        if r <= 0:
            raise ValueError(" r should be within a non-zero postive number.")

        # Both bounds are clamped to [-r, r] independently. Clamping only one of them
        # (as an if/elif would do) returns 0 instead of the full sphere volume whenever
        # the sphere lies entirely between the two planes (lower < -r and upper > r).
        lower = min(max(min(a, b), -r), r) # The lower bound of the volume integral
        upper = min(max(max(a, b), -r), r) # The upper bound of the volume integral
        if upper <= lower:
            # Either the two planes coincide or both are outside the sphere on the same side.
            return 0.0
        return np.pi * (r**2 * (upper - lower) - (upper**3 - lower**3) / 3)

    def _consecutive_bounds(self):
        """
        _find_bounds_consecutive finds the lowest and highest bin edges by which a spherical bead residing at the center of a bin intersects. There are three types of bins: concenteric bins (and thus concentric edges) along the radial direction, consecutive bins along the longitudinal direction, and cyclic consecutive bins along the polar and azimuthal directions. This method correctly finds the bounds in the first two cases. This method find the bounds in a consecutive-bins shceme along the longitudinal direction.
        
        Cuation: 
        This is a 1D problems since the bins, bin centers and bin edges are along the same axis. In reality, the center of a spherical bead can be anywhere in a bin, but it is placed at the center of the bin along the axis of interest.
        The center of a particle can be in any of the bins, so 
        While this assumption makes the value of the local volume fraction slightly inaccurate, it singnificantly reduce the computational cost of measure the local volume fraction in a system with a large number of particles.
        The positions and r_concentrics have some relations:
        1. len(edges) = len(centers) + 1
        2. There is only one center between two consecutive edges.
        3. All the arrays are increasing funcions of their indices.
        
        leftmost/leftmost_idx is the value/index of the bin center in which the leftmost boundary of a particle is located.
        rightmost/rightmost_idx is the value/index of the bin center in which the rightmost boundary of a particle is located.
        
        Instead of the leftmost/rightmost pairs (which are more appropriate for the longitudinal direction), the innermost/outermost can also be used.
        """
        box_length = self.edges[-1] - self.edges[0] # length of simulation domain along the direction of interest.
        leftmost = self.centers - self.r_particle # The minimum distance of the r_atoms perimeter from the origin
        leftmost = np.where(leftmost < self.edges[0],leftmost + box_length,leftmost)
        leftmost_idx = np.zeros(len(leftmost),dtype=int) # Initiate the leftmost bound with the lowest possible bound
        rightmost = self.centers + self.r_particle # The maximum distance of the r_atoms perimeter from the origin
        rightmost = np.where(rightmost > self.edges[-1],rightmost - box_length,rightmost)
        rightmost_idx = (len(rightmost)-1) * np.ones(len(rightmost),dtype=int) # Initiate the rigtmost bound with the highest possible bound
        for idx, leftmost_value in enumerate(leftmost):
            for edge_idx in range(len(self.edges[:-1])):
                if (leftmost_value >= self.edges[edge_idx]) and (leftmost_value < self.edges[edge_idx+1]): # the index of the leftmost bin (or the index of the **left edges** of leftmost bin) is set as the index of the bin in which the leftmost side of the bead is located.
                    leftmost_idx[idx] = edge_idx
                if (rightmost[idx] > self.edges[edge_idx]) and (rightmost[idx] <= self.edges[edge_idx+1]): # the index of the rightmost bin (or the index of the **left edge** of rigthmost bin) is set as the index of the bin in which the rightmost side of the bead is located. Keep the difference in <= ith the leftmost in mind.
                    rightmost_idx[idx] = edge_idx
        self.particle_bounds = np.column_stack((leftmost_idx,rightmost_idx)) 

    def _consecutive_vol_shares(self): # make this method better -- see the commented effort:
        """
        _concentric_vol_shares computes the portion of the volume of a bead (a sphere) in consecutive disk-like bins along the longitudinal direction in a cylindrical geometry. The center of the test particle with radius self.r_particle is placed at the center of each bin. Depending on its radius, the particle can intersect with more than one bins. so its volume can contributes to the local volume fraction in more than one bin. In the self.volume_shares nested dictionary below,  self.centers are keys, each showing the bin at which the center of particle resides and dictionaries of the volume shares are the values. In inner dictionary, the bin center of a bin by which the particle intersects is the key and the volume of intersection of the particle with the bin is the value.
        
        In this algorithm, the total volume of a particle is conserved.

        Caution:
        The centers and edges have these relations:
        1. len(edges) = len(centers) + 1
        2. There is only one center between two consecutive edges.
        3. All the arrays are increasing funcions of their indices.
        4. To solve the problem with PBC in z direction, there is if-elif-else statement below in which the if part ensures the volume shares in the bins, that are close to the left side of the simulation box, are counted correctly and sum up to the volume of the particle. The elif part does the same for the bins close to the right side of the simulation box. the else part handles the rest of bins.

        Parameters:
        intersection_calculator: the function computes the volume of intersection between the bead with r_particle as its radius centerred at centers  and the consecute disk with diameter dcyl centered at edges. It is the self.spherical_segment in this method.

        Return: 
        volume_shares (a thwo-fold nested dict): A three-fold nested dict in which the (outmost) keys of the outmost dict are the centers and the (outmost)values of that are the dicts with an unequal number of keys. The keys of the inner dict are the edge indexes between the atom_bounds (inclusivily, i.e. [A,B] includes A and B as well) and the values of that are the volume of intersection between the bead and the bin for which the edge is the left or inner index.
        volume_shares={center1:{edge_index1: intersection_volume1, edge_index2: intersection_volume2 ...} , ...}  
        
        Effort to make this method better:
               for center_idx, bounds_minmax in enumerate(self.particle_bounds):
            self.volume_shares[center_idx] = {}
            # share of the middle bins from the volume of the sphere:
            # edge value is an increasing function of edge index
            if (bounds_minmax[0] > bounds_minmax[1]) and (center_idx <= len(self.centers)//2):
                lower_bound = bounds_minmax[0]
                upper_bound = len(self.edges)-1
                lower_bound_2 = 0
                upper_bound_2 = bounds_minmax[1]+1
                center_1 = self.centers[center_idx] + box_length # Due to the PBC, the bin center is moved to right side, so the volume of intgersection can be calculated correctly
                center_2 = self.centers[center_idx]
            elif (bounds_minmax[0] > bounds_minmax[1]) and (center_idx > len(self.centers)//2):
                lower_bound = bounds_minmax[0]
                upper_bound = len(self.edges)-1
                lower_bound_2 = 0
                upper_bound_2 = bounds_minmax[1]+1
                center_1 = self.centers[center_idx]           
                center_2 = self.centers[center_idx] - box_length # Due to the PBC, the bin center is moved to right side, so the volume of intgersection can be calculated correctly.
            else:
                lower_bound = bounds_minmax[0]
                upper_bound = bounds_minmax[1]+1
                center= self.centers[center_idx] 
            for edge_idx in range(bounds_minmax[0],bounds_minmax[1]+1,1):
                    left_distance = self.edges[edge_idx]-self.centers[center_idx]
                    right_distance =  self.edges[edge_idx+1]-self.centers[center_idx] # the most right bound can be a spherical cap or shperical segment; the spherical segments are used for the bins near the bounds of simulation box.
                    #if np.abs(right_distance) >= self.r_particle: 
                    self.volume_shares[center_idx][edge_idx] = self.spherical_segment(self.r_particle, left_distance, right_distance)
        """
        box_length = self.edges[-1] - self.edges[0]
        self.volume_shares = {}
        for center_idx, bounds_minmax in enumerate(self.particle_bounds):
            self.volume_shares[center_idx] = {}
            # share of the middle bins from the volume of the sphere:
            # edge value is an increasing function of edge index
            
            if (bounds_minmax[0] > bounds_minmax[1]) and (center_idx <= len(self.centers)//2): 
                for edge_idx in range(bounds_minmax[0],len(self.edges)-1,1):
                    center = self.centers[center_idx] + box_length # Due to the PBC, the bin center is moved to right side, so the volume of intgersection can be calculated correctly.
                    left_distance = self.edges[edge_idx] - center
                    right_distance =  self.edges[edge_idx+1] - center # the most right bound can be a spherical cap or shperical segment; the spherical segments are used for the bins near the bounds of simulation box.
                    self.volume_shares[center_idx][edge_idx] = self.spherical_segment(self.r_particle, left_distance, right_distance)
                
                for edge_idx in range(0, bounds_minmax[1]+1,1):
                    left_distance = self.edges[edge_idx] - self.centers[center_idx]
                    right_distance =  self.edges[edge_idx+1] - self.centers[center_idx] # the most right bound can be a spherical cap or shperical segment; the spherical segments are used for the bins near the bounds of simulation box.
                    self.volume_shares[center_idx][edge_idx] = self.spherical_segment(self.r_particle, left_distance, right_distance)

            elif (bounds_minmax[0] > bounds_minmax[1]) and (center_idx > len(self.centers)//2):
                for edge_idx in range(bounds_minmax[0],len(self.edges)-1,1):
                    left_distance = self.edges[edge_idx]-self.centers[center_idx]
                    #if np.abs(left_distance) >= self.r_particle: # the most left bound can be a spherical cap or a spherical segment; the spherical segments are used for the bins near the bounds of simulation box.
                    right_distance =  self.edges[edge_idx+1]-self.centers[center_idx] # the most right bound can be a spherical cap or shperical segment; the spherical segments are used for the bins near the bounds of simulation box.
                    self.volume_shares[center_idx][edge_idx] = self.spherical_segment(self.r_particle, left_distance, right_distance)
                
                for edge_idx in range(0, bounds_minmax[1]+1,1):
                    center = self.centers[center_idx] - box_length # Due to the PBC, the bin center is moved to left side, so the volume of intgersection can be calculated correctly.
                    left_distance = self.edges[edge_idx] - center
                    right_distance =  self.edges[edge_idx+1] - center # the most right bound can be a spherical cap or shperical segment; the spherical segments are used for the bins near the bounds of simulation box.
                    #if np.abs(right_distance) >= self.r_particle: 

                    self.volume_shares[center_idx][edge_idx] = self.spherical_segment(self.r_particle, left_distance, right_distance)
            
            else:
                for edge_idx in range(bounds_minmax[0],bounds_minmax[1]+1,1):
                    left_distance = self.edges[edge_idx]-self.centers[center_idx]
                    right_distance =  self.edges[edge_idx+1]-self.centers[center_idx] # the most right bound can be a spherical cap or shperical segment; the spherical segments are used for the bins near the bounds of simulation box.
                    #if np.abs(right_distance) >= self.r_particle: 
                    self.volume_shares[center_idx][edge_idx] = self.spherical_segment(self.r_particle, left_distance, right_distance)

    def sphere_sphere_intersection(self, r, R, d):
        """
        sphere_sphere_intersection computes the volume of intersection of two spheres.
        The sphere with radius R is at the origin (0,0,0) while the other one, with
        radius r, is located along the x axis at x=d, i.e. at (d,0,0).

        Reference: https://mathworld.wolfram.com/Sphere-SphereIntersection.html

        Inputs:
        r: the radius of the sphere located along the x axis.
        R: the radius of the sphere located at the origin.
        d: the distance of the off-origin sphere from the origin along the x axis.

        Returns:
        V: the volume of intersection. It is 0 when either radius is 0 or when the
        spheres are tangent or apart (d >= r + R), the volume of the smaller sphere
        when it lies completely inside the larger one (d == 0 or d <= |R - r|), and the
        volume of the lens otherwise.

        Requirements:
        numpy package for the constant Pi.
        """
        if r == 0 or R == 0:
            return 0

        # Working with the smaller/larger radius makes the formula independent of the order of r and R.
        small, large = min(R, r), max(R, r)

        if d == 0: # concentric spheres: the small sphere resides completely in the large one.
            return 4*np.pi*small**3 / 3
        if d >= small + large: # the spheres are either tangent to each other or not intersecting.
            return 0
        if d <= large - small: # the small sphere resides completely in the large one.
            return 4*np.pi*small**3 / 3

        # Partial overlap: the volume of the lens common to both spheres.
        return np.pi * (large + small - d)**2 * (d**2 + 2*d*small - 3*small**2 + 2*d*large + 6*small*large - 3*large**2) / (12*d)
    
    def _concentric_bounds(self):
        """
        _find_concentric_bounds finds the lowest and highest bin edges by which a spherical bead residing at the center of a bin intersects. There are three types of bins: concenteric bins (and thus concentric edges) along the radial direction, consecutive bins along the longitudinal direction, and cyclic consecutive bins along the polar and azimuthal directions. This method correctly finds the bounds in the first two cases. This method find the bounds in a concentric-bins shceme along the  ridial direction.
        
        Cuation: 
        This is a 1D problems since the bins, bin centers and bin edges are along the same axis. In reality, the center of a spherical bead can be anywhere in a bin, but it is placed at the center of the bin along the axis of interest.
        The center of a particle can be in any of the bins, so 
        While this assumption makes the value of the local volume fraction slightly inaccurate, it singnificantly reduce the computational cost of measure the local volume fraction in a system with a large number of particles.
        The positions and r_concentrics have some relations:
        1. len(edges) = len(centers) + 1
        2. There is only one center between two consecutive edges.
        3. All the arrays are increasing funcions of their indices.
        
        Instead of the leftmost/rightmost pairs (which are more appropriate for the longitudinal direction), the innermost/outermost is used.
        """
        innermost = self.centers - self.r_particle # The minimum distance of the r_atoms perimeter from the origin
        innermost_idx = np.zeros(len(innermost),dtype=int) # Initiate the leftmost bound with the lowest possible bound
        outermost = self.centers + self.r_particle # The maximum distance of the r_atoms perimeter from the origin
        outermost_idx = (len(outermost)-1) * np.ones(len(outermost),dtype=int) # Initiate the rigtmost bound with the highest possible bound
        for idx, innermost_value in enumerate(innermost):
            for edge_idx in range(len(self.edges[:-1])):
                if (innermost_value >= self.edges[edge_idx]) and (innermost_value < self.edges[edge_idx+1]): # the inner edge index of the bin is set as the index of the bin by which the innermost side of the bead intersects.
                    innermost_idx[idx] = edge_idx + 1 # For the innermost bond, the intersection of the the bead and the outer edge of the innermost bin is important!
                if (outermost[idx] >= self.edges[edge_idx]) and (outermost[idx] < self.edges[edge_idx+1]): # the outer edge index of the bin is set as the index of the bin by which the outermost side of the bead intersects.
                    outermost_idx[idx] = edge_idx # For the outermost bond, the intersection of the the bead and the inner edge of the outermost bin is important!
        self.particle_bounds = np.column_stack((innermost_idx,outermost_idx)) 

    def _concentric_vol_shares(self):
        """
        _concentric_vol_shares computes the portion of the volume of a bead (or a sphere) in different spherical or cylindrical concenteric bins. The intersection_calculator computes the volume of intersection between the bead and the bin (sphere or cylinder) of radius r_concentric located at origin.
        For the sake of simpilicity, the sphere-sphere interesction is also used for the sphere-cylinder intersection in the radial direction in the cylindrical and slit goemteries. Hence, the concentric_vol_shares can be used in all the three different radial directions.

        Caution:
        The volume share of the innermost bin is equal to the volume of the intersection given by the intersection_calculator.
        The volume share of the other bins is equal to the volume of the intersection of the bin with the inner edge of radius r_concentric substracted by the volume of the intersection of the previous bin with the inner edge given by its radius r_concentric.

        This function can be used in calculation of the local volume fraction of beads in two different situation:
        1. The radial direction in the cubic geometry with the sphere_sphere_intersection function as the intersection_calculator.
        2. The radial dirction in the cylindrical geometry with the sphere_cylinder_intersection function as the intersection_calculator.
        
        The centers and edges have these relations:
        1. len(edges) = len(centers) + 1
        2. There is only one center between two consecutive edges.
        3. All the arrays are increasing funcions of their indices.

        Parameters:
        intersection_calculator: the function computes the volume of intersection between the bead with r_particle as its radius centerred at centers  and the concentric spherical or cylindrical shells with edges as their radii all centered at the origin. It is self.sphere_sphere_intersection for this method.

        """
        self.volume_shares = {}
        for center_idx, bound_minxax in enumerate(self.particle_bounds):
            self.volume_shares[center_idx] = {}
            intersect_vol_previous = 0 # The volume share of the lowest bin all comes from itself.
            #for edge_idx in range(bound_minxax[0]+1,bound_minxax[1]+1,1):
            for edge_idx in range(bound_minxax[0],bound_minxax[1]+2,1): # The index of upper limit is increased by 2 units since 1 units becuase of the range function and the other one is because of the share of the last outmost bin. The share of outmost bin is the volume of sphere minus the volume of the interestion of the bead with the laregest bin edge samller than the outmost edge of the bead.
                intersect_vol = self.sphere_sphere_intersection(self.r_particle, self.edges[edge_idx], self.centers[center_idx])
                self.volume_shares[center_idx][edge_idx-1]= intersect_vol - intersect_vol_previous # The intersection volume between bead and edge i belongs to bin i-1. The volume share of the previous bin i-1 should be subsracted from bin i; draw a figure to realize it!.
                intersect_vol_previous = intersect_vol # set this intersection_vol as the old one.

    def _vol_shares_type(self):
        """
        _vol_shares_type chooses how the volume_shares should be measured based on the given direction. Currently, the vol_shares method is implemented in the radial direction in all the geometries (see the notes for _concentric_vol_shares) and the longitudinal direction in the cylindrical geometry.
        """
        if self.direction == "radial":
            self._concentric_bounds()
            self._concentric_vol_shares()
        elif self.direction == "longitudinal":
            self._consecutive_bounds()
            self._consecutive_vol_shares()
        else:
            raise ValueError(f"'volume_shares' is not defined in the {self.direction} direction.")

    def _set_args(self, col_name):
        """
        _set_args set the arguments for the integrads along different directions in different geometries.
        
        Parameters:
        col_name: the name of column for which the arguments are set.
        """
        chosen_properties = self.properties[self.properties.filename==col_name]
        self._args = {
            "cubic": {
                "radial" : (1,), # cocentric spherical shells; constant is redundant and merely defined to make the use of args parameter of scipi.integral.quad function consistent among integrands.
                "polar": (0.5 * chosen_properties['lcyl'].values[0], ), # in a cubic or free space, the radius of the space is half of the length of simulation box
                 "azimuthal": (0.5 * chosen_properties['lcyl'].values[0], ),
            },
            "slit": {
                "radial" : (chosen_properties['lcyl'].values[0], ),
                "polar" : (chosen_properties['lcyl'].values[0], chosen_properties['dcyl'].values[0], ),
                "longitudinal": (chosen_properties['dcyl'].values[0], )
            },
            "cylindrical": {
                "radial": (chosen_properties['lcyl'].values[0], ),
                "polar": (chosen_properties['lcyl'].values[0], chosen_properties['dcyl'].values[0], ),
                "longitudinal": (chosen_properties['dcyl'].values[0], )
            }
        }
    
    def _number_density(self, col_name):
        """
        _number_density calculate the local number density along the given direction in the given geometry. The local number density is normalized to give the area under the curve equal to one. The number density in each simulation is an average over the number densities collected every X time steps, so there are N=L/X measurements of the local number desnity in each simulation where L is total number of time steps in the simulation. For the cylindrical sum rule project X=5000 and the total number of time steps is 7*10^7, so N=14001. For the ensemble-averages, each local number desnity is averages over the M simulations in an ensemble. FOr the cylindrical sum rule project, M=8.
        
        Parameters:
        col_name: the name of column for which the number density is calculated.
        """
        integrand = self._integrands[self.geometry][self.direction]
        arguments = self._args[self.geometry][self.direction]
        bin_vols = np.array([integrate.quad(integrand, self.edges[idx] ,self.edges[idx]+self.bin_size, args=arguments)[0] for idx in range(len(self.edges[:-1]))])
        #self.rho[col_name] = np.divide(self.histogram[col_name], bin_vols)
        self.rho[col_name] = self.histogram[col_name].divide(bin_vols) # elf.histogram[col_name] and bin_vols have the same size.
        # the sum of rho is not equal to the bulk number density (r=infiity) natom/cell_vo. This arises from the way we descritize the local number desnity.
        if self.normalize:
            self.rho[col_name] = self.rho[col_name] / self.rho[col_name].sum() # time averaging: the sum of histograms = natoms * nframes. normalization: the sum of the number density is now 1.

    def _volume_fraction(self, col_name):
        """
        _volume_fraction computes the local volume fraction along the direction of interest in the given goemtetry. All the particles have the same shape. The local volume fraction is normalized to give the integral of p(r or theta or z)=1 along the direction of interest in the region of interest. See the explnation for the _number_density method and Distributions class.
        
        Parameters:
        col_name: the name of column for which the volume fraction is calculated.
        """
        rho = self.rho[col_name].to_numpy()
        n_centers = len(rho)
        phi = np.zeros(n_centers)
        for center_idx in range(n_centers):
            for vol_share_idx, vol_share in self.volume_shares[center_idx].items():
                #phi[vol_share_idx] = phi[vol_share_idx] + (rho[center_idx] * (vol_share)) # this sum and the brelow one are equivalent.
                #phi[center_idx] = phi[center_idx] + (rho[vol_share_idx] * (vol_share))
                phi[center_idx] = phi[center_idx] + (rho[vol_share_idx] * (vol_share))
        # the sum of phi is not equal to the bulk volume fraction (r=infiity) natom*vol_per_atom/cell_vol. this arises from the way we descritize the local volume fraction and the way we relate it to the local number density.
        if self.normalize:
            self.phi[col_name] = phi / np.sum(phi)
        else:
            self.phi[col_name] = phi

    def _run(self):
        """
        _run perform a list of operation over the columns in the input histogram.
        """
        for col_name in self.histogram.columns:
            self._set_args(col_name)
            self._number_density(col_name)
            self._volume_fraction(col_name)  

In [None]:
def distributions_generator(histograms, properties, radius_type, geometry, direction, particle_name, save_to=None,normalize=False,segments=False):
    """
    generates the local number density (rho) and volume fraction (phi) of the particles named particle_name, whose size is given in the radius_type column of the properties dataframe, along the direction of interest in the geometry of interest.

    Caution:
    For the sum-rule problem in the cylindrical geometry, a simulation group is a collection of simulations that all have the same values for the number of monomers, the diameter of cylinder, and the size of crowders (assuming the size of monomers is 1). An ensemble (usually with M simulations) is a collection of thermodynamically-equivalent simulations that, in addition, have the same number of crowders.
    In an ensemble-averaged simulation group, each ensemble is replaced with the average of all its simulations, so with M simulations in an ensemble and P ensembles in a group there are M*P simulations in a simulation group and P ensemble-averaged simulations in an ensemble-averaged simulation group.

    Parameters:
    histograms (dict): the keys are the names of the bunches (ensembles/ensemble-averaged groups/groups) and the values are the histogram dataframes. The columns of each dataframe are the frequencies of the particles of the type of interest in each of the bins.
    properties (pandas dataframe): the properties of the simulations; it must have a 'filename' column and the radius_type column.
    radius_type (str): the name of the column in properties in which the size (diameter) of the particles is tabled; half of it is used as the particle radius.
    geometry (str): the shape of the simulation box.
    direction (str): the direction of interest in the geometry of interest.
    particle_name (str): the name of particle type; it is appended to the names of the output files.
    save_to (str): prefix (path) to which the output csv files are saved; nothing is saved if it is None.
    normalize (bool): whether normalize the distributions or not.
    segments (bool): whether a simulation file is created from a bunch of segments or not -- This is the case for "all" type files. If True, the radius is looked up under the bunch name itself; otherwise under the bunch name followed by 'ens1'.

    Returns:
    a tuple (densities, vol_fractions) of two dicts keyed by the bunch names; the values are the rho and phi attributes of the Distributions object of each bunch.

    Raises:
    IndexError: if no row of properties has the 'filename' needed to look up the particle size.
    """
    densities = {}
    vol_fractions = {}
    for bunch_name, bunch_histogram in histograms.items(): # a bunch of simulations in a group/ensemble/ensemble-averaged group
        # For segments ("all" files) the bunch name is a file name itself; otherwise all the simulations in an ensemble have the same radius, so the first one ('ens1') is used.
        size_source = bunch_name if segments else bunch_name+'ens1'
        matching_sizes = properties[properties.filename==size_source][radius_type].values
        if len(matching_sizes) == 0:
            # Same exception type as an out-of-bounds lookup, but with the missing name in the message.
            raise IndexError(f"'{size_source}' is not found in the 'filename' column of properties.")
        r_particle = 0.5 * matching_sizes[0]
        distributions = Distributions(bunch_histogram, properties, r_particle, geometry, direction,normalize=normalize)
        densities[bunch_name] = distributions.rho
        vol_fractions[bunch_name] = distributions.phi
        if save_to is None:
            continue
        if segments:
            output = bunch_name
        else:
            cell_attrs = cellAttributes(bunch_name,geometry,warning=False)
            output = f'N{cell_attrs.nmon}D{cell_attrs.dcyl}ac{cell_attrs.dcrowd}nc{cell_attrs.ncrowd}'
        densities[bunch_name].to_csv(save_to+output+'-'+distributions.short_name_rho+particle_name+'.csv')
        vol_fractions[bunch_name].to_csv(save_to+output+'-'+distributions.short_name_phi+particle_name+'.csv')
    return densities, vol_fractions



In [None]:
# Dask set-up: `delayed` and `compute` are used by the later cells to build and run the analysis tasks.
from dask.distributed import Client
from dask import delayed
from dask import compute
client = Client(n_workers=4) # a client that asks for 4 workers
client # the last expression of the cell, so the notebook displays the client summary

### Bug: extraction and analysis: a single simulation for testing and exploring

In [None]:
def extract_trj_bug(simulation_pair, geometry, save_to="./"):
    """
    extract_trj_bug does all the measurements on the chain (bug) over a whole trajectory (simulation) and writes the results to csv files.

    Parameters:
    simulation_pair (a tuple of size 2): the pair of topology (first item) and trajectory (second item) of a simulation.
    geometry (string): the name of the geometry of the simulation box.
    save_to (str): prefix (address) to which the output files are saved.

    Output files (all prefixed with save_to + simulation name):
    -properties.csv: the simulation attributes followed by the statistics of the chain sizes and the number of frames.
    -rHists.csv, -zHists.csv, -thetaHists.csv: the histograms of the monomer positions summed over all the frames.
    -rFloryHists.csv: the histogram of the end-to-end size summed over all the frames.
    -fsd_t.csv, -rFlory_t.csv, -gyr_t.csv: the furthermost distance, the end-to-end size and the radius of gyration in each frame.
    The edges of the bins are handled by PipeLine.bin_ceate.

    Requirements:
    PipeLine, MDAnalysis, numpy.

    Caution:
    The histograms are for the cylindrical coordinate system in the geometry=cylindrical.
    For other geometries you need to redefine them.
    """
    print("This script for cylindrical geomery")
    print("Setting the name of analyze file...\n")
    cellAttrs = PipeLine.cellAttributes(simulation_pair[1],geometry)
    sim_name = cellAttrs.filename
    outfile = save_to+sim_name+"-properties.csv"
    print("")
    print(sim_name+" is analyzing...")
    print("")
    with open(outfile, mode="w") as ensemble:
        ensemble.write(cellAttrs.cols)
        PipeLine.cylinder_write(ensemble,cellAttrs)

    print("Analyzing...")
    time_unit = 1.0 # time unit = dmon * sqrt(mmon * epmon)
    lj_nstep = cellAttrs.bdump # Sampling steps via dump command in Lammps
    lj_dt = cellAttrs.dt # Timestep during the sampling process in Lammps
    simulation_dt = lj_nstep * lj_dt * time_unit

    cell = mda.Universe(simulation_pair[0], simulation_pair[1], topology_format = 'DATA', format = 'LAMMPSDUMP', lammps_coordinate_convention= 'unscaled', atom_style = "id resid type x y z", dt = simulation_dt)
    chrm = cell.select_atoms('resid 1') # resid 1 is the atoms creating the chain
    print ("Caution:The histograms are for the cylindrical coordinate system in the geometry=cylindrical. For other goemteries you need to redfined them.")

    # radial direction of the cylindrical coordinate system
    bin_size = 0.1 * min(cellAttrs.dmon,cellAttrs.dcrowd)
    print(f"Bin size for r in the cylindrical geometry is set to 0.1*min(cellAttrs.dmon,cellAttrs.dcrowd)={bin_size} in a_m=1 units.")
    lmax = 0.5 * cellAttrs.dcyl
    redges, rhist_collectors = PipeLine.bin_ceate(sim_name, bin_size, 0.0, lmax, 'rEdges', save_to)

    # z direction of the cylindrical coordinate system
    bin_size = 0.5 * cellAttrs.dmon
    # The message states the rule that is actually applied (0.5*dmon).
    print(f"Bin size for z in the cylindrical geometry is set to 0.5*cellAttrs.dmon={bin_size} in a_m=1 units.")
    lmax = cellAttrs.lcyl / 2.
    zedges, zhist_collectors = PipeLine.bin_ceate(sim_name, bin_size, -1.0 * lmax, lmax, 'zEdges', save_to)

    # theta of the cylindrical coordinate system
    bin_size = np.degrees(np.pi/36) # bin size 5 degrees
    print(f"Bin size for theta in the cylindrical geometry is set to {bin_size} in degrees.")
    thetaedges, thetahist_collectors = PipeLine.bin_ceate(sim_name, bin_size, -1*np.degrees(np.pi), np.degrees(np.pi), 'thetaEdges', save_to)

    # Chain end-to-end size distribution
    lmax = cellAttrs.nmon * cellAttrs.dmon # The contour size of the chain
    bin_size = 0.5 * cellAttrs.dmon
    print(f"Bin size for the PDF of end-to-end vector is set to 0.5*cellAttrs.dmon={bin_size} in a_m=1 units.")
    rFloryedges, rFloryhist_collectors = PipeLine.bin_ceate(sim_name, bin_size, 0, lmax, 'rFloryEdges', save_to)

    # The collectors must start from zero since the per-frame histograms are summed into them.
    collectors = (rhist_collectors, zhist_collectors, thetahist_collectors, rFloryhist_collectors)
    if any(collector.any() for collector in collectors):
        raise ValueError("One of the histogram collectors are not empty!")

    # Instantaneous quantities, one value per frame. Lists are used since appending to a numpy array copies the whole array in every frame.
    fsd_frames = [] # the furthermost size, from PipeLine.fsd
    rflory_frames = [] # the end-to-end size, from PipeLine.end_to_end
    gyr_frames = [] # radius of gyration
    for _ in cell.trajectory: # one iteration per snapshot (configuration or frame)
        # chain size: the furthermost distance
        fsd_frames.append(PipeLine.fsd(chrm.positions))

        # radius of gyration
        gyr_frames.append(chrm.radius_of_gyration())

        # end-to-end size in this frame and its histogram
        end_to_end_size = PipeLine.end_to_end(chrm.positions)
        rflory_frames.append(end_to_end_size)
        frame_hist, _ = np.histogram(end_to_end_size, rFloryedges)
        rFloryhist_collectors = np.add(rFloryhist_collectors, frame_hist)

        # number density in the cell's frame of reference; the positions are only read below, so they are fetched once.
        positions = chrm.positions

        # histogram in r direction
        rmon = np.linalg.norm(positions[:,:2], axis = 1) # r component of position of each monomer
        frame_hist, _ = np.histogram(rmon, redges)
        rhist_collectors = np.add(rhist_collectors, frame_hist)

        # histogram in z direction
        zmon = positions[:,2] # z component of position of each monomer
        frame_hist, _ = np.histogram(zmon, zedges)
        zhist_collectors = np.add(zhist_collectors, frame_hist)

        # histogram in theta
        theta = np.degrees(np.arctan2(positions[:,1], positions[:,0])) # in degrees
        frame_hist, _ = np.histogram(theta, thetaedges)
        thetahist_collectors = np.add(thetahist_collectors, frame_hist)

    fsd_t = np.asarray(fsd_frames, dtype=float)
    rFlory_t = np.asarray(rflory_frames, dtype=float)
    gyr_t = np.asarray(gyr_frames, dtype=float)

    np.savetxt(save_to+sim_name+'-rHists.csv', rhist_collectors, delimiter = ',')
    np.savetxt(save_to+sim_name+'-zHists.csv', zhist_collectors, delimiter = ',')
    np.savetxt(save_to+sim_name+'-thetaHists.csv', thetahist_collectors, delimiter = ',')
    np.savetxt(save_to+sim_name+'-rFloryHists.csv', rFloryhist_collectors, delimiter = ',')
    np.savetxt(save_to+sim_name+'-fsd_t.csv', fsd_t, delimiter = ',')
    np.savetxt(save_to+sim_name+'-rFlory_t.csv', rFlory_t, delimiter = ',')
    np.savetxt(save_to+sim_name+'-gyr_t.csv', gyr_t, delimiter = ',')

    with open(outfile, mode="a") as ensemble:
        PipeLine.chain_stats(fsd_t, ensemble) # fsd,fsd_std,fsd_var,fsd_sem,
        PipeLine.chain_stats(rFlory_t, ensemble) # rflory,rflory_std,rflory_var,rflory_sem,
        PipeLine.chain_stats(gyr_t, ensemble) # gyr,gyr_std,gyr_var,gyr_sem,
        ensemble.write("{}\n".format(cell.trajectory.n_frames))
    print('done.')

In [None]:
# Find the files of one 'bug' simulation in the home directory and run the extraction on it.
home = str(Path.home())
sim_name = 'N500epsilon5.0r5.5lz205.5sig0.8nc36036dt0.002bdump1000adump5000ens1'
path=home+'/' + sim_name
fname = glob(path+"/N*.bug.*")
fname = PipeLine.file_reader(fname) # This is a list with one member
geom = 'cylindrical'
print(fname)
# NOTE(review): this calls PipeLine.extract_trj_bug, not the extract_trj_bug defined in this notebook -- confirm which one is intended.
PipeLine.extract_trj_bug(fname[0], geom) # fname[0] is the only member of the list: the pair of files of the simulation.
#PipeLine.rmsd_trj_bug(fname[0], geom)

In [None]:
#No need
sim_csvs = glob("./extracted/"+sim_name+"-bug/N*.csv")
PipeLine.analysis_sumrule_bug(sim_csvs, geom, "./analyzed/", "./analyzed/", round=3)

In [None]:
%%time
# analyze all the bug groups at once:
database = './' # parent path
simulation_type = 'bug' # bug or all
input_db_name = "extracted"
input_sim_groups = glob(database+input_db_name) # the paths that match database+input_db_name (no wildcard is used in this pattern)
output_db_name = "analyzed"
geometry = 'cylindrical'
analysis_delayed = []
# One delayed task is built per matched path; nothing is executed until compute is called.
for input_sim_group in input_sim_groups:
    analysis = delayed(PipeLine.whole_group_analysis_bug)(input_sim_group, input_db_name, output_db_name, simulation_type, geometry)
    analysis_delayed.append(analysis)

# it takes less than 5 min for 12 simulation groups with 4 workers.
results = compute(analysis_delayed)

### All: extraction and analysis: a single simulation for testing and exploring

In [None]:
def extract_trj_all(all_data, all_trj, geometry, save_to="./"):
    """
    extract_trj_all histograms the positions of the crowders (resid 0) and the monomers (resid 1) over a whole "all" trajectory (simulation) and writes the histograms to csv files.

    Caution:
    The histograms are for the cylindrical coordinate system in the geometry=cylindrical. For other geometries you need to redefine them.

    Parameters:
    all_data (str): Lammps data (topology) file of the simulation.
    all_trj (str): Lammps dump (trajectory) file of the simulation.
    geometry (str): Shape of the simulation box.
    save_to (str): prefix (address) to which the output files are saved.

    Returns:
    None. For each of the two particle types ('Crd' for crowders and 'Mon' for monomers), three csv files are written: -rHists, -zHists and -thetaHists followed by the type name; they hold the histograms summed over all the frames. The edges of the bins are handled by PipeLine.bin_ceate.

    Requirements:
    MDAnalysis, PipeLine, numpy.
    """

    print("Setting the name of analyze file...\n")
    cellAttrs = PipeLine.cellAttributes(all_trj,geometry, splitter='.lammpstrj')
    sim_name = cellAttrs.filename

    print("")
    print(sim_name+" is analyzing...")
    print("")

    time_unit = 1.0 # time unit = dmon * sqrt(mmon * epmon)
    lj_nstep = cellAttrs.bdump # Sampling steps via dump command in Lammps
    lj_dt = cellAttrs.dt # Timestep during the sampling process in Lammps
    simulation_dt = lj_nstep * lj_dt * time_unit

    cell = mda.Universe(all_data, all_trj,  topology_format = 'DATA', format = 'LAMMPSDUMP', lammps_coordinate_convention= 'unscaled', atom_style = "id resid type x y z", dt = simulation_dt)
    crds = cell.select_atoms('resid 0') # resid 0 is crowders
    chrm = cell.select_atoms('resid 1') # resid 1 is the atoms creating the chain

    print ("Caution:The histograms are for the cylindrical coordinate system in the geometry=cylindrical. For other goemteries you need to redfined them.")

    # radial direction of the cylindrical coordinate system
    bin_size = 0.1 * min(cellAttrs.dmon, cellAttrs.dcrowd)
    print(f"Bin size for r in the cylindrical geometry is set to 0.1*min(cellAttrs.dmon,cellAttrs.dcrowd)={bin_size} in a_m=1 units.")
    lmax = 0.5 * cellAttrs.dcyl
    redges, rhists_crd = PipeLine.bin_ceate(sim_name, bin_size, 0.0, lmax, 'rEdgesCrd', save_to)
    _ , rhists_mon = PipeLine.bin_ceate(sim_name, bin_size, 0.0, lmax, 'rEdgesMon', save_to)

    # z direction of the cylindrical coordinate system
    if cellAttrs.dcrowd < 1.0:
        bin_size = 0.25
    else:
        bin_size = 0.5
    # The message states the rule that is actually applied.
    print(f"Bin size for z in the cylindrical geometry is set to {bin_size} (0.25 if cellAttrs.dcrowd<1.0, otherwise 0.5) in a_m=1 units.")
    lmax = 0.5 * cellAttrs.lcyl
    zedges, zhists_crd = PipeLine.bin_ceate(sim_name, bin_size, -1.0 * lmax, lmax, 'zEdgesCrd', save_to)
    _ , zhists_mon = PipeLine.bin_ceate(sim_name, bin_size, -1.0 * lmax, lmax, 'zEdgesMon', save_to)

    # theta of the cylindrical coordinate system
    bin_size = np.degrees(np.pi/36) # bin size 5 degrees
    thetaedges, thetahists_crd = PipeLine.bin_ceate(sim_name, bin_size, -1*np.degrees(np.pi), np.degrees(np.pi), 'thetaEdgesCrd', save_to)
    _ , thetahists_mon = PipeLine.bin_ceate(sim_name, bin_size, -1*np.degrees(np.pi), np.degrees(np.pi), 'thetaEdgesMon', save_to)

    # The collectors must start from zero since the per-frame histograms are summed into them.
    collectors = (rhists_crd, rhists_mon, zhists_crd, zhists_mon, thetahists_crd, thetahists_mon)
    if any(collector.any() for collector in collectors):
        raise ValueError("One of the histogram collectors are not empty!")

    def frame_histograms(positions):
        # Histograms of one group of particles in the r, z and theta (in degrees) directions for the current frame.
        r_hist, _ = np.histogram(np.linalg.norm(positions[:,:2], axis = 1), redges)
        z_hist, _ = np.histogram(positions[:,2], zedges)
        theta_hist, _ = np.histogram(np.degrees(np.arctan2(positions[:,1], positions[:,0])), thetaedges)
        return r_hist, z_hist, theta_hist

    for _ in cell.trajectory: # one iteration per snapshot (configuration or frame)
        # number density in the cell's frame of reference; the positions of each group are fetched once per frame.
        r_hist, z_hist, theta_hist = frame_histograms(crds.positions)
        rhists_crd = np.add(rhists_crd, r_hist)
        zhists_crd = np.add(zhists_crd, z_hist)
        thetahists_crd = np.add(thetahists_crd, theta_hist)

        r_hist, z_hist, theta_hist = frame_histograms(chrm.positions)
        rhists_mon = np.add(rhists_mon, r_hist)
        zhists_mon = np.add(zhists_mon, z_hist)
        thetahists_mon = np.add(thetahists_mon, theta_hist)

    lastname = 'Crd'
    np.savetxt(save_to+sim_name+'-rHists'+lastname+'.csv', rhists_crd, delimiter = ',')
    np.savetxt(save_to+sim_name+'-zHists'+lastname+'.csv', zhists_crd, delimiter = ',')
    np.savetxt(save_to+sim_name+'-thetaHists'+lastname+'.csv', thetahists_crd, delimiter = ',')

    lastname = 'Mon'
    np.savetxt(save_to+sim_name+'-rHists'+lastname+'.csv', rhists_mon, delimiter = ',')
    np.savetxt(save_to+sim_name+'-zHists'+lastname+'.csv', zhists_mon, delimiter = ',')
    np.savetxt(save_to+sim_name+'-thetaHists'+lastname+'.csv', thetahists_mon, delimiter = ',')
    print('done.')

In [None]:
# Build one delayed extraction task per "all" trajectory file of a single simulation; the tasks are run in a later cell.
home = str(Path.home())
sim_name = 'N500epsilon5.0r5.5lz205.5sig0.8nc36036dt0.002bdump1000adump5000ens1'
path=home+'/' + sim_name
trj_files = glob(path+"/N*.all.lammpstrj")
all_tuples = PipeLine.file_reader(trj_files,extensions=['lammpstrj',])
all_trjs = [all_tuple[0] for all_tuple in all_tuples] # the first item of each tuple is taken as the trajectory file
data_file = glob(path+"/N*.all.data")
all_data = PipeLine.file_reader(data_file,extensions=['all.data'])
all_data = all_data[0][0] # the same data (topology) file is used for all the trajectory files
geom = 'cylindrical'
trjs_computed = []
# NOTE(review): this calls PipeLine.extract_trj_all, not the extract_trj_all defined in this notebook -- confirm which one is intended.
for all_trj in all_trjs:
    trj_delayed = delayed(PipeLine.extract_trj_all)(all_data, all_trj, geom, save_to="./")
    trjs_computed.append(trj_delayed)

In [None]:
# Scratch calculation. NOTE(review): the numbers appear to come from the simulation name above (nc36036, sig0.8 -> radius 0.4, lz205.5); presumably an estimate of the bulk volume fraction of crowders -- confirm.
(36036*4* 0.4**3)/(205.5*2* 5**2 *3)

In [None]:
%%time
# Run the delayed extraction tasks collected in trjs_computed by an earlier cell.
results = compute(trjs_computed)

In [None]:
# Settings for the analysis of the "all" simulations and one dataframe (all_properties) made from all the properties csv files of the analyzed bug groups.
database = './'
simulation_type = 'all' # bug or all
input_db_name = 'extracted'
input_sim_groups = glob(database+input_db_name) # the paths that match database+input_db_name (no wildcard is used in this pattern)
output_db_name = "analyzed"
properties_csvs = glob("./analyzed/N*-bug-analyzed/N*-properties.csv")
all_properties = []
for properties_csv in properties_csvs:
    df = pd.read_csv(properties_csv,index_col=0)
    all_properties.append(df) 
# all_properties changes from a list of dataframes to a single dataframe here.
all_properties = pd.concat(all_properties)
all_properties.reset_index(inplace=True, drop=True)
print(all_properties.head())

In [None]:
%%time
# Analyze the segments of the "all" simulations: for each group, first analyze the extracted csv files and then schedule the whole-group analysis.
# This cell uses input_sim_groups, database, input_db_name, output_db_name, simulation_type and all_properties from the previous cell.
geometry = 'cylindrical'
analysis_delayed = []
for input_sim_group in input_sim_groups: # run over all simulations in all the groups
    sim_csvs = glob(input_sim_group+"/N*/N*.csv")
    if not sim_csvs:
        # Without any csv file there is no group name to read and nothing to analyze.
        print(f"No csv files found in {input_sim_group}; it is skipped.")
        continue
    group_name = sim_csvs[0].split("/")[2]
    group_type = database+output_db_name+"/"+group_name+'-'+output_db_name+"-whole_simulations/"
    try:
        Path(group_type).mkdir(parents=True, exist_ok=False)
    except FileExistsError as error:
        print(error) # The directory already exists; it is reported and reused.
    PipeLine.analysis_sumrule_segment(sim_csvs, all_properties, geometry, group_type)
    # One delayed task per group; it is built inside the loop so that every group is scheduled, not only the last one.
    analysis = delayed(PipeLine.whole_group_analysis_segments)(input_sim_group, input_db_name, output_db_name, simulation_type, all_properties, geometry)
    analysis_delayed.append(analysis)
# it takes around 20 minutes.
results = compute(analysis_delayed)

In [None]:
# For each kind of histogram/distribution file of one analyzed "all" simulation, build the ensembles and then the ensemble-averaged group files.
hist_names = ['rHistsCrd','rHistsMon','zHistsCrd','zHistsMon','thetaHistsCrd','thetaHistsMon','rPhisCrd','rPhisMon','rRhosCrd','rRhosMon','zPhisCrd','zPhisMon','zRhosCrd','zRhosMon']
sim_csvs = glob("./analyzed/N500epsilon5.0r5.5lz205.5sig0.8nc12012dt0.002bdump1000adump5000ens1-all-analyzed-whole_simulations/N*.csv")
seperator = '-'
geom = 'cylindrical'
for hist_name in hist_names:
    ext = [seperator+hist_name+'.csv'] # the files of this kind end with '-<hist_name>.csv'
    hist_files = PipeLine.file_reader(sim_csvs, extensions=ext)
    hist_dicts = PipeLine.ensemble(hist_files, hist_name, geom, sep=seperator, index_col=0, skiprows=1, save_to= "./analyzed/")
    # The returned value is overwritten in every iteration and is not used in this cell.
    ens_evg_dict_rhist = PipeLine.group(hist_dicts, hist_name, geom, save_to="./analyzed/") # ensemble-averaged files

# All-in-one properties

In [None]:
def cylindrical_norm_without_chain_sizes(all_ens_avg_props):
    """
    cylindrical_norm_without_chain_sizes adds rescaled bulk volume fractions of crowders to an ensemble-averaged properties dataframe in the cylindrical confinement.

    The dataframe is modified in place; these columns are added:
    phi_c_bulk_normalized: dmon * phi_c_bulk / dcrowd
    phi_c_bulk_eff: the total volume of ncrowd spheres of diameter dcrowd divided by the volume of a cylinder of diameter (dcyl - dcrowd) and length lcyl.
    phi_c_bulk_eff_normalized: dmon * phi_c_bulk_eff / dcrowd

    Parameters:
    all_ens_avg_props: a dataframe of all the ensemble-averaged properties with the columns dmon, dcrowd, ncrowd, dcyl, lcyl and phi_c_bulk.

    Return:
    the same all_ens_avg_props object with the new columns.
    """
    frame = all_ens_avg_props
    frame['phi_c_bulk_normalized'] = frame['dmon'].mul(frame['phi_c_bulk']).div(frame['dcrowd'])
    # Volume of one crowder times the number of crowders.
    crowder_volume = np.pi * frame['dcrowd'] ** 3 / 6
    crowders_total_volume = crowder_volume * frame['ncrowd']
    # Cross section of the cylinder of diameter (dcyl - dcrowd).
    reduced_cross_section = np.pi / 4 * (frame['dcyl'] - frame['dcrowd']) ** 2
    frame['phi_c_bulk_eff'] = crowders_total_volume / (reduced_cross_section * frame['lcyl'])
    frame['phi_c_bulk_eff_normalized'] = frame['dmon'].mul(frame['phi_c_bulk_eff']).div(frame['dcrowd'])
    return frame

In [None]:
# create one dataframe of all the properties files:
# first for the individual simulations, then for the ensemble-averaged ones.
properties_files = glob("./analyzed/N*-bug-analyzed/N*-properties.csv")
properties_files = PipeLine.file_reader(properties_files,extensions=['-properties.csv'])
properties_all_in_one = PipeLine.all_in_one_properties(properties_files, save_to="./analyzed/", round_to=4, index_col=0)
properties_files_ens_avg = glob("./analyzed/N*-bug-analyzed-ens_avg/N*-properties-ens_avg.csv")
properties_files_ens_avg = PipeLine.file_reader(properties_files_ens_avg,extensions=['-properties-ens_avg.csv'])
# The ensemble-averaged properties are passed the normalizing function defined in this notebook (cylindrical_norm_without_chain_sizes).
properties_all_in_one_ens_avg = PipeLine.all_in_one_properties(properties_files_ens_avg, ens_avg=True, norm_func=cylindrical_norm_without_chain_sizes, save_to="./analyzed/", round_to=4, index_col=0)

In [None]:
%%time
# distributions:
# all-in-one distributions of the ensemble-averaged "all" files in the radial and longitudinal directions.
database = './analyzed/'
dist_db= database+"N*-all-analyzed-ens_avg/N*.csv"
properties_db = './analyzed/all_in_one-properties-ens_avg-normalized.csv'
simulation_type = 'all'
geometry='cylindrical'
radial_dists = PipeLine.generator_dist_all_in_one(dist_db, properties_db, simulation_type, geometry, 'radial')
longitudinal_dists = PipeLine.generator_dist_all_in_one(dist_db, properties_db, simulation_type, geometry, 'longitudinal')

# Quick viz: 

In [None]:
# The distinct values of phi_c_bulk in `distributions`.
# NOTE(review): in the visible part of the notebook, `distributions` is first loaded by a pd.read_csv call in a later cell, so this cell seems to depend on running that cell first -- confirm.
distributions.phi_c_bulk.drop_duplicates().values

In [None]:
# Sum of the hist_crd_z column over the rows with phi_c_bulk equal to 0.2993, divided by 36036.
# NOTE(review): 36036 is presumably the number of crowders (nc36036 in the simulation names), which would make this a count per crowder -- confirm.
distributions[distributions.phi_c_bulk==0.2993]['hist_crd_z'].sum()/36036

In [None]:
# Scratch arithmetic. NOTE(review): presumably the expected number of sampled frames, to compare with the result of the previous cell -- confirm.
70000/5

In [None]:
# Quick look at the crowder distributions: for each group, the histogram, the number density and the volume fraction are plotted against the normalized coordinate, one line per phi_c_bulk.
parent = './analyzed/'
simulation_type = 'all' # all or bug
attribute = 'distributions'
direction_name = 'longitudinal'
direction = 'z'
y_norm = '_norm'
x_norm_mon = '_mon'#'_mon'
x_norm_crd = '_crd'#'_crd'
distributions = pd.read_csv(parent+'all_in_one-'+simulation_type+'-'+attribute+'-'+direction_name+'-ens_avg.csv',index_col=0)

#distributions = pd.read_csv('./binsize0.2/all_in_one-'+simulation_type+'-'+attribute+'-'+direction_name+'-ens_avg.csv',index_col=0)

binsize="0.4" # only used as a tag in the names of the output files
# The group names are kept in the order of their first appearance, so the order of the figures is the same in every run.
group_names = distributions.group_name.drop_duplicates().tolist()
sns.set_context('paper')
sns.set_style("ticks")
for group_name in group_names:
    group = distributions[distributions.group_name==group_name]
    fig, axes = plt.subplots(nrows=3,ncols=1,figsize=(16,12))
    # NOTE(review): the x column is direction+'_norm' here; x_norm_crd is defined above but not used -- confirm that this is intended.
    sns.lineplot(x=direction+'_norm', y='hist_crd_'+direction, hue='phi_c_bulk', data=group,ax=axes[0],legend='full')
    sns.lineplot(x=direction+'_norm', y='rho_crd_'+direction, hue='phi_c_bulk', data=group,ax=axes[1],legend='full')
    sns.lineplot(x=direction+'_norm', y='phi_crd_'+direction, hue='phi_c_bulk', data=group,ax=axes[2],legend='full')
    fname = group_name+'-crd-'+direction_name+'-'+direction+'-'+binsize+".pdf"
    fig.savefig(fname,dpi=200) # each figure is saved once

In [None]:
# Plot the monomer ('mon') columns of the ensemble-averaged distribution table:
# one figure per group_name with three stacked panels (hist, rho, phi), each
# coloured by phi_c_bulk, saved as a PDF in the working directory.
# Relies on parent, simulation_type, attribute, direction_name, direction and
# binsize already being defined in the kernel.
x_norm_mon = '_mon'#'_mon'
x_norm_crd = '_crd'#'_crd'
distributions = pd.read_csv(
    parent+'all_in_one-'+simulation_type+'-'+attribute+'-'+direction_name+'-ens_avg.csv',
    index_col=0,
)
# sorted() makes the order of the figures reproducible between runs;
# iterating a bare set gives an arbitrary order.
group_names = sorted(set(distributions.group_name))
sns.set_context('paper')
sns.set_style("ticks")
for group_name in group_names:
    group = distributions[distributions.group_name==group_name]
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(16,12))
    # One panel per quantity, top to bottom: hist, rho, phi.
    for ax, prop in zip(axes, ('hist', 'rho', 'phi')):
        sns.lineplot(
            x=direction+'_norm'+x_norm_mon,
            y=prop+'_mon_'+direction,
            hue='phi_c_bulk',
            data=group,
            ax=ax,
            legend='full',
        )
    fname = group_name+'-mon-'+direction_name+'-'+direction+'-'+binsize+".pdf"
    fig.savefig(fname, dpi=200)
    # Show the figure, then release it so figures do not pile up in memory
    # while looping over the groups.
    plt.show()
    plt.close(fig)

# Density analysis from MDA

In [None]:
# Locate the files of one simulation stored in the user's home directory and
# keep the first entry returned by PipeLine.file_reader for the analysis below.
home = str(Path.home())
path = f"{home}/N500epsilon5.0r5.5lz205.5sig0.8nc12012dt0.002bdump1000adump5000ens1"
fname = PipeLine.file_reader(glob(f"{path}/N*.bug.*"))  # per the original note: a list with one member
print(fname)
simulation_pair = fname[0]  # per the original note: a tuple holding a trj/data pair

geometry = 'cylindrical'
save_to = "./"
# Disabled calls kept from the original cell (note that `geom` is not defined here):
#extract_trj_bug(fname[0], geom)
#PipeLine.rmsd_trj_bug(fname[0], geom)

In [None]:
# NOTE(review): this import sits in the middle of the notebook; consider moving
# it to the import cell at the top.
from MDAnalysis.analysis import density

# Step 1: derive the output file name from the simulation attributes.
print("This script for cylindrical geomery")
print("Setting the name of analyze file...\n")
today = datetime.date.today().strftime('%Y%m%d')  # not used further in this cell
cellAttrs = PipeLine.cellAttributes(simulation_pair[1], geometry)
sim_name = cellAttrs.filename
#ensGroup = "N"+str(cellAttrs.nmon)+'D'+str(cellAttrs.dcyl)+"_"
outfile = save_to + sim_name + "-properties.csv"

# Step 2: write the column header and the cylinder attributes to the properties file.
print("\n" + sim_name + " is analyzing...\n")
with open(outfile, mode="w") as ensemble:
    ensemble.write(cellAttrs.cols)
    PipeLine.cylinder_write(ensemble, cellAttrs)

# Step 3: time between two consecutive dumped frames.
print("Analyzing...")
time_unit = 1.0  # time unit = dmon * sqrt(mmon * epmon)
lj_nstep = cellAttrs.bdump  # Sampling steps via dump command in Lammps
lj_dt = cellAttrs.dt  # Timestep during the sampling process in Lammps
simulation_dt = lj_nstep * lj_dt * time_unit

# Step 4: load the trajectory and run the density analysis on the chain atoms.
cell = mda.Universe(
    simulation_pair[0],
    simulation_pair[1],
    format='AMIRLAMMPSDUMP',
    atom_style="id resid type x y z",
    dt=simulation_dt,
)
chrm = cell.select_atoms('resid 1')  # resid 1 is the atoms creating the chain
print("Caution:The histograms are for the cylindrical coordinate system in the geometry=cylindrical. For other goemteries you need to redfined them.")
# Disabled lines kept from the original cell:
#u = Universe(TPR, XTC)
#ow = u.select_atoms("name OW")
D = density.DensityAnalysis(chrm, delta=0.4)
D.run()
#D.density.convert_density('TIP4P')