# andi_datasets/analysis.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ../source_nbs/lib_nbs/analysis.ipynb.

# %% auto 0
__all__ = ['get_angle', 'dataset_angles', 'msd_analysis', 'vacf', 'CH_changepoints', 'CRLB_D']

# %% ../source_nbs/lib_nbs/analysis.ipynb 2
import numpy as np
import math

# %% ../source_nbs/lib_nbs/analysis.ipynb 5
def get_angle(a:tuple, # 2d position point A
              b:tuple, # 2d position point B
              c:tuple # 2d position point C       
             ) -> float:  # angle (in degrees) between segments BA and BC
                
    ''' Calculates the angle between the segments generated by three points.

    The angle is measured at the middle point B, going from the direction
    B->A to the direction B->C. It is expressed in degrees and negative
    values are wrapped by adding 360, so the result is never negative.
    '''
    # Orientation of each segment as seen from the shared vertex B
    heading_bc = math.atan2(c[1]-b[1], c[0]-b[0])
    heading_ba = math.atan2(a[1]-b[1], a[0]-b[0])
    ang = math.degrees(heading_bc - heading_ba)
    # The difference of two atan2 values can be negative: wrap it around
    return ang + 360 if ang < 0 else ang

def dataset_angles(trajs:list, # trajectories, each indexed as [coordinate, time step]
                  ) -> list: # angles given by get_angle for every triplet of consecutive points
    '''Collects, for every trajectory of a set, the angles formed by consecutive points'''
    collected = []
    for trajectory in trajs:
        # Three views of the same trajectory, each shifted by one time step.
        # After transposing, iterating over a view yields one point per step.
        previous_pts = trajectory[:, :-2].transpose()
        middle_pts = trajectory[:, 1:-1].transpose()
        next_pts = trajectory[:, 2:].transpose()
        collected.extend(
            get_angle(p, q, r) for p, q, r in zip(previous_pts, middle_pts, next_pts)
        )
    return collected

# %% ../source_nbs/lib_nbs/analysis.ipynb 7
class msd_analysis():
    ''' Contains mean squared displacement (MSD) based methods to analyze trajectories.  '''

    def __init__(self):
        ''' Contains mean squared displacement (MSD) based methods to analyze trajectories.  '''

    def _prepare_inputs(self, trajs, t_lags):
        ''' Normalizes the inputs shared by `get_diff_coeff` and `get_exponent`. '''
        # To account for previous versions of these functions, we correct if given a single 1D trajectory
        if len(trajs.shape) == 1:
            trajs = trajs[np.newaxis, :, np.newaxis]

        # `not t_lags` is ambiguous (and raises) for numpy arrays with more than
        # one element, so the "no time lags given" cases are checked explicitly.
        if t_lags is None or t_lags is False or np.size(t_lags) == 0:
            N_t_lags = max(4, int(trajs.shape[1]*0.1))
            t_lags = np.arange(1, N_t_lags)

        return trajs, t_lags

    def tamsd(self, 
              trajs:np.ndarray, 
              t_lags:np.ndarray,
              dim = 1
             ):
        '''
        Calculates the time average mean squared displacement (TA-MSD) of a set of
        trajectories at various time lags.
        
        Parameters
        ----------
        trajs : np.array
            Set of trajectories of dimensions NxTxD (N: number of trajectories, T: length, D: dimension)
        
        t_lags : list | np.array
            Positive integer time lags used for the TA-MSD
        
        dim : int
            Kept for backward compatibility. It is not used in the computation:
            the squared displacements are summed over the last axis of `trajs`.
        
        Returns       
        -------
        np.array
            Array of shape (len(t_lags), N) with the TA-MSD of each trajectory at each time lag
            
        '''
        tamsd = np.zeros((len(t_lags), trajs.shape[0]), dtype= float)
        
        for idx, tlag in enumerate(t_lags):
            # Displacements over `tlag` steps; squared norm (sum over the last
            # axis) and then average over time (axis 1)
            displacements = trajs[:, tlag:, :] - trajs[:, :-tlag, :]
            tamsd[idx, :] = (displacements**2).sum(-1).mean(1)
            
        return tamsd    

    def get_diff_coeff(self, 
                       trajs:np.ndarray, 
                       t_lags:list = None):
        '''
        Calculates the diffusion coefficient of each trajectory by means of the linear
        fitting of the TA-MSD.
        
        Parameters
        ----------
        trajs : np.array
            Set of trajectories of dimensions NxTxD (N: number of trajectories, T: length, D: dimension).
            A single 1D trajectory of length T is also accepted.
        
        t_lags : None | list | np.array
            Time lags used for the TA-MSD. If not given (None, False or empty), the lags
            1, ..., max(4, int(0.1*T)) - 1 are used.
        
        Returns       
        -------
        np.array
            Diffusion coefficient of each trajectory: slope of the linear fit of the
            TA-MSD divided by 2*D.
        
        '''
        trajs, t_lags = self._prepare_inputs(trajs, t_lags)

        msd = self.tamsd(trajs, t_lags)
        
        # First row of the polyfit output holds the slope for every trajectory
        return np.polyfit(t_lags, msd, deg = 1)[0, :]/2/trajs.shape[-1]

    def get_exponent(self, 
                     trajs:np.ndarray, 
                     t_lags:list = None):
        '''
        Calculates the exponent of the TA-MSD of each trajectory, i.e. the slope
        of a linear fit of log(TA-MSD) against log(time lag).
        
        Parameters
        ----------
        trajs : np.array
            Set of trajectories of dimensions NxTxD (N: number of trajectories, T: length, D: dimension).
            A single 1D trajectory of length T is also accepted.
        
        t_lags : None | list | np.array
            Time lags used for the TA-MSD. If not given (None, False or empty), the lags
            1, ..., max(4, int(0.1*T)) - 1 are used.
        
        Returns       
        -------
        np.array
            Exponent of the TA-MSD of each trajectory.
        
        '''
        trajs, t_lags = self._prepare_inputs(trajs, t_lags)

        msd = self.tamsd(trajs, t_lags)
        
        # Slope of the fit in log-log scale
        return np.polyfit(np.log(t_lags), np.log(msd), deg = 1)[0]
    
    

# %% ../source_nbs/lib_nbs/analysis.ipynb 16
def vacf(trajs, 
         delta_t:int | list | np.ndarray  = 1, 
         taus:bool | list | np.ndarray = None):
    '''
    Calculates the velocity autocorrelation function for 
    the given set of trajectories.
    
    Parameters
    ----------
    trajs : np.array
     NxT matrix containing N trajectories of length T.
        
    delta_t : int | list | array
     If not None, the vacf is calculated in the demanded time lags. 
        
    taus : bool |  list | array
     Time windows at wich the vacf is calculated. 
    
    Returns
    -------
    np.array
        VACF of the given trajectories and the given time windows.        
    '''
    if isinstance(delta_t, int): delta_t = [delta_t]
    
    if taus is None: taus = np.arange(1, trajs.shape[1]).astype(int)
        
    V = np.zeros((len(delta_t), len(taus)))
    for idx_d, delta in enumerate(delta_t):
        # Calculate the velocity
        velocity = trajs[: ,delta:] - trajs[:,:-delta]
        velocity /= delta_t   
    
    
        for idx_t, tau in enumerate(taus):
            if tau == 0:
                V[idx_d, idx_t] = (velocity**2).mean()
            else:
                V[idx_d, idx_t] = (velocity[:, :-tau]*velocity[:, tau:]).mean()
        V[idx_d, :] /= V[idx_d, 0]
        
    return V

# %% ../source_nbs/lib_nbs/analysis.ipynb 20
from scipy.spatial import ConvexHull

def CH_changepoints(trajs, 
                    tau:int = 10, 
                    metric:{'volume', 'area'} = 'volume'):
    ''' 
    Computes the change points of multistate trajectories based on the Convex Hull (CH)
    approach proposed in PRE 96 (022144), 2017.
    
    For every trajectory, the chosen CH metric is computed over sliding windows
    of 2*tau consecutive points. A change point is reported wherever the metric
    crosses its own mean over the trajectory.
    
    Parameters
    ----------
    trajs : list | np.array
     Collection of trajectories. Each trajectory is converted to an array whose
     first axis is time and whose rows are the points passed to 
     scipy.spatial.ConvexHull.
        
    tau : int
     Half-width of the sliding window: each CH is built from 2*tau points.
        
    metric : {'volume', 'area'} 
     Attribute of scipy.spatial.ConvexHull used to detect the change points.
    
    Returns
    -------
    list
        One array per trajectory, of shape (number of change points, 1), 
        with the time indices of the detected change points.
        
    Raises
    ------
    ValueError
        If `metric` is neither 'volume' nor 'area'.
    '''
    # An unknown metric used to leave the CH signal at zero and silently
    # report no change points; fail loudly instead.
    if metric not in ('volume', 'area'):
        raise ValueError(f"metric must be 'volume' or 'area', got {metric!r}")
    
    window = 2*tau
    CPs = []
    for traj in trajs:
        traj = np.array(traj)

        n_windows = traj.shape[0]-window
        Sd = np.zeros(n_windows)
        for k in range(n_windows):
            Sd[k] = getattr(ConvexHull(traj[k:(k+window)]), metric)

        # Change points are the positions where the signal crosses its mean
        below_mean = Sd < Sd.mean()
        cp_traj = np.argwhere(below_mean[1:] != below_mean[:-1])+1
        # Shift from window index to the time index at the window center
        CPs.append(cp_traj+tau)

    return CPs

# %% ../source_nbs/lib_nbs/analysis.ipynb 24
def CRLB_D(T:int, # Length of the trajectory
           dim:int = 1 # Dimension of the trajectory (1 or 2)
          ) ->float: # Cramér-Rao bound 
    '''
    Calculates the bound for S(D)/D, i.e. ratio between the standard deviation and the expected value of D
    This holds for x->0 only (i.e. no noise)! See PRE 85, 061916 (2012) for full equation.
    
    The denominators vanish at T = 1 and T = 2, so T must be larger than 2.
    Raises a ValueError if `dim` is neither 1 nor 2.
    '''
    if dim == 1:
        return 2*((4-3*T)/((4-2*T)*(T-1)))**(1/2)
    if dim == 2:
        return ((3*T-4)/((T-1)*(T-2)))**(1/2)
    # Previously any other dimension silently returned None
    raise ValueError(f'CRLB_D only supports dim = 1 or dim = 2, got dim = {dim!r}')