In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Load the three example time series used by the spot-checks below.
# The paths are relative, so the files must sit in the kernel's working directory.
ts1 = np.loadtxt("ts1.txt")
ts2 = np.loadtxt("ts2.txt")
ts3 = np.loadtxt("ts3.txt")

In [61]:
import numpy as np
from Operations.CO_FirstCrossing import CO_FirstCrossing

def CO_HistogramAMI(y, tau = 1, meth = 'even', numBins = 10):
    """
    CO_HistogramAMI: The automutual information of the distribution using histograms.

    Parameters:
    y (array-like): The input time series
    tau (int, list, tuple, numpy array or str): The time-lag(s) (default: 1).
                Lags must be non-negative; a lag of 0 pairs the series with itself.
                'ac' or 'tau' takes the lag from CO_FirstCrossing(y, 'ac', 0, 'discrete').
    meth (str): The method of computing automutual information:
                'even': evenly-spaced bins through the range of the time series,
                'std1', 'std2': evenly-spaced bins over [-1, 1] or [-2, 2], with one extra
                                bin added on a side whenever the data extend beyond it
                                (the edges are literal values, so they correspond to
                                standard deviations only for z-scored input),
                'quantiles': equiprobable bins chosen using quantiles.
    numBins (int): The number of bins (default: 10)

    Returns:
    float or dict: The automutual information (natural logarithm) for a single lag,
                   or a dict keyed 'ami1', 'ami2', ... (in the order the lags were given)
                   when several lags are supplied.

    Raises:
    ValueError: If meth is not recognised or a lag is negative.
    """
    y = np.asarray(y)

    # Use first zero crossing of the ACF as the time lag
    if isinstance(tau, str) and tau in ['ac', 'tau']:
        tau = CO_FirstCrossing(y, 'ac', 0, 'discrete')

    # Bin edges for the data
    # same for both axes -- assume same distribution (true for stationary processes, or small lags)
    if meth == 'even':
        b = np.linspace(np.min(y), np.max(y), numBins + 1)
        # Widen the outer edges slightly to ensure all points are included
        b[0] -= 0.1
        b[-1] += 0.1
    elif meth in ('std1', 'std2'):
        halfWidth = int(meth[-1])  # 1 for 'std1', 2 for 'std2'
        b = np.linspace(-halfWidth, halfWidth, numBins + 1)
        # Catch-all bins for points lying outside the central range
        if np.min(y) < -halfWidth:
            b = np.concatenate(([np.min(y) - 0.1], b))
        if np.max(y) > halfWidth:
            b = np.concatenate((b, [np.max(y) + 0.1]))
    elif meth == 'quantiles':
        # ~equal number of points in each bin
        b = np.quantile(y, np.linspace(0, 1, numBins + 1))
        b[0] -= 0.1
        b[-1] += 0.1
    else:
        raise ValueError(f"Unknown method '{meth}'")

    # Sometimes bins can be added (e.g., with std1 and std2), so need to redefine numBins
    numBins = len(b) - 1

    # A single lag is wrapped so the loop below handles both cases
    if not isinstance(tau, (list, tuple, np.ndarray)):
        tau = [tau]

    numPoints = len(y)
    amis = np.zeros(len(tau))

    for i, t in enumerate(tau):
        if t < 0:
            raise ValueError(f"Time lag must be non-negative, got {t}")

        # Form the time-delay vectors. The end index is written out explicitly
        # because y[:-t] is an EMPTY slice when t == 0 (and -0 == 0).
        y1 = y[:max(numPoints - t, 0)]
        y2 = y[t:]

        # Joint distribution of y1 and y2
        pij, _, _ = np.histogram2d(y1, y2, bins=(b, b))
        pij = pij / np.sum(pij)  # normalize
        pi = np.sum(pij, axis=1)  # marginal
        pj = np.sum(pij, axis=0)  # other marginal

        # Marginals laid out on the joint grid: pii[a, c] = pi[a], pjj[a, c] = pj[c]
        pii = np.tile(pi, (numBins, 1)).T
        pjj = np.tile(pj, (numBins, 1))

        r = pij > 0  # Defining the range in this way, we set log(0) = 0
        amis[i] = np.sum(pij[r] * np.log(pij[r] / pii[r] / pjj[r]))

    if len(tau) == 1:
        return amis[0]
    else:
        return {f'ami{i+1}': ami for i, ami in enumerate(amis)}


In [75]:
# Spot-check on ts3 with quantile bins; tau='ac' makes CO_HistogramAMI obtain the lag from CO_FirstCrossing
CO_HistogramAMI(ts3, 'ac', meth='quantiles')

0.04761744653583484

In [60]:
# NOTE(review): neither `stats` nor `ts3_zs` is defined in any cell above this one, so this
# fails on a fresh kernel (presumably scipy.stats and a z-scored copy of ts3 -- confirm).
stats.kurtosis(ts3_zs, fisher=False)

3.0797345972733234

In [3]:
def stepBinary(X):
    """
    Threshold a vector of real values at zero.

    Parameters:
    X (array-like): 1-D vector of real values

    Returns:
    numpy.ndarray: float vector with len(X) entries, 1 where X > 0 and 0 otherwise
    """
    # Coerce first so plain lists/tuples are accepted: `X > 0` on a list raises TypeError
    X = np.asarray(X)

    Y = np.zeros(len(X))
    Y[X > 0] = 1

    return Y

In [77]:
def expFunc(x, a, b):
    """Exponential model: returns a * exp(b * x)."""
    return a * np.exp(b * x)

In [76]:
def firstUnder_fn(x, m, p):
    """
    Return the entry of m at the first position where p falls below the threshold x.

    m and p are walked in parallel and are expected to have the same length.
    If no entry of p is strictly below x, the last entry of m is returned.
    """
    fallback = m[-1]
    for m_val, p_val in zip(m, p):
        if p_val < x:
            return m_val
    return fallback

In [22]:
def BF_Binarize(y, binarizeHow='diff'):
    """
    Convert a time series into a binary (0/1) sequence.

    Parameters:
    y (array-like): the input time series
    binarizeHow (str): the rule used to binarize:
        'diff': 1 for stepwise increases, 0 otherwise (output is one sample shorter than y),
        'mean': 1 for values above the mean, 0 otherwise,
        'median': 1 for values above the median, 0 otherwise,
        'iqr': 1 for values strictly inside the interquartile range, 0 otherwise.

    Returns:
    numpy.ndarray: the binarized series, as floats

    Raises:
    ValueError: If binarizeHow is not one of the settings above
    """
    if binarizeHow == 'iqr':
        # Binary signal: 1 if inside interquartile range, 0 otherwise
        lowerQ, upperQ = np.quantile(y, [.25, .75])
        insideIQR = np.logical_and(y > lowerQ, y < upperQ)
        yBin = np.zeros(len(y))
        yBin[insideIQR] = 1
        return yBin

    # Every remaining setting thresholds a shifted signal at zero
    if binarizeHow == 'diff':
        shifted = np.diff(y)
    elif binarizeHow == 'mean':
        shifted = y - np.mean(y)
    elif binarizeHow == 'median':
        shifted = y - np.median(y)
    else:
        raise ValueError(f"Unknown binary transformation setting '{binarizeHow}'")

    return stepBinary(shifted)

In [25]:
# Binarize ts1 about its mean (1 where above the mean, 0 otherwise)
mea = BF_Binarize(ts1, binarizeHow='mean')

In [27]:
# Scratch 3 x 20 matrix of random normal samples (no seed is set, so values differ between runs)
mat = np.random.randn(3, 20)

In [31]:
# Number of elements along axis 1 of mat (its column count)
np.size(mat, 1)

20

In [32]:
def BF_SignChange(y, doFind=0):
    """
    Where a data vector changes sign.

    Parameters:
    y (array-like): the input data vector
    doFind (int): 0 (default) to get a boolean mask, anything else to get positions

    Returns:
    With doFind == 0, a boolean array of length len(y) - 1 whose k-th entry is True
    when y[k] and y[k+1] have a strictly negative product. Otherwise the output of
    np.where on that mask, i.e. a tuple holding the array of such positions k.
    """
    # The product of neighbouring samples is negative exactly when their signs differ
    signFlips = np.multiply(y[1:], y[:-1]) < 0

    if doFind == 0:
        return signFlips

    return np.where(signFlips)


In [36]:
# Positions where consecutive samples of ts1 differ in sign (doFind=1 returns the np.where output)
out = BF_SignChange(ts1, doFind=1)

In [43]:
# Show the result: a 1-tuple holding the index array
out

(array([ 14,  30,  46,  61,  77,  93, 108, 124, 140, 156, 171, 187, 203,
        218, 234, 250, 266, 281, 297, 313, 328, 344, 360, 375, 391, 407,
        423, 438, 454, 470, 485, 501, 517, 533, 548, 564, 580, 595, 611,
        627, 643, 658, 674, 690, 705, 721, 737, 752, 768, 784, 800, 815,
        831, 847, 862, 878, 894, 910, 925, 941, 957, 972, 988]),)

In [44]:
from scipy.stats import moment
import numpy as np

In [45]:
def DN_Moments(y, theMom):
    """
    A moment of the distribution of the input time series.

    Parameters:
    y (array-like): the input time series
    theMom (int): the order of the moment to compute

    Returns:
    The central moment of order theMom (scipy.stats.moment), divided by the
    standard deviation of y (np.std).
    """
    centralMoment = moment(y, theMom)
    spread = np.std(y)

    return centralMoment / spread  # normalized

In [51]:
# Spot-check: 5th central moment of ts1 divided by its standard deviation
DN_Moments(ts1, 5)

-0.005128976116081682

In [65]:
import numpy as np
from scipy import stats

def DN_Mean(y, mean_type='arithmetic'):
    """
    A given measure of location of a data vector.

    Parameters:
    y (array-like): The input data vector (a bare scalar is treated as one observation)
    mean_type (str): The type of mean to calculate
        'norm' or 'arithmetic': arithmetic mean
        'median': median
        'geom': geometric mean
        'harm': harmonic mean
        'rms': root-mean-square
        'iqm': interquartile mean (mean of the values lying between the
               25th and 75th percentiles, bounds included)
        'midhinge': midhinge (average of the 25th and 75th percentiles)

    Returns:
    float: The calculated mean value

    Raises:
    ValueError: If an unknown mean type is specified
    """
    # Array form is needed for the boolean-mask indexing and the ** operator below
    y = np.asarray(y)

    if mean_type in ['norm', 'arithmetic']:
        return np.mean(y)
    elif mean_type == 'median':
        return np.median(y)
    elif mean_type == 'geom':
        return stats.gmean(y)
    elif mean_type == 'harm':
        return stats.hmean(y)
    elif mean_type == 'rms':
        return np.sqrt(np.mean(y**2))
    elif mean_type == 'iqm':
        p = np.percentile(y, [25, 75])
        return np.mean(y[(y >= p[0]) & (y <= p[1])])
    elif mean_type == 'midhinge':
        p = np.percentile(y, [25, 75])
        return np.mean(p)
    else:
        raise ValueError(f"Unknown mean type '{mean_type}'")

In [66]:
def DN_ProportionValues(x, propWhat='positive'):
    """
    Proportion of values in a data vector that meet a condition.

    Parameters:
    x (array-like): the input data vector
    propWhat (str): the condition to measure:
        'zeros': values equal to zero,
        'positive': values strictly greater than zero,
        'geq0': values greater than or equal to zero.

    Returns:
    float: the number of matching values divided by len(x)

    Raises:
    ValueError: If propWhat is not one of the conditions above
    """
    # Coerce so that lists work too: on a plain list `x == 0` is a single bool
    # and `x > 0` raises TypeError.
    x = np.asarray(x)
    N = len(x)

    if propWhat == 'zeros':
        # returns the proportion of zeros in the input vector
        matches = x == 0
    elif propWhat == 'positive':
        matches = x > 0
    elif propWhat == 'geq0':
        matches = x >= 0
    else:
        raise ValueError(f"Unknown condition to measure: {propWhat}")

    # Vectorised count along the first axis (what the builtin sum() iterates over)
    return np.sum(matches, axis=0) / N


In [74]:
# Proportion of ts3 samples that are >= 0
DN_ProportionValues(ts3, 'geq0')

0.519

In [75]:
def DN_Quantile(y, p=0.5):
    """
    Calculates the quantile value at a specified proportion, p.

    Parameters:
    y (array-like): The input data vector
    p (float): The quantile proportion, 0 <= p <= 1 (default is 0.5, which is the median).
               Python and numpy real scalars are accepted.

    Returns:
    float: The calculated quantile value

    Raises:
    ValueError: If p is not a number between 0 and 1 (inclusive); NaN is rejected too

    Note: a message is printed whenever p equals 0.5, whether or not it was passed explicitly.
    """
    # Validate first, so a non-numeric p never reaches the comparison below.
    # `not (0 <= p <= 1)` also rejects NaN, which `p < 0 or p > 1` lets through.
    if not isinstance(p, (int, float, np.integer, np.floating)) or not (0 <= p <= 1):
        raise ValueError("p must specify a proportion, in [0, 1]")

    if p == 0.5:
        print("Using quantile p = 0.5 (median) by default")

    return np.quantile(y, p)


In [82]:
# 0.3 quantile of ts1
DN_Quantile(ts1, p=0.3)

-0.58822

In [97]:
from scipy.stats import uniform, norm, geom

In [98]:
# NOTE: this cell raises (see the AttributeError below) -- the `geom` distribution object has no `fit` attribute
geom.fit(ts2)

AttributeError: 'geom_gen' object has no attribute 'fit'

In [107]:
def EN_CID(y):
    """
    Simple complexity measure of a time series.

    Estimates the 'complexity' of a time series as the stretched-out length of the
    lines in a line-graph of the series.

    Parameters:
    y (array-like): the input time series

    Returns:
    out (dict): with keys
        'CE1', 'CE2': the two estimates (see f_CE1 and f_CE2) for y,
        'minCE1', 'minCE2': the same estimates for the sorted copy of y,
        'CE1_norm', 'CE2_norm': each estimate divided by its sorted-series counterpart.
    """
    # Estimates for the series in its given order
    CE1 = f_CE1(y)
    CE2 = f_CE2(y)

    # Reference values come from the same samples arranged in ascending order
    ySorted = np.sort(y)
    minCE1 = f_CE1(ySorted)
    minCE2 = f_CE2(ySorted)

    return {
        'CE1': CE1,
        'CE2': CE2,
        'minCE1': minCE1,
        'minCE2': minCE2,
        'CE1_norm': CE1 / minCE1,
        'CE2_norm': CE2 / minCE2,
    }

def f_CE1(y):
    """Root-mean-square of the successive differences of y."""
    squaredSteps = np.power(np.diff(y), 2)
    return np.sqrt(np.mean(squaredSteps))

def f_CE2(y):
    """Mean of sqrt(1 + d**2) over the successive differences d of y."""
    squaredSteps = np.power(np.diff(y), 2)
    return np.mean(np.sqrt(1 + squaredSteps))


In [110]:
# Spot-check the complexity estimates on ts3
EN_CID(ts3)

{'CE1': 1.4397258123179755,
 'CE2': 1.623039534424403,
 'minCE1': 0.028789278256714637,
 'minCE2': 1.0003926815512982,
 'CE1_norm': 50.00909711872275,
 'CE2_norm': 1.6224024469147185}

In [111]:
def DN_Spread(y, spreadMeasure='std'):
    """
    Measure of spread of the input time series.

    Parameters:
    y (array-like): the input time series
    spreadMeasure (str): which measure to return:
        'std': standard deviation (np.std),
        'iqr': inter-quartile range (stats.iqr),
        'mad': mean absolute deviation (the mad helper),
        'mead': median absolute deviation (the mead helper).

    Returns:
    The requested spread of the raw data vector.

    Raises:
    ValueError: If spreadMeasure is not one of the four options above
    """
    if spreadMeasure == 'std':
        return np.std(y)
    if spreadMeasure == 'iqr':
        return stats.iqr(y)
    if spreadMeasure == 'mad':
        return mad(y)
    if spreadMeasure == 'mead':
        return mead(y)

    raise ValueError('spreadMeasure must be one of std, iqr, mad or mead')

def mad(data, axis=None):
    """
    Mean absolute deviation about the mean.

    Parameters:
    data (array-like): the input values
    axis (int or None): axis along which to compute; None (default) uses all values

    Returns:
    The mean of |data - mean(data)| along the requested axis.
    """
    data = np.asarray(data)
    # keepdims leaves the reduced axis in place with size 1, so the centre lines up
    # with `data` whichever axis was reduced (not only the leading one).
    centre = np.mean(data, axis, keepdims=True)
    return np.mean(np.absolute(data - centre), axis)

def mead(data, axis=None):
    """
    Median absolute deviation about the median.

    Parameters:
    data (array-like): the input values
    axis (int or None): axis along which to compute; None (default) uses all values

    Returns:
    The median of |data - median(data)| along the requested axis.
    """
    data = np.asarray(data)
    # keepdims leaves the reduced axis in place with size 1, so the centre lines up
    # with `data` whichever axis was reduced (not only the leading one).
    centre = np.median(data, axis, keepdims=True)
    return np.median(np.absolute(data - centre), axis)


In [118]:
# Median absolute deviation of ts2 ('mead' selects the median-based helper)
DN_Spread(ts2, spreadMeasure='mead')

0.66015

In [122]:
def DN_Unique(x):
    """
    The proportion of the time series that are unique values.

    Parameters:
    x (array-like): the input data vector

    Returns:
    out (float): the number of distinct values in x divided by the length of x
    """
    numDistinct = np.unique(x).size
    numTotal = len(x)

    return numDistinct / numTotal


In [123]:
# Fraction of distinct values in ts2
DN_Unique(ts2)

0.977

In [127]:
# Number of distinct values in ts1
len(np.unique(ts1))

897

In [148]:
def CO_NonlinearAutocorr(y,taus,doAbs ='empty'):
    """
    Mean of the product of a time series with lagged copies of itself.

    Parameters:
    y (array-like): the input time series
    taus (list of int): the time lags; one lagged copy of y enters the product per lag
    doAbs: whether to average the absolute value of the product. When left at the
        sentinel 'empty', it is switched off for an odd number of lags and on for
        an even number.

    Returns:
    The mean of y[t] * y[t - tau_1] * ... * y[t - tau_k] over t >= max(taus)
    (of its absolute value when doAbs is truthy).
    """
    if doAbs == 'empty':
        doAbs = 0 if len(taus) % 2 == 1 else 1

    N = len(y)
    tmax = np.max(taus)

    # Start from the unlagged segment, then fold in one lagged segment per lag;
    # every segment has length N - tmax so they line up sample by sample.
    product = y[tmax:N]
    for lag in taus:
        product = np.multiply(product, y[tmax - lag:N - lag])

    return np.mean(np.absolute(product)) if doAbs else np.mean(product)

In [151]:
# Three lags (an odd count), so the default doAbs averages the signed product
CO_NonlinearAutocorr(ts1, [1, 2, 3])

0.32868826608330426

In [153]:
from scipy.stats import trim_mean

In [190]:
def DN_TrimmedMean(y, n=0):
    """
    Mean of the time series after discarding its most extreme values.

    Parameters:
    ----------
    y (array-like): the input time series
    n (float): the total percentage (0-100) of values to exclude from the mean
        calculation; half is removed from the lowest values and half from the highest

    Returns:
    --------
    out (float): the mean of the trimmed time series.
    """
    fraction = n * 0.01  # percentage -> fraction of the series
    N = len(y)

    # Number of samples to drop at EACH end of the sorted series
    numDropped = int(np.round(N * fraction / 2))
    ordered = np.sort(y)
    kept = ordered[numDropped:N - numDropped]

    return np.mean(kept)

In [193]:
# n is a percentage here: 10 -> 5% of the samples dropped from each end of the sorted series
DN_TrimmedMean(ts1, 10)

0.0023594444444444205

In [202]:
def DN_Burstiness(y):
    """
    Burstiness statistics of a time series.

    Computes the 'burstiness' statistic defined in Goh and Barabasi,
    "Burstiness and memory in complex systems," Europhys. Lett. 81, 48002 (2008),
    together with the finite-length variant of Kim and Jo (2016),
    http://arxiv.org/pdf/1604.01125v1.pdf.

    Parameters
    ----------
    y : array-like
        The input time series.

    Returns
    -------
    dict
        'B': the original burstiness statistic, (r - 1) / (r + 1), where r is the
        coefficient of variation std(y) / mean(y);
        'B_Kim': the improved statistic, which also depends on len(y).
    """
    mu = np.mean(y)
    sigma = np.std(y)
    coeffVar = np.divide(sigma, mu)  # coefficient of variation, r

    # Original Goh and Barabasi burstiness statistic, B
    out = {'B': np.divide(coeffVar - 1, coeffVar + 1)}

    # Improved statistic: accounts for the scaling with finite series length N
    N = len(y)
    rootPlus = np.sqrt(N+1)
    rootMinus = np.sqrt(N-1)
    numerator = rootPlus*coeffVar - rootMinus
    denominator = (rootPlus-2)*coeffVar + rootMinus
    out['B_Kim'] = np.divide(numerator, denominator)

    return out


In [203]:
# Burstiness statistics of ts3
DN_Burstiness(ts3)

{'B': 0.92662389689055, 'B_Kim': 0.9867869006600554}

In [94]:
import numpy as np
from Operations.CO_HistogramAMI import CO_HistogramAMI
from Operations.CO_FirstCrossing import CO_FirstCrossing
from Operations.IN_AutoMutualInfo import IN_AutoMutualInfo
from Operations.CO_AutoCorr import CO_AutoCorr
from PeripheryFunctions.BF_SignChange import BF_SignChange
from PeripheryFunctions.BF_iszscored import BF_iszscored
from scipy.optimize import curve_fit
import warnings

def CO_AddNoise(y, tau = 1, amiMethod = 'even', extraParam = None, randomSeed = None):
    """
    CO_AddNoise: Changes in the automutual information with the addition of noise

    Parameters:
    y (array-like): The input time series (should be z-scored)
    tau (int or str): The time delay for computing AMI (default: 1);
                      'ac' or 'tau' takes the delay from CO_FirstCrossing(y, 'ac', 0, 'discrete')
    amiMethod (str): The method for computing AMI:
                      'std1','std2','quantiles','even' for histogram-based estimation,
                      'gaussian','kernel','kraskov1','kraskov2' for estimation using JIDT
    extraParam: e.g., the number of bins input to CO_HistogramAMI (10 when not given),
                or parameter for IN_AutoMutualInfo
    randomSeed (int): Seed passed to np.random.seed for reproducible results
                      (None leaves the global random state untouched)

    Returns:
    dict: Statistics on the resulting set of automutual information estimates:
        'pdec', 'meanch', 'ac1', 'ac2',
        'firstUnder75', 'firstUnder50', 'firstUnder25',
        'ami_at_5', 'ami_at_10', 'ami_at_15', 'ami_at_20',
        'pcrossmean',
        'fitexpa', 'fitexpb', 'fitexpr2', 'fitexpadjr2', 'fitexprmse',
        'fitlina', 'fitlinb', 'linfit_mse'

    Raises:
    ValueError: If amiMethod is not recognised, or an AMI estimate comes back as NaN
    """

    if not BF_iszscored(y):
        warnings.warn("Input time series should be z-scored")

    # Reject unknown estimators up front; otherwise every AMI would silently stay
    # at zero and the statistics below would be computed on meaningless values.
    histogramMethods = ['std1', 'std2', 'quantiles', 'even']
    jidtMethods = ['gaussian','kernel','kraskov1','kraskov2']
    if amiMethod not in histogramMethods + jidtMethods:
        raise ValueError(f"Unknown AMI method '{amiMethod}'")

    # Take tau from the autocorrelation function if 'ac' or 'tau'
    # (presumably its first zero crossing, given the 'ac', 0 arguments -- confirm against CO_FirstCrossing)
    if isinstance(tau, str) and tau in ['ac', 'tau']:
        tau = CO_FirstCrossing(y, 'ac', 0, 'discrete')

    # Generate noise
    if randomSeed is not None:
        np.random.seed(randomSeed)
    noise = np.random.randn(len(y)) # generate uncorrelated additive noise

    # Set up noise range
    noiseRange = np.linspace(0, 3, 50) # compare properties across this noise range
    numRepeats = len(noiseRange)

    # Compute the automutual information across a range of noise levels;
    # the same noise realisation is rescaled at every level
    amis = np.zeros(numRepeats)
    for i in range(numRepeats):
        noisy = y + noiseRange[i]*noise
        if amiMethod in histogramMethods:
            # histogram-based methods using the naive implementation in CO_HistogramAMI;
            # use default num of bins for CO_HistogramAMI if not specified
            amis[i] = CO_HistogramAMI(noisy, tau, amiMethod, extraParam or 10)
        else:
            amis[i] = IN_AutoMutualInfo(noisy, tau, amiMethod, extraParam)
        if np.isnan(amis[i]):
            raise ValueError('Error computing AMI: Time series too short (?)')

    # Output statistics
    out = {}
    # Proportion decreases
    out['pdec'] = np.sum(np.diff(amis) < 0) / (numRepeats - 1)

    # Mean change in AMI
    out['meanch'] = np.mean(np.diff(amis))

    # Autocorrelation of AMIs
    out['ac1'] = CO_AutoCorr(amis, 1, 'Fourier')[0]
    out['ac2'] = CO_AutoCorr(amis, 2, 'Fourier')[0]

    # Noise level required to reduce ami to proportion x of its initial value
    firstUnderVals = [0.75, 0.50, 0.25]
    for val in firstUnderVals:
        # int(): val*100 is a float and would otherwise yield keys such as 'firstUnder75.0'
        out[f'firstUnder{int(val*100)}'] = firstUnder_fn(val * amis[0], noiseRange, amis)

    # AMI at actual noise levels: 0.5, 1, 1.5 and 2
    noiseLevels = [0.5, 1, 1.5, 2]
    for nlvl in noiseLevels:
        # np.argmax on a boolean array gives the first index at which it is True
        out[f'ami_at_{int(nlvl*10)}'] = amis[np.argmax(noiseRange >= nlvl)]

    # Count number of times the AMI function crosses its mean
    out['pcrossmean'] = np.sum(np.diff(np.sign(amis - np.mean(amis))) != 0) / (numRepeats - 1)

    # Fit exponential decay
    def expFunc(x, a, b):
        return a * np.exp(b * x)

    popt, pcov = curve_fit(expFunc, noiseRange, amis, p0=[amis[0], -1])
    out['fitexpa'], out['fitexpb'] = popt
    residuals = amis - expFunc(noiseRange, *popt)
    ss_res = np.sum(residuals**2)
    ss_tot = np.sum((amis - np.mean(amis))**2)
    out['fitexpr2'] = 1 - (ss_res / ss_tot)
    # Adjusted R^2 for a model with two fitted parameters
    out['fitexpadjr2'] = 1 - (1-out['fitexpr2'])*(len(amis)-1)/(len(amis)-2-1)
    out['fitexprmse'] = np.sqrt(np.mean(residuals**2))

    # Fit linear function (np.polyfit returns the slope first, then the intercept)
    p = np.polyfit(noiseRange, amis, 1)
    out['fitlina'], out['fitlinb'] = p
    lin_fit = np.polyval(p, noiseRange)
    out['linfit_mse'] = np.mean((lin_fit - amis)**2)

    return out

# helper functions
def firstUnder_fn(x, m, p):
    """
    Locate the first time p drops below the threshold x and report the matching entry of m.

    p and m are vectors of the same length. When p never goes strictly below x,
    the final entry of m is returned instead.
    """
    lastLevel = m[-1]
    levelsBelow = (level for level, value in zip(m, p) if value < x)
    for level in levelsBelow:
        return level
    return lastLevel
