In [1]:
import numpy as np

In [5]:
# Load the three example series from plain-text files; the relative paths are
# resolved against the kernel's current working directory.
ts1 = np.loadtxt("ts1.txt")
ts2 = np.loadtxt("ts2.txt")
ts3 = np.loadtxt("ts3.txt")

In [4]:
from Operations.ST_LocalExtrema import ST_LocalExrema as STLE

In [1]:
from ctypes import *
import ctypes

In [7]:
# Load the compiled shared library whose `entropy` function is configured and
# called in the cells below.
# NOTE(review): absolute, machine-specific path -- this cell only runs where the
# .so file exists at exactly this location.
so_file = "/Users/joshua/Desktop/MS_shannon.so"
lib = CDLL(so_file)

In [8]:
# Declare the foreign signature of `lib.entropy` so that ctypes converts and
# type-checks every argument. The per-argument notes below follow the order in
# which shannon_entropy() passes them.
lib.entropy.argtypes = [
    np.ctypeslib.ndpointer(dtype=np.float64, flags='C_CONTIGUOUS'),  # input data buffer (C-contiguous float64)
    ctypes.c_int,  # number of samples in the buffer
    ctypes.c_int,  # bin count
    ctypes.c_int,  # depth
    ctypes.POINTER(ctypes.c_double)  # out-parameter that receives the result
]
lib.entropy.restype = None  # void function: the result comes back through the pointer above

In [9]:
def shannon_entropy(data, bin_count, depth):
    """
    Calculate Shannon Entropy using the C function `lib.entropy`.

    Args:
    data (array-like): Input time series data. Must be vector-shaped: either
        1-D, or N-D with at most one axis longer than 1 (e.g. shape (N, 1) or
        (1, N)). It is converted to a flat, C-contiguous float64 array.
    bin_count (int): Number of bins for encoding
    depth (int): Depth of the encoding

    Returns:
    float: The value the C function writes into its output argument.

    Raises:
    ValueError: If `data` has more than one axis of length > 1.
    """
    data = np.ascontiguousarray(data, dtype=np.float64)

    # len() only reports the first axis, so a (1, N) row vector would be handed
    # to C as a single sample and an (N, M) matrix would be silently truncated.
    # Flatten genuine vectors and refuse anything that is really 2-D or higher.
    if data.ndim > 1:
        if max(data.shape) != data.size:
            raise ValueError(
                f"data must be a one-dimensional time series, got shape {data.shape}"
            )
        data = data.ravel()

    length = data.size
    result = ctypes.c_double()

    # The C routine returns nothing; it stores its answer through the pointer.
    lib.entropy(data, length, bin_count, depth, ctypes.byref(result))

    return result.value

In [10]:
# Entropy of ts3 with bin_count=2 and depth=5.
shannon_entropy(ts3, 2, 5)

3.258990526199341

In [12]:
# Bin count, held as a single-element list.
numBins = [10]

In [63]:
import numpy as np
from scipy import stats
from Operations.CO_FirstCrossing import CO_FirstCrossing
from Operations.CO_AutoCorr import CO_AutoCorr
from numpy import histogram_bin_edges

def CO_Embed2_Shapes(y, tau = 'tau', shape = 'circle', r = 1):
    """
    Shape-based statistics in a 2-d embedding space.

    Takes a shape and places it on each point in the two-dimensional time-delay
    embedding space sequentially. This function counts the points inside this shape
    as a function of time, and returns statistics on this extracted time series.

    Parameters:
    -----------
    y : array_like
        The input time-series as a (z-scored) column vector.
    tau : int or str, optional
        The time-delay (a positive integer). If 'tau', it is set to the first
        zero crossing of the autocorrelation function, capped at 10% of the
        length of the time series.
    shape : str, optional
        The shape to use. Currently only 'circle' is supported.
    r : float, optional
        The radius of the circle.

    Returns:
    --------
    dict or float
        A dictionary containing various statistics of the constructed count
        series, or ``np.nan`` when no point has any neighbour inside the shape.

    Raises:
    -------
    ValueError
        If `shape` is not supported, or if the (given or automatically
        selected) time delay is smaller than 1.
    """
    # Reject unsupported shapes before doing any work.
    if shape != 'circle':
        raise ValueError(f"Unknown shape '{shape}'")

    if tau == 'tau':
        tau = CO_FirstCrossing(y, 'ac', 0, 'discrete')
        # cannot set time delay > 10% of the length of the time series...
        if tau > len(y)/10:
            tau = int(np.floor(len(y)/10))

    # y[:-0] is empty, so tau = 0 (which the 10% cap produces for series shorter
    # than 10 samples) cannot form an embedding; fail with a clear message
    # instead of an opaque shape-mismatch error from column_stack.
    if tau < 1:
        raise ValueError(f"Time delay tau must be a positive integer, got {tau}")

    # Create the recurrence space, populated by points m
    m = np.column_stack((y[:-tau], y[tau:]))
    N = len(m)

    # Put a circle around each point of the embedding space in turn and count
    # how many points fall inside it; the counts form a new time series.
    counts = np.zeros(N)
    r_squared = r**2
    for i in range(N): # across all pts in the time series
        sq_dist = np.sum((m - m[i])**2, axis=1) # squared Euclidean distances from pt i
        counts[i] = np.sum(sq_dist <= r_squared) # number of pts within radius r

    counts -= 1 # ignore self counts

    if np.all(counts == 0):
        print("No counts detected!")
        return np.nan

    # Return basic statistics on the counts
    out = {}
    out['ac1'] = CO_AutoCorr(counts, 1, 'Fourier')[0]
    out['ac2'] = CO_AutoCorr(counts, 2, 'Fourier')[0]
    out['ac3'] = CO_AutoCorr(counts, 3, 'Fourier')[0]
    out['tau'] = CO_FirstCrossing(counts, 'ac', 0, 'continuous')
    out['max'] = np.max(counts)
    counts_std = np.std(counts, ddof=1)
    out['std'] = counts_std
    out['median'] = np.median(counts)
    out['mean'] = np.mean(counts)
    out['iqr'] = np.percentile(counts, 75, method='hazen') - np.percentile(counts, 25, method='hazen')
    out['iqronrange'] = out['iqr']/np.ptp(counts)

    # distribution - using sqrt binning method
    edges = np.histogram_bin_edges(counts, bins='sqrt')
    binCounts, binEdges = np.histogram(counts, bins=edges)
    # normalise bin counts
    binCountsNorm = np.divide(binCounts, np.sum(binCounts))
    # get bin centres
    binCentres = (binEdges[:-1] + binEdges[1:]) / 2
    out['mode_val'] = np.max(binCountsNorm)
    out['mode'] = binCentres[np.argmax(binCountsNorm)]
    # histogram entropy: sum of p*log(p) over occupied bins (no sign flip is applied)
    occupied = binCountsNorm[binCountsNorm > 0]
    out['hist_ent'] = np.sum(occupied * np.log(occupied))

    # Stationarity measure for fifths of the time series:
    # spread of the per-segment mean / std relative to the overall std.
    afifth = int(np.floor(N/5))
    buffer_m = np.array([counts[i*afifth:(i+1)*afifth] for i in range(5)])
    out['statav5_m'] = np.std(np.mean(buffer_m, axis=1), ddof=1) / counts_std
    out['statav5_s'] = np.std(np.std(buffer_m, axis=1), ddof=1) / counts_std

    return out


In [66]:
# Circle-count statistics for ts3 with radius 0.1; tau and shape keep their defaults.
CO_Embed2_Shapes(ts3, r=0.1)

32


{'ac1': 0.23392250602472692,
 'ac2': 0.005446701506938792,
 'ac3': -0.013460515225042154,
 'tau': 2.288075266928414,
 'max': 10.0,
 'std': 2.2532646204888898,
 'median': 2.0,
 'mean': 2.5265265265265264,
 'iqr': 3.0,
 'iqronrange': 0.3,
 'mode_val': 0.2122122122122122,
 'mode': 0.15625,
 'hist_ent': -2.0469340846150486,
 'statav5_m': 0.05265712720156759,
 'statav5_s': 0.040588662251160465}

In [12]:
import numpy as np
from Operations.CO_HistogramAMI import CO_HistogramAMI
from Operations.CO_FirstCrossing import CO_FirstCrossing
from Operations.IN_AutoMutualInfo import IN_AutoMutualInfo
from Operations.CO_AutoCorr import CO_AutoCorr
from PeripheryFunctions.BF_SignChange import BF_SignChange
from PeripheryFunctions.BF_iszscored import BF_iszscored
from scipy.optimize import curve_fit
import warnings

def CO_AddNoise(y, tau = 1, amiMethod = 'even', extraParam = None, randomSeed = None):
    """
    CO_AddNoise: Changes in the automutual information with the addition of noise

    Parameters:
    y (array-like): The input time series (should be z-scored)
    tau (int or str): The time delay for computing AMI (default: 1).
                      'ac' or 'tau' selects the first zero crossing of the
                      autocorrelation function.
    amiMethod (str): The method for computing AMI:
                      'std1','std2','quantiles','even' for histogram-based estimation,
                      'gaussian','kernel','kraskov1','kraskov2' for estimation using JIDT
    extraParam: e.g., the number of bins input to CO_HistogramAMI, or parameter for
                IN_AutoMutualInfo. When None, the estimator is called without it so
                that the estimator's own default applies.
    randomSeed (int): Settings for resetting the random seed for reproducible results

    Returns:
    dict: Statistics on the resulting set of automutual information estimates:
          pdec, meanch, ac1, ac2, firstUnder75/50/25, ami_at_5/10/15/20,
          pcrossmean, fitexpa, fitexpb, fitexpr2, fitexpadjr2, fitexprmse,
          fitlina, fitlinb, linfit_mse.

    Raises:
    ValueError: If amiMethod is not one of the supported names, or if any AMI
                estimate comes back as NaN.
    """
    # Resolve the estimator first so an unsupported method fails immediately,
    # instead of silently leaving every AMI estimate at zero.
    if amiMethod in ('std1', 'std2', 'quantiles', 'even'):
        # histogram-based methods (CO_HistogramAMI)
        estimate_ami = CO_HistogramAMI
    elif amiMethod in ('gaussian', 'kernel', 'kraskov1', 'kraskov2'):
        estimate_ami = IN_AutoMutualInfo
    else:
        raise ValueError(f"Unknown AMI method '{amiMethod}'")

    # Forwarding an explicit None would override the estimator's own default for
    # this argument (presumably the source of the "NoneType + int" TypeError seen
    # when calling with defaults -- verify against the estimator's signature).
    extra_args = () if extraParam is None else (extraParam,)

    if not BF_iszscored(y):
        warnings.warn("Input time series should be z-scored")

    # Set tau to the first zero crossing of the autocorrelation function if 'ac' or 'tau'
    if tau in ['ac', 'tau']:
        tau = CO_FirstCrossing(y, 'ac', 0, 'discrete')

    # Generate noise
    if randomSeed is not None:
        np.random.seed(randomSeed)
    noise = np.random.randn(len(y)) # generate uncorrelated additive noise

    # Set up noise range
    noiseRange = np.linspace(0, 3, 50) # compare properties across this noise range
    numRepeats = len(noiseRange)

    # Compute the automutual information across a range of noise levels;
    # the same noise realisation is scaled up at each step.
    amis = np.zeros(numRepeats)
    for i in range(numRepeats):
        amis[i] = estimate_ami(y + noiseRange[i]*noise, tau, amiMethod, *extra_args)
        if np.isnan(amis[i]):
            raise ValueError('Error computing AMI: Time series too short (?)')

    # Output statistics
    out = {}
    ami_steps = np.diff(amis)
    # Proportion decreases
    out['pdec'] = np.sum(ami_steps < 0) / (numRepeats - 1)

    # Mean change in AMI
    out['meanch'] = np.mean(ami_steps)

    # Autocorrelation of AMIs, stored as scalars (ravel copes with either a
    # scalar or an array return from CO_AutoCorr)
    out['ac1'] = np.ravel(CO_AutoCorr(amis, 1, 'Fourier'))[0]
    out['ac2'] = np.ravel(CO_AutoCorr(amis, 2, 'Fourier'))[0]

    # Noise level required to reduce ami to proportion x of its initial value
    firstUnderVals = [0.75, 0.50, 0.25]
    for val in firstUnderVals:
        # integer suffix so the keys read firstUnder75 rather than firstUnder75.0
        out[f'firstUnder{int(round(val*100))}'] = firstUnder_fn(val * amis[0], noiseRange, amis)

    # AMI at actual noise levels: 0.5, 1, 1.5 and 2
    noiseLevels = [0.5, 1, 1.5, 2]
    for nlvl in noiseLevels:
        # argmax of the boolean mask is the first grid point at or above nlvl;
        # integer suffix keeps the keys uniform (ami_at_5, ami_at_10, ...)
        out[f'ami_at_{int(round(nlvl*10))}'] = amis[np.argmax(noiseRange >= nlvl)]

    # Count number of times the AMI function crosses its mean
    out['pcrossmean'] = np.sum(np.diff(np.sign(amis - np.mean(amis))) != 0) / (numRepeats - 1)

    # Fit exponential decay a*exp(b*x), starting from the noise-free AMI and a decay rate of -1
    expFunc = lambda x, a, b : a * np.exp(b * x)
    popt, pcov = curve_fit(expFunc, noiseRange, amis, p0=[amis[0], -1])
    out['fitexpa'], out['fitexpb'] = popt
    residuals = amis - expFunc(noiseRange, *popt)
    ss_res = np.sum(residuals**2)
    ss_tot = np.sum((amis - np.mean(amis))**2)
    out['fitexpr2'] = 1 - (ss_res / ss_tot)
    out['fitexpadjr2'] = 1 - (1-out['fitexpr2'])*(len(amis)-1)/(len(amis)-2-1)
    out['fitexprmse'] = np.sqrt(np.mean(residuals**2))

    # Fit linear function
    p = np.polyfit(noiseRange, amis, 1)
    out['fitlina'], out['fitlinb'] = p
    lin_fit = np.polyval(p, noiseRange)
    out['linfit_mse'] = np.mean((lin_fit - amis)**2)

    return out

# helper functions
def firstUnder_fn(x, m, p):
    """
    Return the entry of m paired with the first entry of p that is strictly
    below the threshold x; if p never drops below x, return the last entry of m.
    m and p are expected to be vectors of the same length.
    """
    fallback = m[-1]
    for level, value in zip(m, p):
        if value < x:
            return level
    return fallback


In [13]:
# Noise-addition AMI statistics for ts1 with every optional argument left at its default.
CO_AddNoise(ts1)



TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'