In [1]:
import numpy as np
import warnings
import jpype as jp
import os
from scipy import stats

Goal: initialise a mutual information (MI) calculator from the infodynamics toolkit and use it on some test data.

In [2]:
# Location of the infodynamics JAR. Set the INFODYNAMICS_JAR environment variable to
# point somewhere else; the fallback is the original developer-machine path.
jarloc = os.environ.get(
    "INFODYNAMICS_JAR",
    "/Users/joshua/py-hctsa-project/Toolboxes/infodynamics-dist/infodynamics.jar",
)
# Starting a JVM that is already running raises an error, so skip start-up when
# this cell is re-run in the same kernel.
if not jp.isJVMStarted():
    jp.startJVM(jp.getDefaultJVMPath(), "-ea", "-Djava.class.path=" + jarloc)

Gaussian estimator

In [56]:
# Look up the Gaussian MI calculator class in the infodynamics Java package via JPype.
miCalcClass = jp.JPackage("infodynamics.measures.continuous.gaussian").MutualInfoCalculatorMultiVariateGaussian

In [57]:
# Instantiate the Java calculator object.
miCalc = miCalcClass()

In [58]:
# Specify a univariate calculation.
miCalc.initialise(1,1)

In [59]:
# Set the calculator's noise level to zero (property values are passed as strings).
# NOTE(review): this runs after initialise(); IN_Initialize_MI sets its properties
# before calling initialise() - confirm whether the order matters for this calculator.
miCalc.setProperty('NOISE_LEVEL_TO_ADD', '0')

In [6]:
# Two separate samples of 20 draws each from np.random.randn. No seed is set, so the
# values (and any MI estimate computed from them) change from run to run.
y1 = np.random.randn(20)
y2 = np.random.randn(20)

In [7]:
# Check that a NumPy array converts to a Java double[] through JPype.
jp.JArray(jp.JDouble)(y1)

<java array 'double[]'>

In [8]:
# Hand both samples to the calculator, each converted to a Java double[].
miCalc.setObservations(jp.JArray(jp.JDouble)(y1), jp.JArray(jp.JDouble)(y2))

In [9]:
# Compute the mutual information estimate from the observations supplied to the calculator.
miCalc.computeAverageLocalOfObservations()

0.0024375638947790566

In [19]:
from Operations.IN_AutoMutualInfo import IN_AutoMutualInfo
from Operations.CO_FirstCrossing import CO_FirstCrossing
from scipy import stats

In [3]:
# Load the three example time series; the relative paths are resolved against the
# current working directory.
ts1 = np.loadtxt("ts1.txt")
ts2 = np.loadtxt("ts2.txt")
ts3 = np.loadtxt("ts3.txt")

In [7]:
def IN_Initialize_MI(estMethod, extraParam=None, addNoise=False):
    """
    Initialize an Information Dynamics Toolkit calculator object for MI computation.

    Assumes the JVM has already been started with the infodynamics JAR on its
    class path.

    Parameters:
    -----------
    estMethod : str
        The estimation method used to compute the mutual information:
        - 'gaussian'
        - 'kernel'
        - 'kraskov1'
        - 'kraskov2'
    extraParam : str or int, optional
        Only used by 'kraskov1' / 'kraskov2': the number of nearest neighbors
        (property 'k'). It is converted to a string before being passed to the
        calculator. If None, '3' is used.
    addNoise : bool, optional
        Only used by 'kraskov1' / 'kraskov2': when false (the default) the
        calculator's 'NOISE_LEVEL_TO_ADD' property is set to '0' so that the
        estimate is deterministic.

    Returns:
    --------
    miCalc : Java object
        The calculator, with its properties set and initialised for a
        univariate calculation.

    Raises:
    -------
    ValueError
        If estMethod is not one of the supported methods.
    """
    # estMethod -> (Java package, calculator class name)
    calculators = {
        'gaussian': ('infodynamics.measures.continuous.gaussian',
                     'MutualInfoCalculatorMultiVariateGaussian'),
        'kernel': ('infodynamics.measures.continuous.kernel',
                   'MutualInfoCalculatorMultiVariateKernel'),
        'kraskov1': ('infodynamics.measures.continuous.kraskov',
                     'MutualInfoCalculatorMultiVariateKraskov1'),
        'kraskov2': ('infodynamics.measures.continuous.kraskov',
                     'MutualInfoCalculatorMultiVariateKraskov2'),
    }
    # Validate before touching the JVM so a bad method name fails fast and clearly.
    if not isinstance(estMethod, str) or estMethod not in calculators:
        raise ValueError(f"Unknown mutual information estimation method '{estMethod}'")

    implementingPackage, className = calculators[estMethod]
    miCalc = getattr(jp.JPackage(implementingPackage), className)()

    if estMethod in ('kraskov1', 'kraskov2'):
        # Nearest-neighbor option for the KSG estimator. Every property value in
        # this file is passed as a string, so a numeric k is converted rather
        # than handed to Java as an int.
        numNeighbors = '3' if extraParam is None else str(extraParam)
        miCalc.setProperty('k', numNeighbors)

        # Make deterministic (kraskov1/2 add a small amount of noise to the signal by default)
        if not addNoise:
            miCalc.setProperty('NOISE_LEVEL_TO_ADD', '0')

    # Specify a univariate calculation
    miCalc.initialise(1, 1)

    return miCalc

In [38]:
def IN_AutoMutualInfo(y, timeDelay=1, estMethod='gaussian', extraParam=None):
    """
    Time-series automutual information.

    Computes the mutual information between a time series and a time-delayed
    copy of itself, for one or more time delays.

    Parameters:
    -----------
    y : array_like
        Input time series (flattened to 1-D before use)
    timeDelay : int, array_like of int, or str, optional
        Time lag(s) for the automutual information calculation (default is 1).
        A scalar or any array-like (list, tuple, range, ndarray) is accepted.
        If 'ac' or 'tau', the delay is taken from
        CO_FirstCrossing(y, corr_fun='ac', threshold=0, what_out='discrete').
    estMethod : str, optional
        The estimation method used to compute the mutual information:
        - 'gaussian' (computed directly from the Pearson correlation)
        - 'kernel'
        - 'kraskov1'
        - 'kraskov2'
        (default is 'gaussian'). The non-Gaussian methods use a calculator
        from IN_Initialize_MI and assume the JVM has already been started.
    extraParam : any, optional
        Extra parameter passed to IN_Initialize_MI (default is None)

    Returns:
    --------
    out : float or dict
        For a single time delay, the automutual information value. Otherwise
        a dict keyed 'ami<delay>' in ascending order of delay. Delays larger
        than N - 5 are left as NaN and a warning is printed.
    """

    if isinstance(timeDelay, str) and timeDelay in ['ac', 'tau']:
        timeDelay = CO_FirstCrossing(y, corr_fun='ac', threshold=0, what_out='discrete')

    y = np.asarray(y).flatten()
    N = len(y)
    minSamples = 5 # minimum 5 samples to compute mutual information (could make higher?)

    # Accept a scalar or any array-like of delays, and process them in ascending
    # order so that the early `break` below only skips delays that are also too long.
    timeDelay = np.sort(np.atleast_1d(timeDelay).ravel())
    numTimeDelays = len(timeDelay)
    amis = np.full(numTimeDelays, np.nan)

    # initialise the MI calculator object if using non-Gaussian estimator
    if estMethod != 'gaussian':
        # assumes the JVM has already been started up
        miCalc = IN_Initialize_MI(estMethod, extraParam=extraParam, addNoise=False) # NO ADDED NOISE!

    for k, delay in enumerate(timeDelay):
        # check enough samples to compute automutual info
        if delay > N - minSamples:
            # time series too short - keep the remaining values as NaNs
            break
        # form the time-delay vectors y1 and y2
        # (explicit end index: y[:-delay] would be empty for delay == 0)
        y1 = y[:N - delay]
        y2 = y[delay:]

        if estMethod == 'gaussian':
            # closed-form expression in terms of the linear correlation
            r, _ = stats.pearsonr(y1, y2)
            amis[k] = -0.5*np.log(1 - r**2)
        else:
            # Reinitialize for Kraskov:
            miCalc.initialise(1, 1)
            # Set observations to time-delayed versions of the time series:
            y1_jp = jp.JArray(jp.JDouble)(y1) # convert observations to java double
            y2_jp = jp.JArray(jp.JDouble)(y2)
            miCalc.setObservations(y1_jp, y2_jp)
            # compute
            amis[k] = miCalc.computeAverageLocalOfObservations()

    if np.isnan(amis).any():
        print(f"Warning: Time series (N={N}) is too short for automutual information calculations up to lags of {max(timeDelay)}")
    if numTimeDelays == 1:
        # return a scalar if only one time delay
        return amis[0]
    else:
        # return a dict for multiple time delays
        return {f"ami{delay}": ami for delay, ami in zip(timeDelay, amis)}

In [39]:
# Automutual information of ts1 with the function's defaults (timeDelay=1, 'gaussian' estimator).
IN_AutoMutualInfo(ts1)

1.6192050659713346

In [31]:
from PeripheryFunctions.BF_SignChange import BF_SignChange
from Operations.CO_AutoCorr import CO_AutoCorr
from Operations.IN_AutoMutualInfo import IN_AutoMutualInfo
import numpy as np
from scipy import stats

def IN_AutoMutualInfoStats(y, maxTau=None, estMethod='kernel', extraParam=None):
    """
    Statistics on automutual information function of a time series.

    Parameters:
    ----------
    y (array-like) : column vector of time series.
    maxTau (int, optional) : maximal time delay. Defaults to ceil(N/4) and is
        capped at ceil(N/2), where N is the length of y.
    estMethod (str) : input to IN_AutoMutualInfo
    extraParam (str, int, optional) : input to IN_AutoMutualInfo

    Returns:
    --------
    out (dict) : a dictionary containing statistics on the AMIs and their pattern across the range of specified time delays.
        'ami1' .. 'ami<maxTau>' hold the raw values (NaN for delays beyond the
        cap). Ratio statistics whose denominator is zero (very short AMI
        vectors) are returned as NaN.
    """

    def _ratio(count, total):
        # 0/0 for degenerate (very short) AMI vectors is reported as NaN
        # instead of raising ZeroDivisionError.
        return count / total if total != 0 else np.nan

    N = len(y) # length of the time series

    # maxTau: the maximum time delay to investigate
    if maxTau is None:
        maxTau = np.ceil(N/4)
    maxTau0 = int(maxTau)

    # Don't go above N/2
    maxTau = int(min(maxTau, np.ceil(N/2)))

    # Get the AMI data
    tDelay = list(range(1, maxTau+1))
    ami = IN_AutoMutualInfo(y, timeDelay=tDelay, estMethod=estMethod, extraParam=extraParam)
    if isinstance(ami, dict):
        ami = np.array(list(ami.values()))
    else:
        # IN_AutoMutualInfo returns a bare scalar when there is only one delay
        ami = np.atleast_1d(ami)

    out = {} # create dict for storing results
    # Output the raw values
    for i in range(1, maxTau0+1):
        if i <= maxTau:
            out[f'ami{i}'] = ami[i-1]
        else:
            out[f'ami{i}'] = np.nan

    # Basic statistics
    lami = len(ami)
    out['mami'] = np.mean(ami)
    out['stdami'] = np.std(ami)

    # First minimum of mutual information across range
    dami = np.diff(ami)
    # a sign change between successive differences marks a local extremum
    extremai = np.where((dami[:-1] * dami[1:]) < 0)[0]
    out['pextrema'] = _ratio(len(extremai), lami - 1)
    out['fmmi'] = min(extremai) + 1 if len(extremai) > 0 else lami

    # Look for periodicities in local maxima
    maximai = np.where((dami[:-1] > 0) & (dami[1:] < 0))[0] + 1
    dmaximai = np.diff(maximai)
    out['pmaxima'] = _ratio(len(dmaximai), lami // 2)
    if len(dmaximai) > 0:
        out['modeperiodmax'] = stats.mode(dmaximai, keepdims=True).mode[0]
        out['pmodeperiodmax'] = np.sum(dmaximai == out['modeperiodmax']) / len(dmaximai)
    else:
        out['modeperiodmax'] = np.nan
        out['pmodeperiodmax'] = np.nan

    # Look for periodicities in local minima
    minimai = np.where((dami[:-1] < 0) & (dami[1:] > 0))[0] + 1
    dminimai = np.diff(minimai)
    out['pminima'] = _ratio(len(dminimai), lami // 2)
    if len(dminimai) > 0:
        out['modeperiodmin'] = stats.mode(dminimai, keepdims=True).mode[0]
        out['pmodeperiodmin'] = np.sum(dminimai == out['modeperiodmin']) / len(dminimai)
    else:
        out['modeperiodmin'] = np.nan
        out['pmodeperiodmin'] = np.nan

    # Number of crossings at mean/median level, percentiles
    # NOTE(review): behaviour of BF_SignChange / CO_AutoCorr on a length-1 AMI
    # vector is not visible from here - confirm for the maxTau == 1 case.
    out['pcrossmean'] = np.mean(BF_SignChange(ami - np.mean(ami)))
    out['pcrossmedian'] = np.mean(BF_SignChange(ami - np.median(ami)))
    out['pcrossq10'] = np.mean(BF_SignChange(ami - np.quantile(ami, 0.1)))
    out['pcrossq90'] = np.mean(BF_SignChange(ami - np.quantile(ami, 0.9)))

    # ac1
    out['amiac1'] = CO_AutoCorr(ami, 1, 'Fourier')[0]

    return out


In [37]:
# AMI statistics for ts3 with the function's defaults ('kernel' estimator, maxTau = ceil(N/4)).
out = IN_AutoMutualInfoStats(ts3)

In [43]:
from Operations.IN_Initialize_MI import IN_Initialize_MI

In [44]:
def IN_MutualInfo(y1, y2, estMethod = 'kernel', extraParam = None):
    """
    Mutual information between two data vectors.

    Builds a calculator with IN_Initialize_MI (noise addition disabled),
    supplies y1 and y2 to it as Java double arrays, and returns the
    calculator's estimate.

    Parameters:
    -----------
    y1, y2 : array_like
        The two data vectors.
    estMethod : str, optional
        Estimation method passed to IN_Initialize_MI (default is 'kernel')
    extraParam : any, optional
        Extra parameter passed to IN_Initialize_MI (default is None)

    Returns:
    --------
    The value returned by the calculator's computeAverageLocalOfObservations().
    """
    # Calculator first (never with added noise)
    calculator = IN_Initialize_MI(estMethod=estMethod, extraParam=extraParam, addNoise=False)

    # Python sequence -> Java double[]
    toJavaDoubles = jp.JArray(jp.JDouble)
    calculator.setObservations(toJavaDoubles(y1), toJavaDoubles(y2))

    return calculator.computeAverageLocalOfObservations()
