# Extended Convergent Cross Mapping (ECCM) Algorithm on Discrete First Order Systems
This notebook tests the feasibility of using ECCM on discrete first-order transfer functions with various input signals.

***

## Section 0: Environment Setup

### Import relevant packages

In [10]:
# Import numerical packages
import numpy as np
import pandas as pd
import math

# Discrete transfer function signal simulation
from scipy import signal

# For creating permutations of variable pairs
import itertools

# Plot.ly visualisations
import plotly
import plotly.plotly as py
import plotly.offline as pyo # Plot.ly visualisations
import plotly.graph_objs as go # Plot.ly visualisations

# Display DataFrames mid-script
from IPython.display import display

# Ignore warnings when copying a filtered DataFrame
import warnings
warnings.filterwarnings('ignore')

### Configure environment

In [2]:
# Request 'retina' (high-DPI) output from the inline figure backend
%config InlineBackend.figure_format = 'retina'
# Print numpy arrays with 3 digits of floating-point precision
np.set_printoptions(precision=3)

# Activate Plotly Offline for Jupyter
pyo.init_notebook_mode(connected=True)

### Define helper functions

In [316]:
def generate_delayed_df(df, embedDim, delay=1):
    '''
    Build a time-delay embedding of every column in `df`.

    Each column is duplicated `embedDim` times; copy `i` is shifted
    backwards by `i * delay` samples, so that row `t` of the result holds
    `x[t], x[t + delay], ..., x[t + (embedDim - 1) * delay]`.

    Input:
        df:        Pandas DataFrame, one time series per column
        embedDim:  Embedding dimensions (int, must be > 1)
        delay:     Number of samples between each embedding coordinate
                   (int, must be >= 1)

    Returns:
        A DataFrame with two-level columns `(field, str(i))`. Rows left
        incomplete by the shifts are dropped and the index is reset to
        start from 0.

    Raises:
        AssertionError: if `embedDim <= 1` or `delay < 1`.
    '''
    assert embedDim > 1
    assert delay >= 1

    shiftedSeries = []
    column_headers = []

    # `items()` is used rather than `iteritems()`, which pandas 2.0 removed.
    for field, series in df.items():
        for i in range(embedDim):
            shiftedSeries.append(series.shift(-i * delay))

            # Create multiLevel header
            column_headers.append((field, str(i)))

    # Concatenate once instead of growing the frame inside the loop
    output = pd.concat(shiftedSeries, axis=1)
    output.columns = pd.MultiIndex.from_tuples(column_headers)

    # Remove rows containing NaN and reset index to start from 0
    return output.dropna().reset_index(drop=True)

def scatter_plot(legends, *args):
    '''
    Create a 2D or 3D scatter plot, depending on how many
    series are supplied.

    Inputs:
        legends: Axis titles, one per series. Its length has to be
                 the same as the length of `args`.
        *args:   Two (x, y) or three (x, y, z) sequences of values.

    Raises:
        AssertionError: if `legends` and `args` differ in length.
        ValueError:     if the number of series is neither 2 nor 3.
    '''
    assert len(legends) == len(args)

    nDims = len(args)

    # Reject unsupported dimensionality up front with an explicit error;
    # a bare `raise` with no active exception only produces an unrelated
    # RuntimeError / TypeError.
    if nDims not in (2, 3):
        raise ValueError(
            'scatter_plot supports 2 or 3 series, got {}'.format(nDims))

    if nDims == 2:
        data = go.Data([go.Scattergl(
            x = args[0],
            y = args[1],
            mode = 'markers',
            marker = {'size': 2}
        )])

        layout = go.Layout(
            height = 400,
            width = 600,
            margin = {'t': 0},
            xaxis = {'title': legends[0]},
            yaxis = {'title': legends[1]}
        )
    else:
        data = go.Data([go.Scatter3d(
            x = args[0],
            y = args[1],
            z = args[2],
            mode = 'markers',
            marker = {'size': 2}
        )])

        layout = go.Layout(
            height = 800,
            width = 800,
            margin = {'t': 0},
            scene = {
                'xaxis': {'title': legends[0]},
                'yaxis': {'title': legends[1]},
                'zaxis': {'title': legends[2]},
            }
        )

    pyo.iplot(go.Figure(data=data, layout=layout))

def multiple_scatter(legends, *args):
    '''
    Plot several series against a common x-axis, one stacked
    subplot per series.

    Inputs:
        legends: Names of the plotted series; used as trace names
                 and as y-axis titles.
        *args:   The first positional argument holds the shared x
                 values; every further argument is one y series.
    '''
    ySeries = args[1:]
    nPlots = len(ySeries)

    figure = plotly.tools.make_subplots(nPlots, 1,
                                        print_grid = False,
                                        shared_xaxes = True,
                                        vertical_spacing = 0.05)

    # One trace per (legend, series) pair, placed in successive rows
    for row, (legend, yValues) in enumerate(zip(legends, ySeries), 1):
        figure.append_trace(
            go.Scatter(
                x = args[0],
                y = yValues,
                name = legend,
                marker = {'size': 2}
            ),
            row, 1)

    # Overall figure geometry
    figure['layout'].update(
        height = 200 * nPlots,
        width = 800,
        margin = {'t': 0},
        showlegend = False,
    )

    # Label each subplot's y-axis with its legend
    for row, legend in enumerate(legends, 1):
        axisKey = 'yaxis{}'.format(row)
        figure['layout'][axisKey].update(
            title = legend,
        )

    # Create figure
    pyo.iplot(figure)

def plot_causalities(causalities):
    '''
    Plot every causality series against cross map lag, one
    subplot per column.

    Inputs:
        causalities: A Pandas DataFrame indexed by cross map lag, with
                     one column per variable pair. It must contain an
                     odd number of rows (cross map lags).

    Raises:
        AssertionError: if `causalities` is not a DataFrame or has an
                        even number of rows.
    '''
    # Check validity of input data
    assert isinstance(causalities, pd.DataFrame)
    assert len(causalities) % 2 == 1

    # Obtain x-domain to plot
    xmapLags = causalities.index.values

    # Materialise the (title, series) pairs once; `items()` replaces
    # `iteritems()`, which pandas 2.0 removed.
    columns = list(causalities.items())

    # Define number of traces to plot
    N = len(columns)

    figure = plotly.tools.make_subplots(N, 1,
                                        print_grid = False,
                                        shared_xaxes = False)

    # Add traces
    for (i, (title, series)) in enumerate(columns):
        trace = go.Scatter(
            x = xmapLags,
            y = series,
            name = title,
            marker = {'size': 2}
        )

        figure.append_trace(trace, i + 1, 1)

    # Update layout
    figure['layout'].update(
        height = 200 * N,
        width = 800,
        margin = {'t': 0},
        showlegend = False,
    )

    # Adjust y-axis titles and ranges
    for (i, (title, series)) in enumerate(columns):
        # Lower bound is the series minimum, capped at 0. `Series.min()`
        # skips NaN, and `min(0., nan)` evaluates to 0., so an undefined
        # causality value cannot turn the axis range into NaN.
        lowerBound = float(min(0., series.min()))

        figure['layout']['yaxis{}'.format(i + 1)].update(
            title = title,
            range = [lowerBound, 1],
        )

    # Create figure
    pyo.iplot(figure)

def PCA_and_scatter_plot(DF):
    '''
    Projects a DF matrix of shape N x D onto its 3 leading
    principal directions and plots the result.

    N is the number of datapoints, while D is
    the number of variables in the DF, such
    that N >> D.

    NOTE: the data is used as supplied; it is not mean-centred
    before the D x D matrix `X^T X` is decomposed.

    Inputs:
        DF: A DF containing the states of the
            object. Do not supply a time-delay
            embedded DF here.

    Returns:
        None. An interactive 3D Plotly scatterplot is displayed.

    Raises:
        AssertionError: if DF has fewer than 3 columns.
    '''

    assert len(list(DF)) >= 3

    # Work on a plain float array: numpy functions applied directly to
    # DataFrames may return label-aligned DataFrames on recent pandas,
    # which would break the positional indexing used below.
    X = np.asarray(DF, dtype=float)

    # Obtain the square matrix (D x D)
    covMat = np.matmul(X.T, X)

    # The matrix is symmetric, so `eigh` applies; it returns real
    # eigenvalues in ascending order with orthonormal eigenvectors.
    eigvals, eigvect = np.linalg.eigh(covMat)

    # Indices of the 3 largest eigenvalues, in decreasing order
    topIndex = np.argsort(eigvals)[::-1][:3]

    # Obtain the top 3 sorted eigenvectors (D x 3)
    sortedEigVect = eigvect[:, topIndex]

    # Calculate the scores corresponding to the top
    # 3 principal components (N x 3)
    zscores = np.matmul(X, sortedEigVect)

    # Plot on plotly
    data = go.Data([go.Scatter3d(
        x = zscores[:, 0],
        y = zscores[:, 1],
        z = zscores[:, 2],
        mode = 'markers',
        marker = {'size': 2},
        hoverinfo = 'text',
        text = ['PC1: {}<br>PC2: {}<br>PC3: {}'.format(zscore[0], zscore[1], zscore[2]) for zscore in zscores]
    )])

    layout = go.Layout(
        height = 800,
        width = 800,
        margin = {'t': 0},
        scene = {
            'xaxis': {'title': 'PC1'},
            'yaxis': {'title': 'PC2'},
            'zaxis': {'title': 'PC3'},
        }
    )

    pyo.iplot(go.Figure(data=data, layout=layout))
    
def extCCM_wrapper(DF, crossMapLags=5):
    '''
    A wrapper function for the Extended CCM algorithm, to handle
    repetitive variable-pair manipulations. Uses the last 25% of
    datapoints or 1,000 points, whichever is lower, to run ECCM.

    Inputs:
        DF:           A DataFrame with one time series per column.
        crossMapLags: Largest cross map lag magnitude; lags from
                      -crossMapLags to +crossMapLags are evaluated.
                      Defaults to 5.

    Returns:
        causalitiesDF:    A DataFrame of calculated causalities for
                          each variable-pair, indexed by 'xMap Lag'.
        causalityPeaksDF: A DataFrame containing the coordinates of
                          the causality peaks for each variable-pair.
    '''
    def _URC_cross_map_lags(DF):
        '''
        Calculate the critical cross map lag of each causality series
        based on the Upper Right Corner (URC) criteria.

        Among the lags where both the discrete first- and second-order
        differences are nonpositive, the lag with the most negative
        second-order difference is located; the critical cross map lag
        is that lag less one.

        Inputs:
            DF: An (N x P) DataFrame containing causalities for P
                variable pairs at N cross map lags.

        Returns:
            A Series indexed by variable pair holding the critical URC
            cross map lag, or NaN where no lag meets the criteria.
        '''
        URCLags = []

        # Iterate through every variable pair
        for key in DF.columns:
            delta = DF[key].diff()
            doubleDelta = delta.diff()

            # Keep only lags where both derivatives are nonpositive
            # (comparisons against NaN are False, so the undefined
            # leading differences are excluded as well)
            candidates = doubleDelta[(delta <= 0) & (doubleDelta <= 0)]

            URCLags.append(np.nan if candidates.empty
                           else candidates.idxmin() - 1)

        return pd.Series(URCLags, index=DF.columns, name='URC xMap Lag')

    def _peak_causality_coordinates(DF):
        '''
        Calculate the coordinate of causality peak
        for each variable pair in DF.

        Returns a Pandas DataFrame.
        '''
        # Create a new Pandas DataFrame
        peakCausalitiesDF = pd.DataFrame()

        # Calculate peak causalities
        peakCausalitiesDF['Peak Causality'] = DF.max().round(3)

        # Obtain peak cross map lag
        peakCausalitiesDF['Peak xMap Lag'] = DF.idxmax()

        # Calculate Upper Right Corner (URC) cross map lag
        peakCausalitiesDF['URC xMap Lag'] = _URC_cross_map_lags(DF)

        # Calculate URC Causality. NaN (rather than None) marks a missing
        # value so the column stays numeric and can be compared against
        # a threshold.
        URCCausalities = [
            np.nan if pd.isnull(lag) else DF.loc[int(lag), pair]
            for (pair, lag) in peakCausalitiesDF['URC xMap Lag'].items()
        ]
        peakCausalitiesDF['URC Causality'] = pd.Series(URCCausalities,
                                                       index=peakCausalitiesDF.index)

        # Rearrange columns and return DataFrame
        return peakCausalitiesDF[['Peak Causality',
                                  'Peak xMap Lag',
                                  'URC Causality',
                                  'URC xMap Lag']]

    # Calculate length of datapoints to use
    N = min(int(0.25 * len(DF)), 1000)

    # NOTE: for fewer than 4 rows N is 0 and `DF[-0:]` selects every row.
    recentDF = DF[-N:]

    # Index results by cross map lag. `extCCM` returns its values ordered
    # from internal lag +crossMapLags down to -crossMapLags, so labelling
    # them -crossMapLags..+crossMapLags flips the sign of the lag.
    lagIndex = pd.Index(np.arange(-crossMapLags, crossMapLags + 1),
                        name='xMap Lag')
    causalitiesDF = pd.DataFrame(index=lagIndex)

    # Run Extended CCM algorithm
    for (source, target) in itertools.permutations(DF.columns, 2):
        causalitiesDF['{} → {}'.format(target, source)] = \
            extCCM(recentDF,
                   sourceCol=source,
                   targetCol=target,
                   crossMapLags=crossMapLags,
                   embedDim=len(DF.columns)
                  )

    peaksDF = _peak_causality_coordinates(causalitiesDF)

    return causalitiesDF, peaksDF

def visualise_results(statesDF, causalitiesDF, peaksDF, threshold=0.5):
    '''
    Visualise result of ECCM algorithm, by plotting the
    time series, state-space, causalities plot and a
    DataFrame of the causality peaks.

    Inputs:
        statesDF:      DataFrame of system states, one column per variable.
        causalitiesDF: DataFrame of causalities per cross map lag.
        peaksDF:       DataFrame with columns 'Peak Causality',
                       'Peak xMap Lag', 'URC Causality' and 'URC xMap Lag'.
        threshold:     Minimum causality for a variable pair to be displayed.

    Only variable pairs whose peak (or URC) causality reaches `threshold`
    at a negative cross map lag are displayed.
    '''
    # `print(...)` with a single argument behaves identically on
    # Python 2 and Python 3.

    # Visualise time series
    print('System Time Series')
    multiple_scatter(statesDF.columns,
                     list(range(len(statesDF))),
                     *statesDF.values.T)

    # Visualise attractor
    print('\n\n\nState Space Plot')
    if len(statesDF.columns) > 3:
        PCA_and_scatter_plot(statesDF)
    else:
        scatter_plot(statesDF.columns, *statesDF.values.T)

    # Visualise ECCM results
    print('\n\n\nExtended CCM results')
    plot_causalities(causalitiesDF)

    # Display causality peaks
    peakIsCausal = ((peaksDF['Peak Causality'] >= threshold)
                    & (peaksDF['Peak xMap Lag'] < 0))
    URCIsCausal = ((peaksDF['URC Causality'] >= threshold)
                   & (peaksDF['URC xMap Lag'] < 0))

    display(peaksDF[peakIsCausal | URCIsCausal])

### Define CCM algorithm

In [4]:
def CCM(DF, dataCol, targetCol, k=3, attractor_viz=False, prediction_corr_viz=False):
    '''
    Perform two-way convergent cross-mapping (CCM) between two embedded variables.

    Each variable's embedded points are used in turn to find k nearest
    neighbours, from which the other variable is predicted. The Pearson
    correlation between true and predicted values is reported.

    Inputs:
        DF:                  A Pandas DataFrame with two-level columns, such that
                             `DF[col].values` is an (N x P) array
        dataCol:             Top-level column label of the first variable
        targetCol:           Top-level column label of the second variable
        k:                   Number of nearest neighbours (scalar)
        attractor_viz:       If truthy, call `visualise_attractor` for each direction
        prediction_corr_viz: If truthy, plot predictions against targets
    Returns:
        A 2-tuple of correlation coefficients (floats):
            [0] `targetCol` values vs. their prediction from `dataCol` neighbours
            [1] `dataCol` values vs. their prediction from `targetCol` neighbours
        Only the last column of each (N x P) array is compared.
    '''
    def euclidean_dist(A, B=None):
        '''
        Calculate euclidean distances between the rows of A, or between
        each row of A and its own group of rows in B.
        Inputs:
            A: A matrix (a x P)
            B: Optional array (a x k x P); B[i] holds the k points compared with A[i]
        Returns:
            If B is None, an (a x a) matrix of pairwise distances between rows of A.
            Otherwise an (a x k) matrix of distances from A[i] to every B[i, j].
            A small epsilon is added to every distance.
        '''
        # Define input matrices with expanded dimensions
        A_expanded = np.expand_dims(A, 2)

        # Epsilon term for numerical stability
        epsilon = 1e-9

        if B is None:
            # (a x P x 1) - (1 x P x a) broadcasts to (a x P x a)
            return np.sqrt(np.sum(np.square(A_expanded - np.transpose(A_expanded, (2, 1, 0))), axis=1)) + epsilon
        else:
            # (a x 1 x P) - (a x k x P) broadcasts to (a x k x P)
            return np.sqrt(np.sum(np.square(np.transpose(A_expanded, (0, 2, 1)) - B), axis=2)) + epsilon

    def kNN(k, data):
        '''
        Return the nearest neighbours to each row in data in the form of a responsibility matrix.
        Inputs:
            k:    Number of nearest neighbours (scalar)
            data: Data to perform k-NN, a numpy array (N x P)
        Returns:
            A responsibility matrix (N x k), listing the indices of the k-nearest neighbours for each row
        '''
        # Column 0 of the sorted indices is skipped: it is assumed to be
        # the point itself (distance epsilon).
        return np.argsort(euclidean_dist(data))[:, 1:(k + 1)]

    def predict_target(data, target, responsibilities):
        '''
        Perform a prediction of the target based on a weighting of contemporaneous neighbours of data.
        Inputs:
            data:             Data values (N x P)
            target:           Target values to perform prediction (N x P)
            responsibilities: A responsibility matrix (N x k)
        Returns:
            An array of predicted target values (N x P)
        '''
        # Distance of each point to each of its neighbours (N x k),
        # computed once and reused for numerator and scaling
        neighbourDist = euclidean_dist(data, data[responsibilities])

        # Exponential weights, scaled by the distance to the first neighbour
        numerator = np.exp(-np.divide(neighbourDist, neighbourDist[:, 0][:, np.newaxis]))

        # Normalise so that the weights of each row sum to 1
        weights = np.divide(numerator, np.sum(numerator, axis=1, keepdims=True))

        return np.sum(target[responsibilities] * np.expand_dims(weights, axis=2), axis=1)

    def L2_score(target, prediction):
        '''
        Calculate the L2 distance between the min-max normalised values of target and prediction values.

        Inputs:
            target: An array of target values
            prediction: An array of prediction values

        Output:
            Min-max normalised average L2 distance of target-prediction pairs.
        '''
        assert len(target) == len(prediction)

        # Normalise both arrays with the extrema of their union
        _combinedVars = np.append(target, prediction)
        lowest = np.min(_combinedVars)
        spread = np.max(_combinedVars) - lowest

        target = (target - lowest) / spread
        prediction = (prediction - lowest) / spread

        return np.sqrt(1. / (len(target)) * np.sum((target - prediction)**2))

    def visualise_predictions(targetDict, predictionsDict):
        '''
        Create a scatterplot visualising predictions vs. target.
        Inputs:
            targetDict:      Target values (N x P), keyed by variable
            predictionsDict: Prediction values (N x P), keyed by variable
        '''
        def _subplot_title(reconstrVar, otherVar):
            # Title showing the correlation and L2 score of one direction
            truth = targetDict[reconstrVar][:, -1]
            estimate = predictionsDict[reconstrVar][:, -1]
            return '{0} → {1} (rho = {2})<br>{3:.3f}'\
                .format(reconstrVar,
                        otherVar,
                        np.round(np.corrcoef(truth, estimate)[0, -1], 3),
                        L2_score(truth, estimate))

        figure = plotly.tools.make_subplots(1, 2, print_grid=False,
                                            subplot_titles=[_subplot_title(targetCol, dataCol),
                                                            _subplot_title(dataCol, targetCol)])

        # Left subplot shows `targetCol`, right subplot shows `dataCol`
        for column, reconstrVar in enumerate([targetCol, dataCol], 1):
            target = targetDict[reconstrVar]
            predictions = predictionsDict[reconstrVar]

            trace = go.Scattergl(
                x = target[:,-1],
                y = predictions[:,-1],
                mode = 'markers',
                hoverinfo = 'text',
                text = [str(i) for i in range(len(target))]
            )

            combined_data = np.append(target, predictions)

            # Dashed y = x reference line spanning the data range
            line_trace = go.Scattergl(
                x = [np.min(combined_data), np.max(combined_data)],
                y = [np.min(combined_data), np.max(combined_data)],
                mode = 'lines',
                hoverinfo = 'none',
                line = {
                    'color': '#000000',
                    'dash': 'dash',
                    'width': 2
                }
            )

            figure.append_trace(trace, 1, column)
            figure.append_trace(line_trace, 1, column)

        figure['layout'].update(go.Layout(
            showlegend = False,
            height = 400,
            width = 1000,
            xaxis = {'title': targetCol, 'domain': [0., 0.4]},
            xaxis2 = {'title': dataCol, 'domain': [0.6, 1.]},
            yaxis = {'title': '{0} | M({1})'.format(targetCol, dataCol)},
            yaxis2 = {'title': '{0} | M({1})'.format(dataCol, targetCol)},
        ))

        pyo.iplot(figure)


    ###################
    # Function begins #
    ###################

    targetDict = {}
    predictionsDict = {}

    # Cross-map in both directions: neighbours are searched in
    # `unReconstrVar` and used to predict `reconstrVar`
    for (unReconstrVar, reconstrVar) in ((dataCol, targetCol), (targetCol, dataCol)):
        data = DF[unReconstrVar].values
        target = DF[reconstrVar].values
        targetDict[reconstrVar] = target

        # Find indices of k-nearest neighbours
        responsibilities = kNN(k, data)

        # Calculate predicted target values
        predictionsDict[reconstrVar] = predict_target(data, target, responsibilities)

        # Create interactive attractor animation
        if attractor_viz:
            # NOTE(review): `visualise_attractor` is not defined inside this
            # function; it must be provided elsewhere in the notebook.
            visualise_attractor(responsibilities, predictionsDict[reconstrVar])

    # Create correlation plot
    if prediction_corr_viz:
        visualise_predictions(targetDict, predictionsDict)

    # Correlation between true and predicted values, for each direction
    return (np.corrcoef(targetDict[targetCol][:,-1], predictionsDict[targetCol][:,-1])[0,-1],
            np.corrcoef(targetDict[dataCol][:,-1], predictionsDict[dataCol][:,-1])[0,-1])

In [5]:
def extCCM(DF, sourceCol, targetCol, crossMapLags=5, embedDim=2, delay=1):
    '''
    Run one-way CCM between two columns over a range of cross map lags.

    Inputs:
        DF:           A Pandas DataFrame of (un-embedded) time series, one per column.
        sourceCol:    Column whose delay embedding is used for the k-NN search.
        targetCol:    Column whose values are predicted from those neighbours.
        crossMapLags: Largest lag magnitude; every integer lag in
                      [-crossMapLags, crossMapLags] is evaluated.
        embedDim:     Embedding dimensions (int, must be > 1).
        delay:        Number of samples between each embedding coordinate (int, >= 1).

    Return:
        A list of `2 * crossMapLags + 1` causality indices (floats), ordered
        from lag = +crossMapLags down to lag = -crossMapLags.
    '''

    def generate_delayed_df(df, sourceCol, targetCol, crossMapLag, embedDim, delay=1):
        '''
        Generate a delayed dataframe of `embedDim` dimensions, each phase-shifted by `delay` delay.

        Input:
            df:           Panda dataframe
            crossMapLag:  Lags to shift sourceCol by (positive value: `source` maps to past `target`)
            embedDim:     Embedding dimensions (int)
            delay:        Number of samples between each time series point (int)

        Returns:
            A multiLevel dataframe holding the embeddings of `sourceCol` and
            `targetCol` only, with incomplete rows dropped and the index reset.
        '''
        assert embedDim > 1
        assert delay >= 1

        shiftedSeries = []
        column_headers = []

        # `items()` is used rather than `iteritems()`, which pandas 2.0 removed.
        for field, series in df[[sourceCol, targetCol]].items():
            # Only the source series carries the additional cross map lag
            extraShift = crossMapLag if field == sourceCol else 0

            for i in range(embedDim):
                shiftedSeries.append(series.shift(- i * delay - extraShift))

                # Create multiLevel header
                column_headers.append((field, str(i)))

        # Concatenate once instead of growing the frame inside the loop
        output = pd.concat(shiftedSeries, axis=1)
        output.columns = pd.MultiIndex.from_tuples(column_headers)

        # Remove rows containing NaN and reset index to start from 0
        return output.dropna().reset_index(drop=True)

    def CCM_one_way(DF, sourceCol, targetCol, k=3):
        '''
        Perform one direction of the convergent cross-mapping (CCM) algorithm.
        Inputs:
            DF:          A delay-embedded DataFrame with two-level columns
            sourceCol:   A string corresponding to data column in DF to perform k-NN
            targetCol:   A string corresponding to target column in DF to perform prediction
            k:           Number of nearest neighbours (scalar)
        Returns:
            causality:   Correlation between target and predicted values (float)
        '''
        def euclidean_dist(A, B=None):
            '''
            Calculate euclidean distances between the rows of A, or between
            each row of A and its own group of rows in B.
            Inputs:
                A: A matrix (a x P)
                B: Optional array (a x k x P); B[i] holds the k points compared with A[i]
            Returns:
                If B is None, an (a x a) matrix of pairwise distances between rows of A.
                Otherwise an (a x k) matrix of distances from A[i] to every B[i, j].
                A small epsilon is added to every distance.
            '''
            # Define input matrices with expanded dimensions
            A_expanded = np.expand_dims(A, 2)

            # Epsilon term for numerical stability
            epsilon = 1e-9

            if B is None:
                # (a x P x 1) - (1 x P x a) broadcasts to (a x P x a)
                return np.sqrt(np.sum(np.square(A_expanded - np.transpose(A_expanded, (2, 1, 0))), axis=1)) + epsilon
            else:
                # (a x 1 x P) - (a x k x P) broadcasts to (a x k x P)
                return np.sqrt(np.sum(np.square(np.transpose(A_expanded, (0, 2, 1)) - B), axis=2)) + epsilon

        def kNN(k, data):
            '''
            Return the nearest neighbours to each row in data in the form of a responsibility matrix.
            Inputs:
                k:    Number of nearest neighbours (scalar)
                data: Data to perform k-NN, a numpy array (N x P)
            Returns:
                A responsibility matrix (N x k), listing the indices of the k-nearest neighbours for each row
            '''
            # Column 0 of the sorted indices is skipped: it is assumed to be
            # the point itself (distance epsilon).
            return np.argsort(euclidean_dist(data))[:, 1:(k + 1)]

        def predict_target(data, target, responsibilities):
            '''
            Perform a prediction of the target based on a weighting of contemporaneous neighbours of data.
            Inputs:
                data:             Data values (N x P)
                target:           Target values to perform prediction (N x P)
                responsibilities: A responsibility matrix (N x k)
            Returns:
                An array of predicted target values (N x P)
            '''
            # Distance of each point to each of its neighbours (N x k),
            # computed once and reused for numerator and scaling
            neighbourDist = euclidean_dist(data, data[responsibilities])

            # Exponential weights, scaled by the distance to the first neighbour
            numerator = np.exp(-np.divide(neighbourDist, neighbourDist[:, 0][:, np.newaxis]))

            # Normalise so that the weights of each row sum to 1
            weights = np.divide(numerator, np.sum(numerator, axis=1, keepdims=True))

            return np.sum(target[responsibilities] * np.expand_dims(weights, axis=2), axis=1)

        def causality_index(target, prediction):
            '''
            Return a scalar causality index between `target` and `prediction`.

            Pearson correlation coefficient is used as a proxy for causality index.
            '''
            return np.corrcoef(target, prediction)[0,-1]


        ###################
        # Function begins #
        ###################

        # Define `source` and `target`
        source = DF[sourceCol].values
        target = DF[targetCol].values

        # Find indices of k-nearest neighbours
        responsibilities = kNN(k, source)

        # Calculate predicted target values
        predictions = predict_target(source, target, responsibilities)

        # Return the causality index, using the last embedding coordinate
        return causality_index(target[:, -1], predictions[:, -1])

    # Generate empty list to store lagged causality indices
    causalityList = []

    # Iterate from +`crossMapLags` down to -`crossMapLags`
    for lag in range(crossMapLags, -crossMapLags - 1, -1):
        # Generate delayed embedding DF with appropriate cross map lag
        _DF = generate_delayed_df(DF, sourceCol, targetCol, lag, embedDim, delay)

        # Calculate causality of `target` → `source`
        causalityList.append(CCM_one_way(_DF, sourceCol, targetCol))

    return causalityList

### Define input generation functions

In [6]:
def random_normal_input(N, mu, var):
    '''
    Generate a 1D random normal time series of
    length `N` with mean `mu` and variance `var`.

    Inputs:
        N:   Time series length (int)
        mu:  Mean of the distribution (float)
        var: Variance of the distribution (non-negative float)

    Returns:
        A numpy array of shape (N,).

    Raises:
        ValueError: if `var` is negative.
    '''
    if var < 0:
        raise ValueError('var must be non-negative, got {}'.format(var))

    # `np.random.normal` expects the standard deviation as its scale
    # argument, so the variance is converted first.
    return np.random.normal(mu,
                            np.sqrt(var),
                            size=(N,))

def random_binary_input(N, binVals, binRatios=[0.5, 0.5]):
    '''
    Draw a 1D random binary time series: `N` samples taken from
    the values in `binVals`, each picked with the matching
    probability in `binRatios`.

    Inputs:
        N:         Time series length (int)
        binVals:   Binary values (2-member list of float)
        binRatios: Probability of each binary value
                   (2-member list of float)

    Returns:
        A NumPy array of shape (N,).
    '''
    seriesShape = (N,)
    return np.random.choice(binVals, size=seriesShape, p=binRatios)

def aperiodic_random_binary(N, binVals, holdDuration=[1, 100]):
    '''
    Generate a 1D random binary time series of length `N` that
    alternates between the two values in `binVals`, holding each
    value for a randomly drawn number of samples.

    Inputs:
        N:            Time series length (int)
        binVals:      Binary values, smaller value first
                      (2-member list of float)
        holdDuration: Lower and upper bound of hold duration
                      (2-member list of int). Lower bound
                      must be the first value in the list.
                      Durations are drawn with `np.random.randint`,
                      so the lower bound is inclusive and the
                      upper bound is exclusive.

    Returns:
        A NumPy array holding the first `N` generated samples.

    Raises:
        AssertionError: if `binVals` is not ordered, or if the
                        hold bounds are not positive and strictly
                        increasing.
    '''
    def _flip(val):
        '''
        Flip val from one binary value to the other.
        '''
        return minVal if val == maxVal else maxVal

    # Extract values from parameters
    minVal, maxVal = binVals
    minDur, maxDur = holdDuration

    # Error checking
    assert minVal <= maxVal
    assert minDur < maxDur
    assert minDur > 0 and maxDur > 0

    # Initialise result with random min or max value
    duration = np.random.randint(minDur, maxDur)
    result = [np.random.choice([minVal, maxVal])] * duration

    while len(result) < N:
        duration = np.random.randint(minDur, maxDur)
        # Extend in place; rebuilding the list with `+` would copy
        # every sample generated so far on each iteration.
        result.extend([_flip(result[-1])] * duration)

    # The last hold may overshoot, so trim result to be only N in length
    return np.array(result[:N])

<a id='Section1'></a>

### Define first-order discrete transfer function

In [7]:
def simulate_discrete_tf(inputSignal, num, denom, delay=1, initVal=0.):
    '''
    Generate an output signal by passing `inputSignal`
    through a discrete transfer function with numerator
    coefficients `num` and denominator coefficients `denom`.

    Inputs:
        inputSignal: A NumPy input signal
        num: Transfer function numerator coefficients (list of float)
        denom: Transfer function denominator coefficients
               (sequence of float)
        delay: Transfer function delay in samples (int, at least 1)
        initVal: Initial value. Currently not used by the
                 simulation; accepted so existing calls keep working.

    Returns:
        The simulated output as a NumPy array with singleton
        dimensions squeezed out.

    Raises:
        AssertionError: if `delay` is smaller than one.

    Note:
        - `(num, denom, 1.0)` is handed to `scipy.signal.dlsim` as a
          (numerator, denominator, sampling interval) description.
        - Every sample of delay beyond the first appends one zero
          to `denom`.
        - The lengths of `num` and `denom` are not validated here.
    '''
    # Padding the denominator can only add delay; without this check a
    # delay below one would silently be simulated as a delay of one.
    assert delay >= 1

    # Copy into a list so tuples and NumPy arrays are accepted as well
    paddedDenom = list(denom) + [0] * (delay - 1)

    # Create transfer function
    tf = (num, paddedDenom, 1.0)

    # Perform discrete system simulation
    _, outputSignal = signal.dlsim(tf, inputSignal)

    return np.squeeze(outputSignal)

## Extended CCM on Discrete First Order System
An input signal, $X$, is used to generate an output signal $Y$, using a first order transfer function. The transfer function selected is as follows:

$$ Y(z) = \frac{1}{1 - 0.5 z^{-1}} \cdot z^{-1} \cdot X(z)$$

The difference equation for the above transfer function is:

$$ Y(t) = X(t - 1) + 0.5 Y(t - 1) $$

In all cases, the causal network involved is $X$ → $Y$.


### Upper Right Corner (URC) Determination 
The Upper Right Corner (URC) is determined by finding the point *before* the maximum concavity occurs:

Let the causalities be denoted as $\mathbf{\rho} \left( \tau_\times \right)$, where $\tau_\times$ is the cross map lag. We can define the discrete backward derivative to be $\Delta_{\tau_\times} \rho(\tau_\times) = \rho(\tau_\times) - \rho(\tau_\times - 1)$.

The discrete backward second derivative can be expressed as:
$$ 
\begin{align}
\Delta_{\tau_\times}^2 \rho(\tau_\times)  
&= \Delta_{\tau_\times} \rho(\tau_\times) - \Delta_{\tau_\times} \rho(\tau_\times - 1) \\
&= \left[ \rho(\tau_\times) - \rho(\tau_\times - 1) \right] - \left[ \rho(\tau_\times - 1) - \rho(\tau_\times - 2) \right] \\
&= \rho(\tau_\times) - 2 \rho(\tau_\times - 1) + \rho(\tau_\times - 2)
\end{align}
$$

The upper right corner is found by taking $\tau_\times$ corresponding to the minimum $\Delta_{\tau_\times}^2 \rho (\tau_\times) $, for $\tau_\times$ where $\Delta_{\tau_\times} \rho (\tau_\times)$ and $\Delta_{\tau_\times}^2 \rho (\tau_\times)$ are nonpositive.

$$
\begin{align}
\text{URCLag}
&= \tau_{\times, \text{crit}} \\
&= \textit{argmin}_{\tau_\times} \Delta_{\tau_\times}^2 \rho( \tau_\times ) \\
\end{align}
$$
$$
\begin{align}
\text{for } \tau_\times \in
\left\{ \tau_\times : \Delta_{\tau_\times} \rho(\tau_\times) \leq 0 \right\} \cap
\left\{ \tau_\times : \Delta_{\tau_\times}^2 \rho(\tau_\times) \leq 0 \right\}
\end{align}
$$

In [321]:
# Fix the series length and seed the RNG so the run is repeatable
N = 5000
seed = 0
np.random.seed(seed)

# Input signal (alternative generators are kept for reference)
# X = random_normal_input(N, mu=0., var=1.)
# X = random_binary_input(N, binVals=[-1, 1], binRatios=[0.5, 0.5])
X = aperiodic_random_binary(N, binVals=[-1, 1], holdDuration=[1, 100])

# Output signal
# NOTE(review): this denominator has three coefficients, whereas the
# section text describes 1 / (1 - 0.5 z^-1) -- confirm which system
# is intended.
Y = simulate_discrete_tf(X, num=[1.0], denom=[1.0, 0.3, 0.9], delay=1)

# Place the input and output side by side as the columns of a DataFrame
statesDF = pd.DataFrame(np.column_stack((X, Y)), columns=['X', 'Y'])

# Extended CCM, followed by the plots and summary table
causalitiesDF, peaksDF = extCCM_wrapper(statesDF)
visualise_results(statesDF, causalitiesDF, peaksDF)

System Time Series





State Space Plot





Extended CCM results


Unnamed: 0,Peak Causality,Peak xMap Lag,URC Causality,URC xMap Lag
X → Y,0.94,-2,0.940224,-2


## ECCM with Factorial Experiments
ECCM was tested on the same problems as above, now with transfer function pole and delay varied. 

In [318]:
# Set time series length
N = 5000

# Set seed
seed = 0
np.random.seed(seed)

# Generate input signal
# X = random_normal_input(N, mu=0., var=1.)
# X = random_binary_input(N, binVals=[-1, 1], binRatios=[0.5, 0.5])
X = aperiodic_random_binary(N, binVals=[-1, 1], holdDuration=[1, 100])

# Column order of the final results table
resultColumns = ['Pole',
                 'Delay',
                 'Peak Causality',
                 'Peak Delta xMap Lag',
                 'URC Causality',
                 'URC Delta xMap Lag']

# Set values for poles and delays
poles = [0.1 * i for i in range(1, 10, 2)]
delays = range(1, 4)
threshold = 0.5

# One filtered peaks table per run; they are concatenated once after
# the loop instead of growing a DataFrame on every iteration.
peakFrames = []

# Perform factorial simulations using the Cartesian
# product of poles and delays
for (pole, delay) in itertools.product(poles, delays):
    # Generate output signal
    Y = simulate_discrete_tf(X,
                             num=[1.0],
                             denom=[1.0, -pole],
                             delay=delay
                            )

    # Create Pandas DataFrame from input and output signals
    statesDF = pd.DataFrame(np.append(X[:, np.newaxis],
                                      Y[:, np.newaxis],
                                      axis=1),
                            columns=['X', 'Y'])

    # Run Extended CCM algorithm
    causalitiesDF, peaksDF = extCCM_wrapper(statesDF)

    # Keep rows whose peak or URC causality reaches the threshold
    # at a negative cross map lag
    peakDetected = ((peaksDF['Peak Causality'] >= threshold)
                    & (peaksDF['Peak xMap Lag'] < 0))
    urcDetected = ((peaksDF['URC Causality'] >= threshold)
                   & (peaksDF['URC xMap Lag'] < 0))

    # Work on an explicit copy so the columns added below never
    # write into a view of peaksDF
    filteredDF = peaksDF[peakDetected | urcDetected].copy()

    # Rename lag columns
    filteredDF = filteredDF.rename(columns={
        'Peak xMap Lag': 'Peak Delta xMap Lag',
        'URC xMap Lag': 'URC Delta xMap Lag',
    })

    # Add appropriate pole and delay
    filteredDF['Pole'] = pole
    filteredDF['Delay'] = delay

    # Express each lag relative to the expected lag of -delay
    filteredDF['Peak Delta xMap Lag'] = filteredDF['Peak Delta xMap Lag'] + delay
    filteredDF['URC Delta xMap Lag'] = filteredDF['URC Delta xMap Lag'] + delay

    # Collect this run's results
    peakFrames.append(filteredDF)

    print('Pole: {}, Delay: {} Complete!'.format(pole, delay))

# Combine all runs and rearrange columns
factorialPeaksDF = pd.concat(peakFrames)[resultColumns]

# Display DataFrame
display(factorialPeaksDF)

Pole: 0.1, Delay: 1 Complete!
Pole: 0.1, Delay: 2 Complete!
Pole: 0.1, Delay: 3 Complete!
Pole: 0.3, Delay: 1 Complete!
Pole: 0.3, Delay: 2 Complete!
Pole: 0.3, Delay: 3 Complete!
Pole: 0.5, Delay: 1 Complete!
Pole: 0.5, Delay: 2 Complete!
Pole: 0.5, Delay: 3 Complete!
Pole: 0.7, Delay: 1 Complete!
Pole: 0.7, Delay: 2 Complete!
Pole: 0.7, Delay: 3 Complete!
Pole: 0.9, Delay: 1 Complete!
Pole: 0.9, Delay: 2 Complete!
Pole: 0.9, Delay: 3 Complete!


Unnamed: 0,Pole,Delay,Peak Causality,Peak Delta xMap Lag,URC Causality,URC Delta xMap Lag
X → Y,0.1,1,1.0,-4,1.0,0
X → Y,0.1,2,1.0,-3,1.0,0
X → Y,0.1,3,1.0,-2,1.0,0
X → Y,0.3,1,1.0,-4,1.0,0
X → Y,0.3,2,1.0,-3,1.0,0
X → Y,0.3,3,1.0,-2,1.0,0
X → Y,0.5,1,1.0,-4,1.0,0
X → Y,0.5,2,1.0,-3,1.0,0
X → Y,0.5,3,1.0,-2,1.0,0
X → Y,0.7,1,1.0,0,1.0,0
