# k-Nearest Neighbours

## Library importing, environment configurations and data importing

### Import relevant packages:

In [1]:
# Importing packages
import numpy as np
import os
# Plot.ly visualisations
import plotly
import plotly.offline as pyo # Plot.ly visualisations
import plotly.graph_objs as go # Plot.ly visualisations

### Configure environment:

In [2]:
%config InlineBackend.figure_format = 'retina'
np.set_printoptions(precision=3)

# Activate Plotly Offline for Jupyter
pyo.init_notebook_mode(connected=True)

### Define functions:

In [4]:
# def visualise_attractor(data, target, responsibilities, predictions):
        '''
        Produce Plotly animation on a 1 x 2 subplot to visualise nearest neighbours of data and target.
        Inputs:
            data:        Data values (N x P)
            target:      Target values (N x P)
            responsibilities: A responsibility matrix (N x k)
            predictions: Predicted values for target (N x P)
        '''
        fig = plotly.tools.make_subplots(rows=1, cols=2)
        
        # Define colour list
        colour_list = np.array(['#b3b3b3', '#0f3957', '#1f77b4', '#ff7f0e'])

        # Define blank figure
        figure = {
            'data': [],
            'layout': {},
            'frames': []
        }

        # Create layout
        figure['layout'] = {
            'width': 1000,
            'height': 700,
            'yaxis1': {'domain': [0., 1.], 'anchor': 'x1'},
            'xaxis1': {'domain': [0., 0.45], 'anchor': 'y1'},
            'yaxis2': {'domain': [0., 1.], 'anchor': 'x2'},
            'xaxis2': {'domain': [0.55, 1.], 'anchor': 'y2'},
            'title': 'Visualising Nearest Neighbours on Attractors',
            'showlegend': False
        }
        
        # Define buttons
        figure['layout']['updatemenus'] = [
            {
                'buttons': [
                    {
                        'args': [None, {'frame': {'duration': 1000, 'redraw': False},
                                 'fromcurrent': True, 'transition': {'duration': 0, 'easing': 'quadratic-in-out'}}],
                        'label': 'Play',
                        'method': 'animate'
                    },
                    {
                        'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                        'transition': {'duration': 0}}],
                        'label': 'Pause',
                        'method': 'animate'
                    }
                ],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top'
            }
        ]
        
        # Define slider dictionary
        slider_dict = {
            'active': 0, # Slider knob's relative starting location
            'pad': {'b': 10, 't': 50}, # Bottom and top padding
            'len': 0.9, # Slider length
            'x': 0.1, # Slider x-position
            'y': 0, # Slider y-position
            'yanchor': 'top', 
            'xanchor': 'left',
            'currentvalue': { # Displays current value selected by slider
                'font': {'size': 20},
                'prefix': 'Time index: ',
                'visible': True,
                'xanchor': 'right'
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'steps': []
        }

        # Create frames
        for i in range(len(predictions)):
            # Define a dictionary for each frame
            frame = {
                'data': [],
                'name': str(i + 1) # Used to connect each frame to slider value
            }
            
            # Create raw data trace
            data_trace = {
                'x': data[:,0],
                'y': data[:,1],
                'mode': 'markers',
                'type': 'scatter',
                'xaxis': 'x1',
                'yaxis': 'y1',
                'hoverinfo': 'none',
                'marker': {
                    'size': 4,
                    'color': colour_list[0]
                }
            }
            
            # Create source point trace
            source_trace = {
                'x': [data[i,0]],
                'y': [data[i,1]],
                'mode': 'markers',
                'type': 'scatter',
                'name': 'Source',
                'xaxis': 'x1',
                'yaxis': 'y1',
                'hoverinfo': 'name',
                'marker': {
                    'size': 10,
                    'symbol': 'diamond',
                    'color': colour_list[2],
                    'line': {'width': 1}
                }
            }
            
            # Create source neighbours trace
            source_neighbour_trace = {
                'x': data[responsibilities[i,:],0],
                'y': data[responsibilities[i,:],1],
                'mode': 'markers',
                'type': 'scatter',
                'name': 'Source Neighbour',
                'xaxis': 'x1',
                'yaxis': 'y1',
                'hoverinfo': 'name',
                'marker': {
                    'size': 4,
                    'color': colour_list[2],
                }
            }
            
            # Create target trace
            target_trace = {
                'x': target[:,0],
                'y': target[:,1],
                'mode': 'markers',
                'type': 'scatter',
                'xaxis': 'x2',
                'yaxis': 'y2',
                'hoverinfo': 'none',
                'marker': {
                    'size': 4,
                    'color': colour_list[0]
                }
            }
            
            # Create destination point trace
            actual_destination_trace = {
                'x': [target[i,0]],
                'y': [target[i,1]],
                'mode': 'markers',
                'type': 'scatter',
                'name': 'Actual Target',
                'xaxis': 'x2',
                'yaxis': 'y2',
                'hoverinfo': 'name',
                'marker': {
                    'size': 12,
                    'symbol': 'diamond',
                    'color': colour_list[3],
                    'line': {'width': 2}
                }
            }
            
            # Create destination neighbours trace
            destination_neighbour_trace = {
                'x': target[responsibilities[i,:],0],
                'y': target[responsibilities[i,:],1],
                'mode': 'markers',
                'type': 'scatter',
                'name': 'Target Neighbours',
                'xaxis': 'x2',
                'yaxis': 'y2',
                'hoverinfo': 'name',
                'marker': {
                    'size': 4,
                    'color': colour_list[2],
                }
            } 
            
            # Create predicted destination trace
            predicted_destination_trace = {
                'x': [predictions[i,0]],
                'y': [predictions[i,1]],
                'mode': 'markers',
                'type': 'scatter',
                'name': 'Predicted Target',
                'xaxis': 'x2',
                'yaxis': 'y2',
                'hoverinfo': 'name',
                'marker': {
                    'size': 10,
                    'symbol': 'diamond',
                    'color': colour_list[2],
                    'line': {'width': 2}
                }
            }

            # Append traces to frame
            for trace in [data_trace, source_trace, source_neighbour_trace, \
                          target_trace, destination_neighbour_trace, \
                          actual_destination_trace, predicted_destination_trace]:
                frame['data'].append(trace)
            
            # Append frame to figure
            figure['frames'].append(frame)
            
            # Define slider step
            slider_step = {
                'args': [
                    [i + 1],
                    {'frame': {'duration': 300, 'redraw': False},
                     'mode': 'immediate',
                     'transition': {'duration': 0}}
                ],
                'label': i + 1,
                'method': 'animate'
            }
            
            # Append slider step to slider dictionary
            slider_dict['steps'].append(slider_step)
            
        # Add sliders to layout
        figure['layout']['sliders'] = [slider_dict]
        
        # Define figure['data']
        figure['data'] = figure['frames'][0]['data']
        
        # Save snapshots locally
        pyo.iplot(figure)

IndentationError: unexpected indent (<ipython-input-4-6d329400c286>, line 2)

In [49]:
def CCM(data, target, k, attractor_viz=False, prediction_corr_viz=False):
    '''
    Perform convergent cross-mapping (CCM) algorithm described in paper.
    Inputs:
        data:   Data to perform k-NN, a numpy array (N x P)
        target: Target values to perform prediction (N x P)
        k:      Number of nearest neighbours (scalar)
    '''
    def euclidean_dist(A, B=None):
        '''
        Calculate the euclidean distance for rows in matrix A and rows in matrix B.
        If B is None, calculates distances for rows between matrix A.
        Inputs:
            A: A matrix (a x P)
            B: A matrix (b x k x P)
        Returns:
            A distance matrix (a x b), indicating the distance of all non-i-th point to the i-th point. 
        ''' 
        # Define input matrices with expanded dimensions
        A_expanded = np.expand_dims(A, 2)
        
        # Calculate distance of each point and every other point
        if B is None:
            return np.sqrt(np.sum(np.square(A_expanded - np.transpose(A_expanded, (2, 1, 0))), axis=1))
        else:
            return np.sqrt(np.sum(np.square(np.transpose(A_expanded, (0,2,1)) - B), axis=2))
   
    def kNN(k, data):
        '''
        Return the nearest neighbours to each row in data in the form of a responsibility matrix.
        Inputs:
            k:    Number of nearest neighbours (scalar)
            data: Data to perform k-NN, a numpy array (N x P)
        Returns:
            A responsibility matrix (N x k), listing the indices of the k-nearest neighbours for each row
        '''

        def responsibilities(k, distances):
            '''
            Finds the k-nearest neighbours to each point by index.
            Inputs:
                k:         Number of nearest neighbours (scalar)
                distances: A distance matrix (N x N)
            Returns:
                A responsibility matrix (N x k), listing the indices of the k-nearest neighbours for each row
            '''
            return np.argsort(distances)[:,1:(k + 1)]

        return responsibilities(k, euclidean_dist(data))

    def predict_target(data, target, responsibilities):
        '''
        Performa a prediction of the target based on a weighting of contemporaneous neighbours of data.
        Inputs:
            data:             Data values (N x P)
            target:           Target values to perform prediction (N x P)
            responsibilities: A responsibility matrix (N x k)
        Returns:
            An array of predicted target values (N)
        '''

        def calculate_weights(data, responsibilities):
            '''
            Calculate weights based on the k-nearest neighbours
            Inputs:
                data:             Data values (N x P)
                responsibilities: A responsibility matrix (N x k)
            Returns:
                A matrix of weights (N x k)
            '''
            # Obtain shape of responsibilities
            N, k = responsibilities.shape

            # Calculate values for numerator
            for i in range(k):
                numerator = np.exp( - np.divide(euclidean_dist(data, data[responsibilities]), \
                                                euclidean_dist(data, data[responsibilities])[:,0][:, np.newaxis]))

            # Calculate denominator
            denominator = np.sum(numerator, axis=1, keepdims=True)

            # Calculate and return weights
            return np.divide(numerator, denominator)
        
        weights = calculate_weights(data, responsibilities)
        return np.sum(target[responsibilities] * np.expand_dims(weights, axis=2), axis=1)
    
    def visualise_attractor(data, target, responsibilities, predictions):
        '''
        Produce Plotly animation on a 1 x 2 subplot to visualise nearest neighbours of data and target.
        Inputs:
            data:        Data values (N x P)
            target:      Target values (N x P)
            responsibilities: A responsibility matrix (N x k)
            predictions: Predicted values for target (N x P)
        '''
        fig = plotly.tools.make_subplots(rows=1, cols=2, specs=[[{'is_3d': True}, {'is_3d': True}]])
        
        # Define colour list
        colour_list = np.array(['#b3b3b3', '#0f3957', '#1f77b4', '#ff7f0e'])

        # Define blank figure
        figure = {
            'data': [],
            'layout': {},
            'frames': []
        }

        # Create layout
        figure['layout'] = {
            'width': 1000,
            'height': 700,
            'scene1': {
                'domain': {
                    'x': [0, 0.45],
                    'y': [0., 1.]
                }
            },
            'scene2': {
                'domain': {
                    'x': [0.55, 1.],
                    'y': [0., 1.]
                }
            },
            'title': 'Visualising Nearest Neighbours on Attractors',
            'showlegend': False
        }
        
        # Define buttons
        figure['layout']['updatemenus'] = [
            {
                'buttons': [
                    {
                        'args': [None, {'frame': {'duration': 1000, 'redraw': False},
                                 'fromcurrent': True, 'transition': {'duration': 0, 'easing': 'quadratic-in-out'}}],
                        'label': 'Play',
                        'method': 'animate'
                    },
                    {
                        'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                        'transition': {'duration': 0}}],
                        'label': 'Pause',
                        'method': 'animate'
                    }
                ],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top'
            }
        ]
        
        # Define slider dictionary
        slider_dict = {
            'active': 0, # Slider knob's relative starting location
            'pad': {'b': 10, 't': 50}, # Bottom and top padding
            'len': 0.9, # Slider length
            'x': 0.1, # Slider x-position
            'y': 0, # Slider y-position
            'yanchor': 'top', 
            'xanchor': 'left',
            'currentvalue': { # Displays current value selected by slider
                'font': {'size': 20},
                'prefix': 'Time index: ',
                'visible': True,
                'xanchor': 'right'
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'steps': []
        }

        # Create frames
        for i in range(len(predictions)):
            # Define a dictionary for each frame
            frame = {
                'data': [],
                'name': str(i + 1) # Used to connect each frame to slider value
            }
            
            # Create raw data trace
            data_trace = {
                'x': data[:,0],
                'y': data[:,1],
                'z': data[:,2],
                'mode': 'markers',
                'type': 'scatter3d',
                'hoverinfo': 'none',
                'scene': 'scene1',
                'marker': {
                    'size': 4,
                    'color': colour_list[0]
                }
            }
            
            # Create source point trace
            source_trace = {
                'x': [data[i,0]],
                'y': [data[i,1]],
                'z': [data[i,2]],
                'mode': 'markers',
                'type': 'scatter3d',
                'name': 'Source',
                'scene': 'scene1',
                'hoverinfo': 'name',
                'marker': {
                    'size': 10,
                    'symbol': 'diamond',
                    'color': colour_list[2],
                    'line': {'width': 1}
                }
            }
            
            # Create source neighbours trace
            source_neighbour_trace = {
                'x': data[responsibilities[i,:],0],
                'y': data[responsibilities[i,:],1],
                'z': data[responsibilities[i,:],2],
                'mode': 'markers',
                'type': 'scatter3d',
                'name': 'Source Neighbour',
                'scene': 'scene1',
                'hoverinfo': 'name',
                'marker': {
                    'size': 4,
                    'color': colour_list[2],
                }
            }
            
            # Create target trace
            target_trace = {
                'x': target[:,0],
                'y': target[:,1],
                'z': target[:,2],
                'mode': 'markers',
                'type': 'scatter3d',
                'scene': 'scene2',
                'hoverinfo': 'none',
                'marker': {
                    'size': 4,
                    'color': colour_list[0]
                }
            }
            
            # Create destination point trace
            actual_destination_trace = {
                'x': [target[i,0]],
                'y': [target[i,1]],
                'z': [target[i,2]],
                'mode': 'markers',
                'type': 'scatter3d',
                'name': 'Actual Target',
                'scene': 'scene2',
                'hoverinfo': 'name',
                'marker': {
                    'size': 12,
                    'symbol': 'diamond',
                    'color': colour_list[3],
                    'line': {'width': 2}
                }
            }
            
            # Create destination neighbours trace
            destination_neighbour_trace = {
                'x': target[responsibilities[i,:],0],
                'y': target[responsibilities[i,:],1],
                'z': target[responsibilities[i,:],2],
                'mode': 'markers',
                'type': 'scatter3d',
                'name': 'Target Neighbours',
                'scene': 'scene2',
                'hoverinfo': 'name',
                'marker': {
                    'size': 4,
                    'color': colour_list[2],
                }
            } 
            
            # Create predicted destination trace
            predicted_destination_trace = {
                'x': [predictions[i,0]],
                'y': [predictions[i,1]],
                'z': [predictions[i,2]],
                'mode': 'markers',
                'type': 'scatter3d',
                'name': 'Predicted Target',
                'scene': 'scene2',
                'hoverinfo': 'name',
                'marker': {
                    'size': 10,
                    'symbol': 'diamond',
                    'color': colour_list[2],
                    'line': {'width': 2}
                }
            }

            # Append traces to frame
            for trace in [data_trace, source_trace, source_neighbour_trace, \
                          target_trace, destination_neighbour_trace, \
                          actual_destination_trace, predicted_destination_trace]:
                frame['data'].append(trace)
            
            # Append frame to figure
            figure['frames'].append(frame)
            
            # Define slider step
            slider_step = {
                'args': [
                    [i + 1],
                    {'frame': {'duration': 300, 'redraw': False},
                     'mode': 'immediate',
                     'transition': {'duration': 0}}
                ],
                'label': i + 1,
                'method': 'animate'
            }
            
            # Append slider step to slider dictionary
            slider_dict['steps'].append(slider_step)
            
        # Add sliders to layout
        figure['layout']['sliders'] = [slider_dict]
        
        # Define figure['data']
        figure['data'] = figure['frames'][0]['data']
        
        # Save snapshots locally
        pyo.iplot(figure)
            
            
            
            
            
            
        
    
    def visualise_predictions(target, predictions):
        '''
        Create a scatterplot visualising predictions vs. target.
        Inputs:
            target:      Target values (N x P)
            predictions: Prediction values (N x P)
        '''
        trace = go.Scatter(
            x = target[:,-1],
            y = predictions[:,-1],
            mode = 'markers',
        )
        
        line_trace = go.Scatter(
            x = [0, 1],
            y = [0, 1],
            mode = 'lines',
            hoverinfo = 'none',
            line = {
                'color': '#000000',
                'dash': 'dash',
                'width': 3
            }
        )
        
        layout = go.Layout(
            title = 'Correlation Plot (r = {})'\
                    .format(np.round(np.corrcoef(target[:,-1], predictions[:,-1])[0,-1], 3)),
            showlegend = False,
            height = 800,
            width = 700,
            xaxis = {'title': 'Target',},
            yaxis = {'title': 'Prediction', 'scaleanchor': 'x'},
        )
        
        figure = go.Figure(data=go.Data([trace, line_trace]), layout=layout)
        pyo.iplot(figure)
        
    
    ###################
    # Function begins #
    ###################
    
    # Find indices of k-nearest neighbours
    responsibilities = kNN(k, data)
    print 'k-NN complete!'
    
    # Calculate predicted target values
    predictions = predict_target(data, target, responsibilities)
    print 'Predictions complete!'
    
    # Create interactive attractor animation
    if attractor_viz == True:
        visualise_attractor(data, target, responsibilities, predictions)
        
    # Create correlation plot
    if prediction_corr_viz == True:
        visualise_predictions(target, predictions)
    
    return predictions

## Section 1: Validating Causal Structure from Time Series
The time series used to generate the values stem from the following 2-species predator-prey equation:

$
\begin{align}
X(t + 1) &= X(t) \left[ r_x - r_x X(T) + \gamma_{xy} Y(t) \right] \\
Y(t + 1) &= Y(t) \left[ r_y - r_y Y(T) + \gamma_{yx} X(t) \right]
\end{align}
$

The initial conditions $X(0)$ and $Y(0)$ are obtained from random uniform distribution between 0 and 1 (i.e. $U(0,1)$).

Delayed time-series $\mathbf{Y}$ and $\mathbf{X}$, each with delayed embedding dimensions and lags of $L = 2$ and $\tau = 1$ were represented using variables '`data`' and '`target`' respectively.

The regulation parameters for all examples are set to be $r_x = 3.7$ and $r_y = 3.8$. 

The example below is intended to be for a unidirectional causal system (i.e. $\mathbf{X} \rightarrow \mathbf{Y}$). As such, the coupling parameters are set such that:

$
\begin{align}
\gamma_{xy} &= 0 \\
\gamma_{yx} &= 0.32
\end{align}
$

Te

### Importing data:

In [58]:
"""
source.npz is a dictionary containing keys 'X' and 'Y', each of which holds an (N x P) matrix.
"""
# Load data
source = np.load("./Data/RBFN/source_ex4_10000.npz")

# Create data and target variables
data = source['Y']
target = source['X']

print 'Data shape: ', data.shape

Data shape:  (10000, 2)


### Detecting causality from $\mathbf{X} \rightarrow \mathbf{Y}$

In [59]:
CCM(data=data, target=target, k=3, attractor_viz=False, prediction_corr_viz=True);

k-NN complete!
Predictions complete!


### Detecting causality from $\mathbf{Y} \rightarrow \mathbf{X}$

In [60]:
CCM(data=target, target=data, k=3, attractor_viz=False, prediction_corr_viz=True);

k-NN complete!
Predictions complete!
