In [1]:
import numpy as np

# Import classes
from idtxl.bivariate_mi import BivariateMI
from idtxl.data import Data
from idtxl.visualise_graph import plot_network
import matplotlib.pyplot as plt

### Bivariate Mutual Information - sanity check

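1. Generate a random dataset
2. Cycle (circularly shift) the random data by n steps and add noise, creating a second dataset
3. Compute MI for shifts from -6 to 6 (one step beyond the lag range of 0 to 5 searched by the estimator) and check whether it is significant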

**Conclusions**
1. As expected, no relationship is found when the shift exceeds the lag range searched by the estimator
2. As expected, a connection in the correct direction is found for positive and negative shifts
3. As expected, for zero shift MI finds that the two datasets are connected bidirectionally.

In [12]:
def cycle(data, n):
    """Circularly shift a 1-D sequence by ``n`` positions.

    The element at index ``n`` of ``data`` ends up at index 0 of the result,
    i.e. ``result[t] == data[(t + n) % len(data)]``. A positive ``n`` therefore
    makes the result lead ``data`` by ``n`` samples, while a negative ``n``
    makes the result lag ``data`` by ``abs(n)`` samples.

    Shifts wrap modulo the length of ``data`` (a shift of ``len(data) + 2``
    equals a shift of ``2``), and a new array is always returned, so the
    caller's input is never aliased, not even for ``n == 0``.

    Args:
        data: 1-D array-like of samples.
        n: Integer number of positions to shift by; may be negative or zero.

    Returns:
        numpy.ndarray: Shifted copy with the same length as ``data``.
    """
    # np.roll moves elements towards higher indices for a positive shift,
    # which is the opposite of the convention documented above, hence -n.
    return np.roll(data, -n)

    
#######################
# Initialize
#######################
# a) Create random data
# A dedicated, seeded generator makes the synthetic data identical on every
# "Restart & Run All" without touching NumPy's global random state.
# NOTE(review): IDTxl's permutation tests may draw from their own random
# source, so p-values can still vary between runs - confirm if exact
# reproducibility of the statistics is required.
SEED = 0
rng = np.random.RandomState(SEED)

NDATA = 1000
dataNP = np.zeros((2, NDATA))
dataNP[0, :] = rng.normal(0, 1, NDATA)

# b) Initialise analysis object and define settings
# MAX_LAG is the largest source lag the estimator searches; the shifts tested
# below are derived from it so both stay in sync.
MAX_LAG = 5
network_analysis = BivariateMI()
settings = {'cmi_estimator': 'JidtGaussianCMI',
            'max_lag_sources': MAX_LAG,
            'min_lag_sources': 0,
            'verbose' : False}

#######################
# Run analysis
#######################

# Test every shift inside the lag window plus one step beyond it on each side
# (-6 .. 6 for MAX_LAG = 5); the out-of-window shifts serve as negative
# controls.
cycleList = list(range(-(MAX_LAG + 1), MAX_LAG + 2))

results = []
for iCycle in cycleList:
    # Create 2nd dataset, which is a noisy cycle of the first one.
    # `2 * dataNP[0, :]` already allocates a new array, so no explicit copy
    # is needed before shifting.
    dataNP[1, :] = cycle(2 * dataNP[0, :], iCycle) + rng.normal(0, 1, NDATA)
    data = Data(dataNP, dim_order='ps')

    # Run analysis of the network and keep one result object per shift
    results.append(network_analysis.analyse_network(settings=settings, data=data))

#######################
# Print and plot results
#######################

# One summary block per tested shift: for each of the two processes taken as
# target, show which sources were tested together with the MI estimate and
# p-value of the selected sources (both are None when nothing was selected).
for shift, analysis in zip(cycleList, results):
    print(":: For cycle =", shift)
    for target_idx in (0, 1):
        single = analysis.get_single_target(target_idx, fdr=False)
        print("For var", target_idx,
              "sources", single['sources_tested'],
              'mi', single['mi'],
              'p', single['selected_sources_pval'])

# `results` is a list holding one result object per shift, so inspect or plot
# an individual entry rather than the list itself, e.g.
#   results[k].get_single_target(0, fdr=False), results[k].settings,
#   results[k].data_properties,
#   results[k].print_edge_list(weights='max_te_lag', fdr=False),
#   plot_network(results=results[k], weights='max_te_lag', fdr=False); plt.show()

Adding data with properties: 2 processes, 1000 samples, 1 replications
overwriting existing data

---------------------------- (1) include source candidates


---------------------------- (2) prune candidates

---------------------------- (3) final statistics

---------------------------- (1) include source candidates


---------------------------- (2) prune candidates

---------------------------- (3) final statistics
No links in final results ...
Adding data with properties: 2 processes, 1000 samples, 1 replications
overwriting existing data

---------------------------- (1) include source candidates


---------------------------- (2) prune candidates

---------------------------- (3) final statistics

---------------------------- (1) include source candidates


---------------------------- (2) prune candidates
selected vars sources [(0, 0)]
selected candidates current source: [(0, 5)]

---------------------------- (3) final statistics
Adding data with properties: 2 processes, 1000 s


---------------------------- (2) prune candidates

---------------------------- (3) final statistics

---------------------------- (1) include source candidates


---------------------------- (2) prune candidates

---------------------------- (3) final statistics
No links in final results ...
:: For cycle = -6
For var 0 sources [1] mi None p None
For var 1 sources [0] mi None p None
:: For cycle = -5
For var 0 sources [1] mi None p None
For var 1 sources [0] mi [0.79384581] p [0.002]
:: For cycle = -4
For var 0 sources [1] mi None p None
For var 1 sources [0] mi [0.83690814] p [0.002]
:: For cycle = -3
For var 0 sources [1] mi None p None
For var 1 sources [0] mi [0.81456787] p [0.002]
:: For cycle = -2
For var 0 sources [1] mi None p None
For var 1 sources [0] mi [0.81046403] p [0.002]
:: For cycle = -1
For var 0 sources [1] mi None p None
For var 1 sources [0] mi [0.8264654] p [0.002]
:: For cycle = 0
For var 0 sources [1] mi [0.80536613] p [0.002 0.002]
For var 1 sources [0] mi [0.

{'sources_tested': [1], 'current_value': (0, 5), 'selected_vars_sources': [(1, 1)], 'selected_vars_target': [], 'selected_sources_pval': array([0.002]), 'selected_sources_mi': array([0.83001421]), 'omnibus_mi': 0.8300142064797621, 'omnibus_pval': 0.002, 'omnibus_sign': True, 'mi': array([0.83001421])}
{'cmi_estimator': 'JidtGaussianCMI', 'max_lag_sources': 5, 'min_lag_sources': 0, 'verbose': False, 'fdr_correction': True, 'add_conditionals': None, 'tau_sources': 1, 'local_values': False, 'debug': False, 'n_perm_max_stat': 200, 'alpha_max_stat': 0.05, 'permute_in_time': True, 'perm_type': 'random', 'analytical_surrogates': True, 'n_perm_omnibus': 500, 'alpha_omnibus': 0.05, 'n_perm_max_seq': 500, 'alpha_max_seq': 0.05, 'alpha_fdr': 0.05, 'fdr_correct_by_target': True, 'fdr_constant': 2}
{'n_nodes': 2, 'n_realisations': 995, 'normalised': True}


In [3]:
# Zero-lag check: the target is a weighted mix of the source and independent
# noise, both taken from the same sample (no time shift).
# Only lag 0 is searched and every permutation test uses 21 permutations.
settings = dict(
    cmi_estimator='JidtKraskovCMI',
    n_perm_max_stat=21,
    n_perm_min_stat=21,
    n_perm_max_seq=21,
    n_perm_omnibus=21,
    max_lag_sources=0,
    min_lag_sources=0,
)

covariance = 0.4
n = 1000
source = np.random.randn(n)
noise = np.random.randn(n)
target = covariance * source + (1 - covariance) * noise

# Rows are processes and columns are samples ('ps'); normalisation is
# switched off.
samples = np.vstack((source, target))
data = Data(samples, dim_order='ps', normalise=False)

nw = BivariateMI()
results = nw.analyse_single_target(settings, data, target=1, sources='all')

print(results.get_single_target(1, fdr=False))
# The next line is expected to print [(0, 0)].
# NOTE(review): the recorded output shows [] and a tested candidate of (0, 1)
# instead - confirm how a zero lag is handled by the library.
print(results.get_single_target(1, fdr=False).selected_vars_sources)

results.print_edge_list(weights='max_te_lag', fdr=False)

Adding data with properties: 2 processes, 1000 samples, 1 replications
overwriting existing data

Target: 1 - testing sources [0]

---------------------------- (1) include source candidates
candidate set current source: [(0, 1)]
testing candidate: (0, 1) 
maximum statistic, n_perm: 21
 -- not significant

---------------------------- (2) prune candidates
no sources selected, nothing to prune ...

---------------------------- (3) final statistics
no sources selected ...
final source samples: []
final target samples: []


{'sources_tested': [0], 'current_value': (1, 1), 'selected_vars_sources': [], 'selected_vars_target': [], 'selected_sources_pval': None, 'selected_sources_mi': None, 'omnibus_mi': None, 'omnibus_pval': None, 'omnibus_sign': False, 'mi': None}
[]
No significant links found in the network.
