In [1]:
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
from te_datasim.jointprocess import MVJointProcessSimulator
from te_datasim.lineargaussian import MVLinearGaussianSimulator

In [3]:
from reference_knnksg import TE_knnksg

In [5]:
from util_results import Results

# Basic Validity

In [6]:
# Settings shared by the sweeps in this section.
SAMPLE_SIZE = 10_000  # handed to simulate() as its `time` argument
REPLICATES = 10       # number of repetitions; the replicate index is reused as the seed

## Linear Gaussian

In [179]:
# Coupling values to sweep: nine evenly spaced points from 0 to 1, both ends included
lg_lambda_range = list(np.linspace(0, 1, num=9))

# One linear Gaussian simulator per coupling value, in the same order as lg_lambda_range
lg_generator_lst = [
    MVLinearGaussianSimulator(n_dim=1, coupling=coupling)
    for coupling in lg_lambda_range
]

# Analytic reference values for both directions, aligned with lg_generator_lst
lg_TE_X2Y_ref_lst = [
    simulator.analytic_transfer_entropy('X', 'Y') for simulator in lg_generator_lst
]
lg_TE_Y2X_ref_lst = [
    simulator.analytic_transfer_entropy('Y', 'X') for simulator in lg_generator_lst
]

In [180]:
# Coupling sweep for the linear Gaussian process: one results table per direction.
lg_results_TE_X2Y = Results(columns=['method', 'coupling'])
lg_results_TE_Y2X = Results(columns=['method', 'coupling'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for coupling, simulator in zip(lg_lambda_range, lg_generator_lst):
        print("# Coupling = ", coupling, "#")
        # Draw one realisation; the replicate index is used as the seed
        X, Y = simulator.simulate(time=SAMPLE_SIZE, seed=r)
        # Record the knnksg estimate for X -> Y, then for Y -> X
        lg_results_TE_X2Y.write(method='knnksg', coupling=coupling, value=TE_knnksg(X, Y))
        lg_results_TE_Y2X.write(method='knnksg', coupling=coupling, value=TE_knnksg(Y, X))


### REPLICATE 1/50 ###

# Coupling =  0.0 #
# Coupling =  0.125 #
# Coupling =  0.25 #
# Coupling =  0.375 #
# Coupling =  0.5 #
# Coupling =  0.625 #
# Coupling =  0.75 #
# Coupling =  0.875 #
# Coupling =  1.0 #

### REPLICATE 2/50 ###

# Coupling =  0.0 #
# Coupling =  0.125 #
# Coupling =  0.25 #
# Coupling =  0.375 #
# Coupling =  0.5 #
# Coupling =  0.625 #
# Coupling =  0.75 #
# Coupling =  0.875 #
# Coupling =  1.0 #

### REPLICATE 3/50 ###

# Coupling =  0.0 #
# Coupling =  0.125 #
# Coupling =  0.25 #
# Coupling =  0.375 #
# Coupling =  0.5 #
# Coupling =  0.625 #
# Coupling =  0.75 #
# Coupling =  0.875 #
# Coupling =  1.0 #

### REPLICATE 4/50 ###

# Coupling =  0.0 #
# Coupling =  0.125 #
# Coupling =  0.25 #
# Coupling =  0.375 #
# Coupling =  0.5 #
# Coupling =  0.625 #
# Coupling =  0.75 #
# Coupling =  0.875 #
# Coupling =  1.0 #

### REPLICATE 5/50 ###

# Coupling =  0.0 #
# Coupling =  0.125 #
# Coupling =  0.25 #
# Coupling =  0.375 #
# Coupling =  0.5 #
# Coupling

In [181]:
# Lambda values to sweep: nine evenly spaced points from -3 to 3, both ends included
jp_lambda_range = list(np.linspace(-3, 3, num=9))

# One joint process simulator per lambda value, in the same order as jp_lambda_range
jp_generator_lst = [
    MVJointProcessSimulator(n_dim=1, lam=lam_value)
    for lam_value in jp_lambda_range
]

# Analytic reference values for both directions, aligned with jp_generator_lst
jp_TE_X2Y_ref_lst = [
    simulator.analytic_transfer_entropy('X', 'Y') for simulator in jp_generator_lst
]
jp_TE_Y2X_ref_lst = [
    simulator.analytic_transfer_entropy('Y', 'X') for simulator in jp_generator_lst
]

In [182]:
# Lambda sweep for the joint process: one results table per direction.
jp_results_TE_X2Y = Results(columns=['method', 'coupling'])
jp_results_TE_Y2X = Results(columns=['method', 'coupling'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for coupling, simulator in zip(jp_lambda_range, jp_generator_lst):
        print("# Coupling = ", coupling, "#")
        # Draw one realisation; the replicate index is used as the seed
        X, Y = simulator.simulate(time=SAMPLE_SIZE, seed=r)
        # Record the knnksg estimate for X -> Y, then for Y -> X
        jp_results_TE_X2Y.write(method='knnksg', coupling=coupling, value=TE_knnksg(X, Y))
        jp_results_TE_Y2X.write(method='knnksg', coupling=coupling, value=TE_knnksg(Y, X))


### REPLICATE 1/50 ###

# Coupling =  -3.0 #
# Coupling =  -2.25 #
# Coupling =  -1.5 #
# Coupling =  -0.75 #
# Coupling =  0.0 #
# Coupling =  0.75 #
# Coupling =  1.5 #
# Coupling =  2.25 #
# Coupling =  3.0 #

### REPLICATE 2/50 ###

# Coupling =  -3.0 #
# Coupling =  -2.25 #
# Coupling =  -1.5 #
# Coupling =  -0.75 #
# Coupling =  0.0 #
# Coupling =  0.75 #
# Coupling =  1.5 #
# Coupling =  2.25 #
# Coupling =  3.0 #

### REPLICATE 3/50 ###

# Coupling =  -3.0 #
# Coupling =  -2.25 #
# Coupling =  -1.5 #
# Coupling =  -0.75 #
# Coupling =  0.0 #
# Coupling =  0.75 #
# Coupling =  1.5 #
# Coupling =  2.25 #
# Coupling =  3.0 #

### REPLICATE 4/50 ###

# Coupling =  -3.0 #
# Coupling =  -2.25 #
# Coupling =  -1.5 #
# Coupling =  -0.75 #
# Coupling =  0.0 #
# Coupling =  0.75 #
# Coupling =  1.5 #
# Coupling =  2.25 #
# Coupling =  3.0 #

### REPLICATE 5/50 ###

# Coupling =  -3.0 #
# Coupling =  -2.25 #
# Coupling =  -1.5 #
# Coupling =  -0.75 #
# Coupling =  0.0 #
# Coupling =  0.7

# Sample Size Scaling

In [198]:
# Replicate count for the sample-size scaling loops below (rebinds the value used by the earlier sweeps).
REPLICATES = 50

In [199]:
# Sample sizes to sweep; each one is passed to simulate() as `time`
sample_sizes = [500, 1_000, 5_000, 10_000, 50_000, 100_000]

# One fixed-parameter, one-dimensional simulator per process
lg_generator = MVLinearGaussianSimulator(n_dim=1, coupling=0.5)
jp_generator = MVJointProcessSimulator(n_dim=1, lam=0.0)

In [200]:
# Sample-size sweep for the linear Gaussian process: for every replicate and
# every entry of sample_sizes, simulate once and record the knnksg estimate in
# both directions.
# NOTE(review): this rebinds lg_results_TE_X2Y / lg_results_TE_Y2X, so the
# tables filled by the coupling sweep earlier in the notebook are no longer
# reachable through these names -- confirm they were saved or plotted first.
lg_results_TE_X2Y = Results(columns=['method', 'sample_size'])
lg_results_TE_Y2X = Results(columns=['method', 'sample_size'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for samples in sample_sizes:
        # Use the same "# ... #" progress format as the other sweeps
        # (this line previously ended in "samples" instead of "#").
        print("# Samples = ", samples, "#")
        # Simulate data; the replicate index is used as the seed
        X, Y = lg_generator.simulate(time=samples, seed=r)
        # Estimate X -> Y
        TE_X2Y = TE_knnksg(X, Y)
        lg_results_TE_X2Y.write(method='knnksg', sample_size=samples, value=TE_X2Y)
        # Estimate Y -> X
        TE_Y2X = TE_knnksg(Y, X)
        lg_results_TE_Y2X.write(method='knnksg', sample_size=samples, value=TE_Y2X)


### REPLICATE 1/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 2/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 3/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 4/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 5/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 6/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 7/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Cou

In [201]:
# Sample-size sweep for the joint process: one results table per direction.
jp_results_TE_X2Y = Results(columns=['method', 'sample_size'])
jp_results_TE_Y2X = Results(columns=['method', 'sample_size'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for n_samples in sample_sizes:
        print("# Samples = ", n_samples, "#")
        # Draw one realisation of the requested length; the replicate index is the seed
        X, Y = jp_generator.simulate(time=n_samples, seed=r)
        # Record the knnksg estimate for X -> Y, then for Y -> X
        jp_results_TE_X2Y.write(method='knnksg', sample_size=n_samples, value=TE_knnksg(X, Y))
        jp_results_TE_Y2X.write(method='knnksg', sample_size=n_samples, value=TE_knnksg(Y, X))


### REPLICATE 1/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 2/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 3/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 4/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 5/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 6/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #

### REPLICATE 7/50 ###

# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Coupling =  3.0 #
# Cou

# Dimensionality Scaling with redundant dimensions

In [9]:
# Replicate count for the dimensionality-scaling loops below (rebinds the earlier value).
REPLICATES = 5

In [8]:
# Sample sizes run at every dimensionality
sample_sizes = [10_000, 100_000]
# Dimensionalities to sweep: 1 through 10
dim_range = list(range(1, 11))

## Linear Gaussian

In [13]:
# One linear Gaussian simulator per dimensionality; n_redundant_dim is set to n_dim - 1
lg_generator_lst = [
    MVLinearGaussianSimulator(n_dim=n, n_redundant_dim=n-1)
    for n in dim_range
]
# Analytic reference values for both directions, aligned with lg_generator_lst
lg_TE_X2Y_ref_lst = [
    simulator.analytic_transfer_entropy('X', 'Y') for simulator in lg_generator_lst
]
lg_TE_Y2X_ref_lst = [
    simulator.analytic_transfer_entropy('Y', 'X') for simulator in lg_generator_lst
]

In [14]:
# Dimensionality sweep (with redundant dimensions) for the linear Gaussian
# process: one results table per direction.
lg_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
lg_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for n_dim, simulator in zip(dim_range, lg_generator_lst):
        print("## Dim = ", n_dim, "#")
        for n_samples in sample_sizes:
            print("# Sample size = ", n_samples, "#")
            # Draw one realisation of the requested length; the replicate index is the seed
            X, Y = simulator.simulate(time=n_samples, seed=r)
            # Record the knnksg estimate for X -> Y, then for Y -> X
            lg_results_TE_X2Y.write(method='knnksg', n_dim=n_dim, sample_size=n_samples, value=TE_knnksg(X, Y))
            lg_results_TE_Y2X.write(method='knnksg', n_dim=n_dim, sample_size=n_samples, value=TE_knnksg(Y, X))


### REPLICATE 1/5 ###

## Dim =  1 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  2 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  3 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  4 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  5 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  6 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  7 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  8 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  9 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  10 #
# Sample size =  10000 #
# Sample size =  100000 #

### REPLICATE 2/5 ###

## Dim =  1 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  2 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  3 #
# Sample size =  10000 #
# Sample size =  100000 #
## Dim =  4 #
# Sample size =  10000 #
# Sample size =  100000 #


KeyboardInterrupt: 

## Joint Process

In [None]:
# One joint process simulator per dimensionality; n_redundant_dim is set to n_dim - 1
jp_generator_lst = [
    MVJointProcessSimulator(n_dim=n, n_redundant_dim=n-1)
    for n in dim_range
]
# Analytic reference values for both directions, aligned with jp_generator_lst
jp_TE_X2Y_ref_lst = [
    simulator.analytic_transfer_entropy('X', 'Y') for simulator in jp_generator_lst
]
jp_TE_Y2X_ref_lst = [
    simulator.analytic_transfer_entropy('Y', 'X') for simulator in jp_generator_lst
]

In [None]:
# Dimensionality sweep (with redundant dimensions) for the joint process:
# one results table per direction.
jp_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
jp_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for n_dim, simulator in zip(dim_range, jp_generator_lst):
        print("## Dim = ", n_dim, "#")
        for n_samples in sample_sizes:
            print("# Sample size = ", n_samples, "#")
            # Draw one realisation of the requested length; the replicate index is the seed
            X, Y = simulator.simulate(time=n_samples, seed=r)
            # Record the knnksg estimate for X -> Y, then for Y -> X
            jp_results_TE_X2Y.write(method='knnksg', n_dim=n_dim, sample_size=n_samples, value=TE_knnksg(X, Y))
            jp_results_TE_Y2X.write(method='knnksg', n_dim=n_dim, sample_size=n_samples, value=TE_knnksg(Y, X))


### REPLICATE 1/10 ###

## Dim =  1 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  2 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  3 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  4 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  5 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  6 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  7 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  8 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  9 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  10 #
# Sample size =  1000 #
# Sample size =  10000 #

### REPLICATE 2/10 ###

## Dim =  1 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  2 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  3 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  4 #
# Sample size =  1000 #
# Sample size =  10000 #
## Dim =  5 #
# Sample size =  1000 #
# Sample size =  10000 #
## D

# Dimensionality Scaling without redundant dimensions

In [10]:
# Dimensionality sweep (no redundant dimensions) for the linear Gaussian
# process; the per-direction results are written to CSV at the end of the cell.

# Create the output directory before any computation starts: DataFrame.to_csv
# does not create missing parent directories, so without this a fresh checkout
# would fail only at the very end, after every estimate had been computed.
Path('results/knnksg').mkdir(parents=True, exist_ok=True)

# Initialize the list of generators with one for each dimension
lg_generator_lst = [MVLinearGaussianSimulator(n_dim=dim) for dim in dim_range]
# Get the analytic reference values for both directions
lg_TE_X2Y_ref_lst = [generator.analytic_transfer_entropy('X', 'Y') for generator in lg_generator_lst]
lg_TE_Y2X_ref_lst = [generator.analytic_transfer_entropy('Y', 'X') for generator in lg_generator_lst]

# One results table per direction
lg_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
lg_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for dim, generator in zip(dim_range, lg_generator_lst):
        print("## Dim = ", dim, "#")
        for samples in sample_sizes:
            print("# Sample size = ", samples, "#")
            # Simulate data; the replicate index is used as the seed
            X, Y = generator.simulate(time=samples, seed=r)
            # Estimate X -> Y
            TE_X2Y = TE_knnksg(X, Y)
            print(f"TE(X->Y) = {TE_X2Y}")
            lg_results_TE_X2Y.write(method='knnksg', n_dim=dim, sample_size=samples, value=TE_X2Y)
            # Estimate Y -> X
            TE_Y2X = TE_knnksg(Y, X)
            print(f"TE(Y->X) = {TE_Y2X}")
            lg_results_TE_Y2X.write(method='knnksg', n_dim=dim, sample_size=samples, value=TE_Y2X)

# Persist both result tables (without the index column)
lg_results_TE_X2Y.df.to_csv('results/knnksg/lg_results_TE_X2Y_dim.csv', index=False)
lg_results_TE_Y2X.df.to_csv('results/knnksg/lg_results_TE_Y2X_dim.csv', index=False)


### REPLICATE 1/5 ###

## Dim =  1 #
# Sample size =  10000 #
TE(X->Y) = 0.0014
TE(Y->X) = 0.1513
# Sample size =  100000 #
TE(X->Y) = 0.0022
TE(Y->X) = 0.1313
## Dim =  2 #
# Sample size =  10000 #
TE(X->Y) = 0.0032
TE(Y->X) = 0.225
# Sample size =  100000 #
TE(X->Y) = 0.0019
TE(Y->X) = 0.2184
## Dim =  3 #
# Sample size =  10000 #
TE(X->Y) = 0.0069
TE(Y->X) = 0.2474
# Sample size =  100000 #
TE(X->Y) = 0.0031
TE(Y->X) = 0.2505
## Dim =  4 #
# Sample size =  10000 #
TE(X->Y) = 0.0232
TE(Y->X) = 0.25
# Sample size =  100000 #
TE(X->Y) = 0.0151
TE(Y->X) = 0.2518
## Dim =  5 #
# Sample size =  10000 #
TE(X->Y) = 0.033
TE(Y->X) = 0.2512
# Sample size =  100000 #
TE(X->Y) = 0.0316
TE(Y->X) = 0.2506
## Dim =  6 #
# Sample size =  10000 #
TE(X->Y) = 0.0527
TE(Y->X) = 0.2478
# Sample size =  100000 #
TE(X->Y) = 0.049
TE(Y->X) = 0.2452
## Dim =  7 #
# Sample size =  10000 #
TE(X->Y) = 0.0754
TE(Y->X) = 0.2457
# Sample size =  100000 #
TE(X->Y) = 0.0722
TE(Y->X) = 0.2452
## Dim =  8 #
# Sample

In [20]:
# Dimensionality sweep (no redundant dimensions) for the joint process; the
# per-direction results are written to CSV at the end of the cell.

# Create the output directory before any computation starts: DataFrame.to_csv
# does not create missing parent directories, so without this a fresh checkout
# would fail only at the very end, after every estimate had been computed.
Path('results/knnksg').mkdir(parents=True, exist_ok=True)

# Initialize the list of generators with one for each dimension
jp_generator_lst = [MVJointProcessSimulator(n_dim=dim) for dim in dim_range]
# Get the analytic reference values for both directions
jp_TE_X2Y_ref_lst = [generator.analytic_transfer_entropy('X', 'Y') for generator in jp_generator_lst]
jp_TE_Y2X_ref_lst = [generator.analytic_transfer_entropy('Y', 'X') for generator in jp_generator_lst]

# One results table per direction
jp_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
jp_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for dim, generator in zip(dim_range, jp_generator_lst):
        print("## Dim = ", dim, "#")
        for samples in sample_sizes:
            print("# Sample size = ", samples, "#")
            # Simulate data; the replicate index is used as the seed
            X, Y = generator.simulate(time=samples, seed=r)
            # Estimate X -> Y
            TE_X2Y = TE_knnksg(X, Y)
            print(f"TE(X->Y) = {TE_X2Y}")
            jp_results_TE_X2Y.write(method='knnksg', n_dim=dim, sample_size=samples, value=TE_X2Y)
            # Estimate Y -> X
            TE_Y2X = TE_knnksg(Y, X)
            print(f"TE(Y->X) = {TE_Y2X}")
            jp_results_TE_Y2X.write(method='knnksg', n_dim=dim, sample_size=samples, value=TE_Y2X)

# Persist both result tables (without the index column)
jp_results_TE_X2Y.df.to_csv('results/knnksg/jp_results_TE_X2Y_dim.csv', index=False)
jp_results_TE_Y2X.df.to_csv('results/knnksg/jp_results_TE_Y2X_dim.csv', index=False)


### REPLICATE 1/5 ###

## Dim =  1 #
# Sample size =  10000 #
TE(X->Y) = 0.4079
TE(Y->X) = -0.0212
# Sample size =  100000 #
TE(X->Y) = 0.4135
TE(Y->X) = 0.0033
## Dim =  2 #
# Sample size =  10000 #
TE(X->Y) = 0.6805
TE(Y->X) = 0.0154
# Sample size =  100000 #
TE(X->Y) = 0.6714
TE(Y->X) = 0.0042
## Dim =  3 #
# Sample size =  10000 #
TE(X->Y) = 0.7415
TE(Y->X) = 0.0023
# Sample size =  100000 #
TE(X->Y) = 0.7197
TE(Y->X) = 0.0005
## Dim =  4 #
# Sample size =  10000 #
TE(X->Y) = 0.7225
TE(Y->X) = -0.0029
# Sample size =  100000 #
TE(X->Y) = 0.7101
TE(Y->X) = -0.0015
## Dim =  5 #
# Sample size =  10000 #
TE(X->Y) = 0.7124
TE(Y->X) = -0.0032
# Sample size =  100000 #
TE(X->Y) = 0.69
TE(Y->X) = -0.0032
## Dim =  6 #
# Sample size =  10000 #
TE(X->Y) = 0.6772
TE(Y->X) = -0.0065
# Sample size =  100000 #
TE(X->Y) = 0.6692
TE(Y->X) = -0.0011
## Dim =  7 #
# Sample size =  10000 #
TE(X->Y) = 0.6727
TE(Y->X) = 0.0062
# Sample size =  100000 #
TE(X->Y) = 0.6514
TE(Y->X) = 0.0017
## Dim =  8 