In [1]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
from util_results import Results

In [3]:
from te_datasim.jointprocess import MVJointProcessSimulator
from te_datasim.lineargaussian import MVLinearGaussianSimulator

In [4]:
import torch
torch.set_printoptions(sci_mode=None)

# Select the device handed to every TE_njee call below: the GPU when CUDA is
# usable, otherwise the CPU. The chosen device is reported once on stdout.
_cuda_ok = torch.cuda.is_available()
compute_device = torch.device("cuda" if _cuda_ok else "cpu")
print("CUDA is available. Using GPU." if _cuda_ok else "CUDA is not available. Using CPU.")

CUDA is available. Using GPU.


In [5]:
from reference_njee import TE_njee
# Estimator hyperparameters; each one is forwarded unchanged to every
# TE_njee(...) call in this notebook.
EPOCHS = 5_001       # passed as `epochs`
BATCH_SIZE = 10_000  # passed as `batch_size`
N_BINS = 25          # passed as `n_bins`

# Basic Validity

In [6]:
# Number of repetitions of every experiment; the replicate index is also
# used as the simulation seed in the experiment loops.
REPLICATES = 5
# Length (`time=`) of each simulated series in the basic-validity experiments.
SAMPLE_SIZE = 10_000

## Linear Gaussian

In [7]:
# Coupling strengths to sweep: nine evenly spaced values on [0, 1], endpoints included
lg_lambda_range = list(np.linspace(start=0, stop=1, num=9, endpoint=True))

# One one-dimensional linear-Gaussian simulator per coupling strength
lg_generator_lst = [
    MVLinearGaussianSimulator(n_dim=1, coupling=coupling_value)
    for coupling_value in lg_lambda_range
]

# Analytic transfer entropy of each simulator, in both directions
lg_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in lg_generator_lst]
lg_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in lg_generator_lst]

In [8]:
# Basic validity, linear-Gaussian process: for every replicate and coupling
# strength, simulate SAMPLE_SIZE steps and estimate the transfer entropy in
# both directions with TE_njee, then export both result tables to CSV.
#
# NOTE: judging by the saved output, TE_njee prints one progress line per call
# and labels it "TE(X->Y)" regardless of direction; the second line printed for
# each setting is the Y -> X estimate.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

lg_results_TE_X2Y = Results(columns=['method', 'coupling'])
lg_results_TE_Y2X = Results(columns=['method', 'coupling'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for lam, generator in zip(lg_lambda_range, lg_generator_lst):
        print("# Coupling = ", lam, "#")
        # Simulate data; the replicate index doubles as the seed
        X, Y = generator.simulate(time=SAMPLE_SIZE, seed=r)
        # Estimate X -> Y
        TE_X2Y = TE_njee(X, Y, **njee_kwargs)
        lg_results_TE_X2Y.write(method='njee', coupling=lam, value=TE_X2Y)
        # Estimate Y -> X (arguments swapped)
        TE_Y2X = TE_njee(Y, X, **njee_kwargs)
        lg_results_TE_Y2X.write(method='njee', coupling=lam, value=TE_Y2X)

lg_results_TE_X2Y.df.to_csv('results/njee/lg_results_TE_X2Y_bv.csv', index=False)
lg_results_TE_Y2X.df.to_csv('results/njee/lg_results_TE_Y2X_bv.csv', index=False)


### REPLICATE 1/5 ###

# Coupling =  0.0 #
H(Y|Y-) = 2.5162, H(Y|X-,Y-) = 2.503, TE(X->Y) = 0.0132   
H(Y|Y-) = 2.0351, H(Y|X-,Y-) = 2.028, TE(X->Y) = 0.0071   
# Coupling =  0.125 #
H(Y|Y-) = 2.5176, H(Y|X-,Y-) = 2.4993, TE(X->Y) = 0.0183  
H(Y|Y-) = 2.042, H(Y|X-,Y-) = 2.0272, TE(X->Y) = 0.0148  
# Coupling =  0.25 #
H(Y|Y-) = 2.5159, H(Y|X-,Y-) = 2.5065, TE(X->Y) = 0.0094  
H(Y|Y-) = 2.0287, H(Y|X-,Y-) = 1.9881, TE(X->Y) = 0.0406  
# Coupling =  0.375 #
H(Y|Y-) = 2.5172, H(Y|X-,Y-) = 2.5052, TE(X->Y) = 0.012   
H(Y|Y-) = 2.0284, H(Y|X-,Y-) = 1.9484, TE(X->Y) = 0.08    
# Coupling =  0.5 #
H(Y|Y-) = 2.518, H(Y|X-,Y-) = 2.5073, TE(X->Y) = 0.0107  
H(Y|Y-) = 2.0273, H(Y|X-,Y-) = 1.9006, TE(X->Y) = 0.1267  
# Coupling =  0.625 #
H(Y|Y-) = 2.5169, H(Y|X-,Y-) = 2.5075, TE(X->Y) = 0.0094  
H(Y|Y-) = 2.0182, H(Y|X-,Y-) = 1.8323, TE(X->Y) = 0.1859  
# Coupling =  0.75 #
H(Y|Y-) = 2.5166, H(Y|X-,Y-) = 2.503, TE(X->Y) = 0.0136   
H(Y|Y-) = 2.0164, H(Y|X-,Y-) = 1.7722, TE(X->Y) = 0.2442  
# Co

In [9]:
# Lambda values to sweep: nine evenly spaced values on [-3, 3], endpoints included
jp_lambda_range = list(np.linspace(start=-3, stop=3, num=9, endpoint=True))

# One one-dimensional joint-process simulator per lambda value
jp_generator_lst = [
    MVJointProcessSimulator(n_dim=1, lam=lam_value)
    for lam_value in jp_lambda_range
]

# Analytic transfer entropy of each simulator, in both directions
jp_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in jp_generator_lst]
jp_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in jp_generator_lst]

In [10]:
# Basic validity, joint process: for every replicate and lambda value,
# simulate SAMPLE_SIZE steps and estimate the transfer entropy in both
# directions with TE_njee, then export both result tables to CSV.
#
# NOTE: judging by the saved output, TE_njee prints one progress line per call
# and labels it "TE(X->Y)" regardless of direction; the second line printed for
# each setting is the Y -> X estimate.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

jp_results_TE_X2Y = Results(columns=['method', 'coupling'])
jp_results_TE_Y2X = Results(columns=['method', 'coupling'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for lam, generator in zip(jp_lambda_range, jp_generator_lst):
        print("# Coupling = ", lam, "#")
        # Simulate data; the replicate index doubles as the seed
        X, Y = generator.simulate(time=SAMPLE_SIZE, seed=r)
        # Estimate X -> Y
        TE_X2Y = TE_njee(X, Y, **njee_kwargs)
        jp_results_TE_X2Y.write(method='njee', coupling=lam, value=TE_X2Y)
        # Estimate Y -> X (arguments swapped)
        TE_Y2X = TE_njee(Y, X, **njee_kwargs)
        jp_results_TE_Y2X.write(method='njee', coupling=lam, value=TE_Y2X)

jp_results_TE_X2Y.df.to_csv('results/njee/jp_results_TE_X2Y_bv.csv', index=False)
jp_results_TE_Y2X.df.to_csv('results/njee/jp_results_TE_Y2X_bv.csv', index=False)


### REPLICATE 1/5 ###

# Coupling =  -3.0 #
H(Y|Y-) = 2.5414, H(Y|X-,Y-) = 1.7294, TE(X->Y) = 0.812   
H(Y|Y-) = 2.5387, H(Y|X-,Y-) = 2.5242, TE(X->Y) = 0.0145  
# Coupling =  -2.25 #
H(Y|Y-) = 2.5407, H(Y|X-,Y-) = 1.7392, TE(X->Y) = 0.8015  
H(Y|Y-) = 2.5383, H(Y|X-,Y-) = 2.5248, TE(X->Y) = 0.0135  
# Coupling =  -1.5 #
H(Y|Y-) = 2.542, H(Y|X-,Y-) = 1.7881, TE(X->Y) = 0.7539  
H(Y|Y-) = 2.5386, H(Y|X-,Y-) = 2.5238, TE(X->Y) = 0.0148  
# Coupling =  -0.75 #
H(Y|Y-) = 2.5388, H(Y|X-,Y-) = 1.9164, TE(X->Y) = 0.6224  
H(Y|Y-) = 2.5391, H(Y|X-,Y-) = 2.5234, TE(X->Y) = 0.0157  
# Coupling =  0.0 #
H(Y|Y-) = 2.5305, H(Y|X-,Y-) = 2.1384, TE(X->Y) = 0.3921  
H(Y|Y-) = 2.5391, H(Y|X-,Y-) = 2.5262, TE(X->Y) = 0.0129  
# Coupling =  0.75 #
H(Y|Y-) = 2.602, H(Y|X-,Y-) = 2.4201, TE(X->Y) = 0.1819  
H(Y|Y-) = 2.538, H(Y|X-,Y-) = 2.5268, TE(X->Y) = 0.0112  
# Coupling =  1.5 #
H(Y|Y-) = 2.5999, H(Y|X-,Y-) = 2.542, TE(X->Y) = 0.0579   
H(Y|Y-) = 2.5381, H(Y|X-,Y-) = 2.5304, TE(X->Y) = 0.0077  
# Coup

# Sample Size Scaling

In [12]:
# Series lengths swept in the sample-size study
sample_sizes = [500, 1_000, 5_000, 10_000, 50_000, 100_000]

# One fixed one-dimensional simulator per process type for the whole study
lg_generator = MVLinearGaussianSimulator(coupling=0.5, n_dim=1)
jp_generator = MVJointProcessSimulator(lam=0.0, n_dim=1)

In [13]:
# Sample-size scaling, linear-Gaussian process: for every replicate and series
# length, simulate with the fixed `lg_generator` and estimate the transfer
# entropy in both directions with TE_njee, then export both tables to CSV.
#
# NOTE: relies on `lg_generator` and `sample_sizes` from the setup cell of this
# section. `sample_sizes` is rebound by later sections, so re-run that setup
# cell first when executing cells out of order.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

lg_results_TE_X2Y = Results(columns=['method', 'sample_size'])
lg_results_TE_Y2X = Results(columns=['method', 'sample_size'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for samples in sample_sizes:
        print("# Samples = ", samples, "#")
        # Simulate data; the replicate index doubles as the seed
        X, Y = lg_generator.simulate(time=samples, seed=r)
        # Estimate X -> Y
        TE_X2Y = TE_njee(X, Y, **njee_kwargs)
        lg_results_TE_X2Y.write(method='njee', sample_size=samples, value=TE_X2Y)
        # Estimate Y -> X (arguments swapped)
        TE_Y2X = TE_njee(Y, X, **njee_kwargs)
        lg_results_TE_Y2X.write(method='njee', sample_size=samples, value=TE_Y2X)

lg_results_TE_X2Y.df.to_csv('results/njee/lg_results_TE_X2Y_ss.csv', index=False)
lg_results_TE_Y2X.df.to_csv('results/njee/lg_results_TE_Y2X_ss.csv', index=False)


### REPLICATE 1/5 ###

# Samples =  500 #
H(Y|Y-) = 2.5722, H(Y|X-,Y-) = 2.2519, TE(X->Y) = 0.3203  
H(Y|Y-) = 2.1984, H(Y|X-,Y-) = 1.8299, TE(X->Y) = 0.3685  
# Samples =  1000 #
H(Y|Y-) = 2.6864, H(Y|X-,Y-) = 2.51, TE(X->Y) = 0.1764    
H(Y|Y-) = 2.2154, H(Y|X-,Y-) = 1.9957, TE(X->Y) = 0.2197  
# Samples =  5000 #
H(Y|Y-) = 2.5734, H(Y|X-,Y-) = 2.5471, TE(X->Y) = 0.0263  
H(Y|Y-) = 2.1107, H(Y|X-,Y-) = 1.9809, TE(X->Y) = 0.1298  
# Samples =  10000 #
H(Y|Y-) = 2.516, H(Y|X-,Y-) = 2.5065, TE(X->Y) = 0.0095  
H(Y|Y-) = 2.0279, H(Y|X-,Y-) = 1.8981, TE(X->Y) = 0.1298  
# Samples =  50000 #
H(Y|Y-) = 2.4853, H(Y|X-,Y-) = 2.4836, TE(X->Y) = 0.0017  
H(Y|Y-) = 1.8095, H(Y|X-,Y-) = 1.688, TE(X->Y) = 0.1215   
# Samples =  100000 #
H(Y|Y-) = 2.4662, H(Y|X-,Y-) = 2.4655, TE(X->Y) = 0.0007  
H(Y|Y-) = 1.8072, H(Y|X-,Y-) = 1.6888, TE(X->Y) = 0.1184  

### REPLICATE 2/5 ###

# Samples =  500 #
H(Y|Y-) = 2.411, H(Y|X-,Y-) = 2.0423, TE(X->Y) = 0.3687  
H(Y|Y-) = 2.279, H(Y|X-,Y-) = 1.9294, TE(X->Y

In [14]:
# Sample-size scaling, joint process: for every replicate and series length,
# simulate with the fixed `jp_generator` and estimate the transfer entropy in
# both directions with TE_njee, then export both tables to CSV.
#
# NOTE: relies on `jp_generator` and `sample_sizes` from the setup cell of this
# section. `sample_sizes` is rebound by later sections, so re-run that setup
# cell first when executing cells out of order.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

jp_results_TE_X2Y = Results(columns=['method', 'sample_size'])
jp_results_TE_Y2X = Results(columns=['method', 'sample_size'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for samples in sample_sizes:
        print("# Samples = ", samples, "#")
        # Simulate data; the replicate index doubles as the seed
        X, Y = jp_generator.simulate(time=samples, seed=r)
        # Estimate X -> Y
        TE_X2Y = TE_njee(X, Y, **njee_kwargs)
        jp_results_TE_X2Y.write(method='njee', sample_size=samples, value=TE_X2Y)
        # Estimate Y -> X (arguments swapped)
        TE_Y2X = TE_njee(Y, X, **njee_kwargs)
        jp_results_TE_Y2X.write(method='njee', sample_size=samples, value=TE_Y2X)

jp_results_TE_X2Y.df.to_csv('results/njee/jp_results_TE_X2Y_ss.csv', index=False)
jp_results_TE_Y2X.df.to_csv('results/njee/jp_results_TE_Y2X_ss.csv', index=False)


### REPLICATE 1/5 ###

# Samples =  500 #
H(Y|Y-) = 2.4875, H(Y|X-,Y-) = 1.5899, TE(X->Y) = 0.8976  
H(Y|Y-) = 2.5318, H(Y|X-,Y-) = 2.1453, TE(X->Y) = 0.3865  
# Samples =  1000 #
H(Y|Y-) = 2.6888, H(Y|X-,Y-) = 1.8736, TE(X->Y) = 0.8152  
H(Y|Y-) = 2.6333, H(Y|X-,Y-) = 2.4562, TE(X->Y) = 0.1771  
# Samples =  5000 #
H(Y|Y-) = 2.6888, H(Y|X-,Y-) = 1.8665, TE(X->Y) = 0.8223  
H(Y|Y-) = 2.7449, H(Y|X-,Y-) = 2.709, TE(X->Y) = 0.0359   
# Samples =  10000 #
H(Y|Y-) = 2.5428, H(Y|X-,Y-) = 1.728, TE(X->Y) = 0.8148   
H(Y|Y-) = 2.5367, H(Y|X-,Y-) = 2.523, TE(X->Y) = 0.0137   
# Samples =  50000 #
H(Y|Y-) = 2.4524, H(Y|X-,Y-) = 1.6499, TE(X->Y) = 0.8025  
H(Y|Y-) = 2.4333, H(Y|X-,Y-) = 2.4311, TE(X->Y) = 0.0022  
# Samples =  100000 #
H(Y|Y-) = 2.4456, H(Y|X-,Y-) = 1.6408, TE(X->Y) = 0.8048  
H(Y|Y-) = 2.447, H(Y|X-,Y-) = 2.4469, TE(X->Y) = 0.0001  

### REPLICATE 2/5 ###

# Samples =  500 #
H(Y|Y-) = 2.4597, H(Y|X-,Y-) = 1.5372, TE(X->Y) = 0.9225  
H(Y|Y-) = 2.4229, H(Y|X-,Y-) = 2.0803, TE(X-

# Dimensionality Scaling with redundant dimensions

In [16]:
# Dimensionalities (1 through 10) and series lengths swept in this section.
# Note: this rebinds `sample_sizes` from the sample-size study.
dim_range = list(range(1, 11))
sample_sizes = [10_000, 100_000]

## Linear Gaussian

In [17]:
# One linear-Gaussian simulator per dimensionality, with n_redundant_dim set to n_dim - 1
lg_generator_lst = [
    MVLinearGaussianSimulator(n_dim=n_dim, n_redundant_dim=n_dim-1)
    for n_dim in dim_range
]
# Analytic transfer entropy of each simulator, in both directions
lg_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in lg_generator_lst]
lg_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in lg_generator_lst]

In [18]:
# Dimensionality scaling with redundant dimensions, linear-Gaussian process:
# for every replicate, dimensionality and series length, simulate and estimate
# the transfer entropy in both directions with TE_njee, then export to CSV.
#
# NOTE: relies on `dim_range`, `sample_sizes` and `lg_generator_lst` as defined
# in this section's setup cells. These names are reused by other sections, so
# re-run the setup cells first when executing cells out of order.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

lg_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
lg_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for dim, generator in zip(dim_range, lg_generator_lst):
        print("## Dim = ", dim, "#")
        for samples in sample_sizes:
            print("# Sample size = ", samples, "#")
            # Simulate data; the replicate index doubles as the seed
            X, Y = generator.simulate(time=samples, seed=r)
            # Estimate X -> Y
            TE_X2Y = TE_njee(X, Y, **njee_kwargs)
            lg_results_TE_X2Y.write(method='njee', n_dim=dim, sample_size=samples, value=TE_X2Y)
            # Estimate Y -> X (arguments swapped)
            TE_Y2X = TE_njee(Y, X, **njee_kwargs)
            lg_results_TE_Y2X.write(method='njee', n_dim=dim, sample_size=samples, value=TE_Y2X)

lg_results_TE_X2Y.df.to_csv('results/njee/lg_results_TE_X2Y_dimred.csv', index=False)
lg_results_TE_Y2X.df.to_csv('results/njee/lg_results_TE_Y2X_dimred.csv', index=False)


### REPLICATE 1/5 ###

## Dim =  1 #
# Sample size =  10000 #
H(Y|Y-) = 2.5174, H(Y|X-,Y-) = 2.5077, TE(X->Y) = 0.0097  
H(Y|Y-) = 2.027, H(Y|X-,Y-) = 1.8983, TE(X->Y) = 0.1287  
# Sample size =  100000 #
H(Y|Y-) = 2.4663, H(Y|X-,Y-) = 2.4654, TE(X->Y) = 0.0009  
H(Y|Y-) = 1.8079, H(Y|X-,Y-) = 1.6884, TE(X->Y) = 0.1195  
## Dim =  2 #
# Sample size =  10000 #
H(Y|Y-) = 5.0677, H(Y|X-,Y-) = 5.0411, TE(X->Y) = 0.0266  
H(Y|Y-) = 4.4901, H(Y|X-,Y-) = 4.3457, TE(X->Y) = 0.1444  
# Sample size =  100000 #
H(Y|Y-) = 4.9438, H(Y|X-,Y-) = 4.9458, TE(X->Y) = -0.002  
H(Y|Y-) = 4.3512, H(Y|X-,Y-) = 4.2275, TE(X->Y) = 0.1237  
## Dim =  3 #
# Sample size =  10000 #
H(Y|Y-) = 7.5649, H(Y|X-,Y-) = 7.522, TE(X->Y) = 0.0429   
H(Y|Y-) = 7.0837, H(Y|X-,Y-) = 6.9237, TE(X->Y) = 0.16    
# Sample size =  100000 #
H(Y|Y-) = 7.3036, H(Y|X-,Y-) = 7.2913, TE(X->Y) = 0.0123  
H(Y|Y-) = 6.8025, H(Y|X-,Y-) = 6.6765, TE(X->Y) = 0.126   
## Dim =  4 #
# Sample size =  10000 #
H(Y|Y-) = 9.9883, H(Y|X-,Y-) = 9.92

## Joint Process

In [19]:
# One joint-process simulator (lam = 0.0) per dimensionality, with n_redundant_dim set to n_dim - 1
jp_generator_lst = [
    MVJointProcessSimulator(n_dim=n_dim, n_redundant_dim=n_dim-1, lam=0.0)
    for n_dim in dim_range
]
# Analytic transfer entropy of each simulator, in both directions
jp_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in jp_generator_lst]
jp_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in jp_generator_lst]

In [20]:
# Dimensionality scaling with redundant dimensions, joint process: for every
# replicate, dimensionality and series length, simulate and estimate the
# transfer entropy in both directions with TE_njee, then export to CSV.
#
# NOTE: relies on `dim_range`, `sample_sizes` and `jp_generator_lst` as defined
# in this section's setup cells. These names are reused by other sections, so
# re-run the setup cells first when executing cells out of order.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

jp_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
jp_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for dim, generator in zip(dim_range, jp_generator_lst):
        print("## Dim = ", dim, "#")
        for samples in sample_sizes:
            print("# Sample size = ", samples, "#")
            # Simulate data; the replicate index doubles as the seed
            X, Y = generator.simulate(time=samples, seed=r)
            # Estimate X -> Y
            TE_X2Y = TE_njee(X, Y, **njee_kwargs)
            jp_results_TE_X2Y.write(method='njee', n_dim=dim, sample_size=samples, value=TE_X2Y)
            # Estimate Y -> X (arguments swapped)
            TE_Y2X = TE_njee(Y, X, **njee_kwargs)
            jp_results_TE_Y2X.write(method='njee', n_dim=dim, sample_size=samples, value=TE_Y2X)

jp_results_TE_X2Y.df.to_csv('results/njee/jp_results_TE_X2Y_dimred.csv', index=False)
jp_results_TE_Y2X.df.to_csv('results/njee/jp_results_TE_Y2X_dimred.csv', index=False)


### REPLICATE 1/5 ###

## Dim =  1 #
# Sample size =  10000 #
H(Y|Y-) = 2.5395, H(Y|X-,Y-) = 1.7288, TE(X->Y) = 0.8107  
H(Y|Y-) = 2.5377, H(Y|X-,Y-) = 2.5237, TE(X->Y) = 0.014   
# Sample size =  100000 #
H(Y|Y-) = 2.4457, H(Y|X-,Y-) = 1.6411, TE(X->Y) = 0.8046  
H(Y|Y-) = 2.447, H(Y|X-,Y-) = 2.4468, TE(X->Y) = 0.0002  
## Dim =  2 #
# Sample size =  10000 #
H(Y|Y-) = 5.0888, H(Y|X-,Y-) = 4.2757, TE(X->Y) = 0.8131  
H(Y|Y-) = 4.995, H(Y|X-,Y-) = 4.9758, TE(X->Y) = 0.0192  
# Sample size =  100000 #
H(Y|Y-) = 4.9215, H(Y|X-,Y-) = 4.1166, TE(X->Y) = 0.8049  
H(Y|Y-) = 4.9913, H(Y|X-,Y-) = 4.9904, TE(X->Y) = 0.0009  
## Dim =  3 #
# Sample size =  10000 #
H(Y|Y-) = 7.5947, H(Y|X-,Y-) = 6.7529, TE(X->Y) = 0.8418  
H(Y|Y-) = 7.5849, H(Y|X-,Y-) = 7.5462, TE(X->Y) = 0.0387  
# Sample size =  100000 #
H(Y|Y-) = 7.2791, H(Y|X-,Y-) = 6.4735, TE(X->Y) = 0.8056  
H(Y|Y-) = 7.4352, H(Y|X-,Y-) = 7.434, TE(X->Y) = 0.0012   
## Dim =  4 #
# Sample size =  10000 #
H(Y|Y-) = 10.0116, H(Y|X-,Y-) = 9.14

# Dimensionality Scaling without redundant dimensions

In [22]:
# Dimensionalities (1 through 10) and series lengths swept in this section
dim_range = list(range(1, 11))
sample_sizes = [10_000, 100_000]

## Linear Gaussian

In [23]:
# One linear-Gaussian simulator per dimensionality (no n_redundant_dim argument given)
lg_generator_lst = [
    MVLinearGaussianSimulator(n_dim=n_dim)
    for n_dim in dim_range
]
# Analytic transfer entropy of each simulator, in both directions
lg_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in lg_generator_lst]
lg_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in lg_generator_lst]

In [24]:
# Dimensionality scaling without redundant dimensions, linear-Gaussian process:
# for every replicate, dimensionality and series length, simulate and estimate
# the transfer entropy in both directions with TE_njee, then export to CSV.
#
# NOTE: relies on `dim_range`, `sample_sizes` and `lg_generator_lst` as defined
# in this section's setup cells. These names are reused by other sections, so
# re-run the setup cells first when executing cells out of order.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

lg_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
lg_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for dim, generator in zip(dim_range, lg_generator_lst):
        print("## Dim = ", dim, "#")
        for samples in sample_sizes:
            print("# Sample size = ", samples, "#")
            # Simulate data; the replicate index doubles as the seed
            X, Y = generator.simulate(time=samples, seed=r)
            # Estimate X -> Y
            TE_X2Y = TE_njee(X, Y, **njee_kwargs)
            lg_results_TE_X2Y.write(method='njee', n_dim=dim, sample_size=samples, value=TE_X2Y)
            # Estimate Y -> X (arguments swapped)
            TE_Y2X = TE_njee(Y, X, **njee_kwargs)
            lg_results_TE_Y2X.write(method='njee', n_dim=dim, sample_size=samples, value=TE_Y2X)

lg_results_TE_X2Y.df.to_csv('results/njee/lg_results_TE_X2Y_dim.csv', index=False)
lg_results_TE_Y2X.df.to_csv('results/njee/lg_results_TE_Y2X_dim.csv', index=False)


### REPLICATE 1/5 ###

## Dim =  1 #
# Sample size =  10000 #
H(Y|Y-) = 2.5175, H(Y|X-,Y-) = 2.5037, TE(X->Y) = 0.0138  
H(Y|Y-) = 2.0275, H(Y|X-,Y-) = 1.8985, TE(X->Y) = 0.129   
# Sample size =  100000 #
H(Y|Y-) = 2.4663, H(Y|X-,Y-) = 2.4655, TE(X->Y) = 0.0008  
H(Y|Y-) = 1.8077, H(Y|X-,Y-) = 1.6889, TE(X->Y) = 0.1188  
## Dim =  2 #
# Sample size =  10000 #
H(Y|Y-) = 5.0046, H(Y|X-,Y-) = 4.9689, TE(X->Y) = 0.0357  
H(Y|Y-) = 3.9193, H(Y|X-,Y-) = 3.6659, TE(X->Y) = 0.2534  
# Sample size =  100000 #
H(Y|Y-) = 4.9045, H(Y|X-,Y-) = 4.8989, TE(X->Y) = 0.0056  
H(Y|Y-) = 3.6261, H(Y|X-,Y-) = 3.3874, TE(X->Y) = 0.2387  
## Dim =  3 #
# Sample size =  10000 #
H(Y|Y-) = 7.4832, H(Y|X-,Y-) = 7.4299, TE(X->Y) = 0.0533  
H(Y|Y-) = 5.9056, H(Y|X-,Y-) = 5.5118, TE(X->Y) = 0.3938  
# Sample size =  100000 #
H(Y|Y-) = 7.2293, H(Y|X-,Y-) = 7.2254, TE(X->Y) = 0.0039  
H(Y|Y-) = 5.5031, H(Y|X-,Y-) = 5.1409, TE(X->Y) = 0.3622  
## Dim =  4 #
# Sample size =  10000 #
H(Y|Y-) = 9.8734, H(Y|X-,Y-) = 9.7

## Joint Process

In [25]:
# One joint-process simulator (lam = 0.0) per dimensionality (no n_redundant_dim argument given)
jp_generator_lst = [
    MVJointProcessSimulator(n_dim=n_dim, lam=0.0)
    for n_dim in dim_range
]
# Analytic transfer entropy of each simulator, in both directions
jp_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in jp_generator_lst]
jp_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in jp_generator_lst]

In [26]:
# Dimensionality scaling without redundant dimensions, joint process: for every
# replicate, dimensionality and series length, simulate and estimate the
# transfer entropy in both directions with TE_njee, then export to CSV.
#
# NOTE: relies on `dim_range`, `sample_sizes` and `jp_generator_lst` as defined
# in this section's setup cells. These names are reused by other sections, so
# re-run the setup cells first when executing cells out of order.

# Create the output folder up front: without it the to_csv calls at the end
# would fail only after the whole (expensive) loop has finished, losing all results.
os.makedirs('results/njee', exist_ok=True)

jp_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])
jp_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])

# Estimator settings shared by both directions
njee_kwargs = dict(device=compute_device, n_bins=N_BINS, epochs=EPOCHS, batch_size=BATCH_SIZE)

for r in range(REPLICATES):
    print(f"\n### REPLICATE {r+1}/{REPLICATES} ###\n")
    for dim, generator in zip(dim_range, jp_generator_lst):
        print("## Dim = ", dim, "#")
        for samples in sample_sizes:
            print("# Sample size = ", samples, "#")
            # Simulate data; the replicate index doubles as the seed
            X, Y = generator.simulate(time=samples, seed=r)
            # Estimate X -> Y
            TE_X2Y = TE_njee(X, Y, **njee_kwargs)
            jp_results_TE_X2Y.write(method='njee', n_dim=dim, sample_size=samples, value=TE_X2Y)
            # Estimate Y -> X (arguments swapped)
            TE_Y2X = TE_njee(Y, X, **njee_kwargs)
            jp_results_TE_Y2X.write(method='njee', n_dim=dim, sample_size=samples, value=TE_Y2X)

jp_results_TE_X2Y.df.to_csv('results/njee/jp_results_TE_X2Y_dim.csv', index=False)
jp_results_TE_Y2X.df.to_csv('results/njee/jp_results_TE_Y2X_dim.csv', index=False)


### REPLICATE 1/5 ###

## Dim =  1 #
# Sample size =  10000 #
H(Y|Y-) = 2.541, H(Y|X-,Y-) = 1.7312, TE(X->Y) = 0.8098  
H(Y|Y-) = 2.5376, H(Y|X-,Y-) = 2.5255, TE(X->Y) = 0.0121  
# Sample size =  100000 #
H(Y|Y-) = 2.4456, H(Y|X-,Y-) = 1.6413, TE(X->Y) = 0.8043  
H(Y|Y-) = 2.447, H(Y|X-,Y-) = 2.4468, TE(X->Y) = 0.0002  
## Dim =  2 #
# Sample size =  10000 #
H(Y|Y-) = 5.1318, H(Y|X-,Y-) = 3.5155, TE(X->Y) = 1.6163  
H(Y|Y-) = 5.1038, H(Y|X-,Y-) = 5.0731, TE(X->Y) = 0.0307  
# Sample size =  100000 #
H(Y|Y-) = 4.8954, H(Y|X-,Y-) = 3.2917, TE(X->Y) = 1.6037  
H(Y|Y-) = 4.9385, H(Y|X-,Y-) = 4.9366, TE(X->Y) = 0.0019  
## Dim =  3 #
# Sample size =  10000 #
H(Y|Y-) = 7.7324, H(Y|X-,Y-) = 5.2934, TE(X->Y) = 2.439   
H(Y|Y-) = 7.6316, H(Y|X-,Y-) = 7.5887, TE(X->Y) = 0.0429  
# Sample size =  100000 #
H(Y|Y-) = 7.2769, H(Y|X-,Y-) = 4.8827, TE(X->Y) = 2.3942  
H(Y|Y-) = 7.3603, H(Y|X-,Y-) = 7.3564, TE(X->Y) = 0.0039  
## Dim =  4 #
# Sample size =  10000 #
H(Y|Y-) = 10.2521, H(Y|X-,Y-) = 7.00