In [15]:
import sys
sys.path.append('..')
import numpy as np
import pandas as pd
import ast
import math
from analysis.directed_information import *
from analysis.simulations import VAR, NVAR
from analysis.utils import *

In [None]:
def evaluate_var(num, m, order, max_coef, t, estimators, subset_selection=None):
    """Simulate several VAR systems and estimate their directed-information graphs.

    For every run a fresh ``VAR(m, order)`` is created, ``generate(max_coef)``
    is called on it, and a time series is drawn with ``simulate(t)``.  The
    model's own coefficients and ``directed_information_graph()`` are recorded,
    and ``directed_information_graph`` is evaluated on the simulated series
    once per estimator.

    Parameters
    ----------
    num : int or iterable
        Number of runs.  An iterable is also accepted, in which case one run
        is performed per item (the items themselves are ignored).
    m, order : forwarded to ``VAR(m, order)``; ``order`` is also forwarded to
        ``directed_information_graph``.
    max_coef : forwarded to ``VAR.generate``.
    t : forwarded to ``VAR.simulate``.
    estimators : iterable
        Estimator objects, each passed as ``estimator=`` to
        ``directed_information_graph``.
    subset_selection : optional
        Forwarded unchanged to ``directed_information_graph``.

    Returns
    -------
    tuple of three lists ``(coefficients, di_exact, estimates)``, each with one
    entry per run.  ``estimates[i]`` is itself a list with one entry per
    estimator, in the order given.  All entries are the ``.tolist()`` form of
    the underlying results.
    """
    # `num` is a count in the notebook; iterating over a bare int raises
    # TypeError, so turn integer counts into a range first.
    runs = range(num) if isinstance(num, (int, np.integer)) else num

    coefficients = []
    di_exact = []
    estimates = []

    for _ in runs:
        var = VAR(m, order)
        var.generate(max_coef)
        timeseries = var.simulate(t)

        # exact
        coefficients.append(var.get_coefficients().tolist())
        di_exact.append(var.directed_information_graph().tolist())

        # estimate: one graph per estimator, all on the same simulated series
        estimates.append([
            directed_information_graph(
                timeseries,
                order=order,
                subset_selection=subset_selection,
                estimator=estimator,
            ).tolist()
            for estimator in estimators
        ])

    return coefficients, di_exact, estimates
    

def classify(real, real_threshold, estimate, estimate_threshold):
    """Count confusion-matrix entries for thresholded DI values.

    Both inputs are flattened and compared element by element.  An element is
    predicted positive when ``estimate >= estimate_threshold`` and is actually
    positive when ``real >= real_threshold``.

    Parameters
    ----------
    real, estimate : arrays of identical shape (must provide ``.shape`` and
        ``.flatten()``).
    real_threshold, estimate_threshold : cut-offs applied to ``real`` and
        ``estimate`` respectively.

    Returns
    -------
    dict with integer counts under the keys ``'TP'``, ``'FP'``, ``'TN'``, ``'FN'``.

    Raises
    ------
    AssertionError
        If the two inputs do not have the same shape.
    """
    assert real.shape == estimate.shape, 'Dimensions of real and estimated DI values must match'

    tally = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}

    for truth, guess in zip(real.flatten(), estimate.flatten()):
        if guess >= estimate_threshold:
            # predicted positive
            outcome = 'TP' if truth >= real_threshold else 'FP'
        else:
            # predicted negative
            outcome = 'TN' if truth < real_threshold else 'FN'
        tally[outcome] += 1

    return tally

In [20]:
# Experiment grid: series lengths, system sizes, and the value handed to
# evaluate_var's `max_coef` parameter for each system size.
ts = [1000]
ms = [2, 5, 10]
n_coefs = [math.ceil(m/2) for m in ms]

# number of simulated systems per (t, m) configuration
num = 100

# NOTE(review): the original call passed a `subset_selection` name that is not
# defined anywhere visible in this notebook. None matches evaluate_var's own
# default -- confirm the intended setting.
subset_selection = None

for t in ts:
    # prepare estimators, paired with the CSV column each one is written to;
    # the KNN variants use k = sqrt(t), sqrt(t)/2 and sqrt(t)/4 (rounded up)
    sqrt_t = math.sqrt(t)
    named_estimators = [
        ('kde', KDE()),
        ('knn_sqrt', KNN(k=math.ceil(sqrt_t))),
        ('knn_sqrt_2', KNN(k=math.ceil(sqrt_t / 2))),
        ('knn_sqrt_4', KNN(k=math.ceil(sqrt_t / 4))),
    ]
    estimators = [estimator for _, estimator in named_estimators]

    for m, n_coef in zip(ms, n_coefs):
        # Arguments are positional: the original mixed `order=1` with later
        # positional arguments (a SyntaxError) and ended in a stray colon.
        # range(num) supplies one loop item per simulated system.
        coefficients, di_exact, estimates = evaluate_var(
            range(num), m, 1, n_coef, t, estimators,
            subset_selection=subset_selection,
        )

        df = pd.DataFrame()
        df['coefficients'] = coefficients
        df['di_exact'] = di_exact

        # axis 0 indexes the run, axis 1 the estimator (same order as above)
        estimates_arr = np.array(estimates)
        for column, (name, _) in enumerate(named_estimators):
            df[name] = estimates_arr[:, column].tolist()

        df.to_csv(f't{t}_m{m}.csv', index = False)

