In [10]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

from statsmodels.tsa.vector_ar.var_model import VARProcess

# Single seeded generator shared by every random draw in this notebook, so a
# fresh "Restart & Run All" reproduces the same coefficient structure.
random_generator = np.random.default_rng(seed=100)


In [11]:
def five_ancestor_structure(lags, nvar, n_ancestors=5):
    """Build a random, stable VAR process with a sparse dependency structure.

    For every target variable, ``n_ancestors`` distinct *other* variables are
    drawn as ancestors, each acting on the target at one randomly chosen lag.
    In addition, every variable depends on its own past at *every* lag.
    Each non-zero coefficient has a magnitude of ``0.5 + u * 0.4`` with ``u``
    drawn from ``random_generator.random()`` and a random sign.

    The whole coefficient tensor is then repeatedly scaled by 0.95 (always at
    least once) until ``VARProcess.is_stable()`` reports a stable process.

    All random draws come from the module-level ``random_generator``, in a
    fixed order (source, lag, magnitude, sign), so results are reproducible
    for a given generator state.

    Parameters
    ----------
    lags : int
        Number of lag matrices of the process; must be at least 1.
    nvar : int
        Number of variables; must be strictly greater than ``n_ancestors``
        so that enough distinct non-self ancestors exist.
    n_ancestors : int, default 5
        Number of distinct non-self ancestors drawn per target variable.

    Returns
    -------
    VARProcess
        Process built from a ``(lags, nvar, nvar)`` coefficient array indexed
        as ``[lag, target, source]``, with ``None`` as exogenous coefficients
        and an identity matrix as noise covariance.

    Raises
    ------
    ValueError
        If ``lags < 1``, ``n_ancestors < 0`` or ``nvar <= n_ancestors``.
    """
    # Validate up front: without this, too small an `nvar` (or `lags == 0`)
    # only fails deep inside `Generator.choice` with an unrelated message.
    if lags < 1:
        raise ValueError(f"lags must be at least 1, got {lags}")
    if n_ancestors < 0:
        raise ValueError(f"n_ancestors must be non-negative, got {n_ancestors}")
    if nvar <= n_ancestors:
        raise ValueError(
            f"nvar must be greater than n_ancestors ({n_ancestors}) so that each "
            f"variable has enough distinct ancestors, got nvar={nvar}"
        )

    def draw_coefficient():
        # Magnitude is drawn before the sign; keep this order so the stream
        # of random numbers (and hence the generated dataset) is unchanged.
        return (0.5 + random_generator.random() * 0.4) * random_generator.choice([-1, 1])

    matrix = np.zeros((lags, nvar, nvar))
    for target in range(nvar):
        # Ancestors are sampled without replacement among the other variables.
        candidates = [var for var in range(nvar) if var != target]
        for _ in range(n_ancestors):
            source = random_generator.choice(candidates)
            candidates.remove(source)
            lag = random_generator.choice(range(lags))
            matrix[lag, target, source] = draw_coefficient()
        # Every variable also depends on its own past at each lag.
        for lag in range(lags):
            matrix[lag, target, target] = draw_coefficient()

    # Shrink-then-test: the coefficients are damped at least once, and further
    # until the process is stable. The stable process is returned directly
    # instead of being constructed a second time.
    while True:
        matrix = matrix * 0.95
        process = VARProcess(matrix, None, np.identity(nvar))
        if process.is_stable():
            return process


def build_ground_truth(df, process):
    """List every non-zero lagged dependency encoded in ``process.coefs``.

    Parameters
    ----------
    df : DataFrame
        Only its column labels are used; each label must be convertible with
        ``int`` because it is used as the target index into the coefficients.
    process : object
        Anything exposing ``coefs``, an array indexed as
        ``[lag, target, source]``.

    Returns
    -------
    list of tuple
        ``(source, effect, lag)`` triples: ``source`` is the source index as a
        string, ``effect`` is the column label as found in ``df`` and ``lag``
        is 1-based. Triples are grouped by effect, in column order.
    """
    relations = []
    for effect in df.columns:
        # Boolean mask of shape (lags, sources) for this target variable.
        active = process.coefs[:, int(effect), :] != 0.0
        lag_positions, source_positions = active.nonzero()
        # Position 0 along the first axis holds the lag-1 coefficients,
        # hence the +1 to report lags starting at 1.
        relations.extend(
            (str(source), effect, lag + 1)
            for lag, source in zip(lag_positions, source_positions)
        )
    return relations
    
    
# Simulation settings, named once so the process dimension and the shape of
# the start-up values cannot drift apart.
N_VARS = 10            # number of series in the VAR process
N_STEPS = 4500         # value passed as `steps` to simulate_var
N_OBSERVATIONS = 3500  # trailing rows kept; everything before is discarded as burn-in

lags = 5
process = five_ancestor_structure(lags, N_VARS)

# One row of random start-up values per lag.
initial_values = random_generator.random(size=(lags, N_VARS))

# `seed=0` is forwarded to simulate_var so the simulated path is repeatable.
simulated = process.simulate_var(steps=N_STEPS, seed=0, initial_values=initial_values)
data = simulated[-N_OBSERVATIONS:]

# Columns are labelled with the variable index as a string ("0", "1", ...);
# build_ground_truth converts these labels back with int().
df = pd.DataFrame(data, columns=[str(i) for i in range(data.shape[1])])
assert df.shape == (N_OBSERVATIONS, N_VARS), df.shape

In [12]:
# Preview the first five simulated observations.
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-1.431313,0.851394,3.69621,-1.101658,-1.892232,-0.360604,1.355246,3.786136,-1.002231,-2.084142
1,-2.852313,-0.781119,1.975639,-1.676623,-1.018953,2.265764,-0.122705,1.190588,-0.708365,-0.252199
2,0.463805,1.558924,0.228395,-0.39834,-2.617672,0.761834,0.461621,2.855778,1.832045,4.413176
3,-1.289895,-0.399185,-0.348317,-0.827153,0.051859,1.328197,-0.23883,-0.867105,1.640613,-1.511691
4,-1.350532,-1.232358,0.982548,-1.761178,-0.043207,-0.160845,-0.340894,-1.193344,-0.945618,-1.456363


In [13]:
# One row per (source, effect, lag) relation. The tuples mix strings and
# integers, so passing them through a NumPy array stores every cell as text.
ground_truth = pd.DataFrame(data=np.asarray(build_ground_truth(df, process)))

In [14]:
# Persist the simulated series (header row kept, row index dropped).
df.to_csv(path_or_buf="./returns/data_2.csv", index=False)

In [15]:
# Persist the relations as bare rows: neither row index nor header is written.
ground_truth.to_csv(
    path_or_buf="./ground_truths/data_2.csv",
    index=False,
    header=False,
)

In [16]:
# Display the relation table; columns 0, 1, 2 hold source, effect and lag,
# in the order produced by build_ground_truth.
ground_truth

Unnamed: 0,0,1,2
0,0,0,1
1,8,0,1
2,0,0,2
3,0,0,3
4,3,0,3
...,...,...,...
95,9,9,3
96,5,9,4
97,9,9,4
98,8,9,5
