In [65]:
import pickle
import os
import pandas as pd
from tqdm import tqdm
import numpy as np

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, balanced_accuracy_score

from d2c.benchmark import D2CWrapper

from d2c.descriptors.loader import DataLoader

from sklearn.ensemble import RandomForestClassifier
from imblearn.ensemble import BalancedRandomForestClassifier


In [66]:
with open('/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/model (2).pkl', 'rb') as f:
    model = pickle.load(f)



CHOOSE THE BEST WAY TO LOAD THE DATASETS

In [132]:
# immport numpy
def load_data(filepath):
    data = np.loadtxt(filepath)
    return data

names = ['ex.txt', 'ex copy.txt', 'ex copy 2.txt']
ts_list = [load_data(os.path.join('/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/useful_data', name)) for name in names]

In [68]:
#import pandas

def load_data(filepath):
    data = pd.read_csv(filepath, delimiter=',', header=None).values
    return data

# Example usage
names = ['preprocessed_1.txt', 'preprocessed_2.txt']
ts_list = [load_data(os.path.join('/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/useful_data', name)) for name in names]


In [133]:
ts_list[0].shape, ts_list[1].shape, ts_list[2].shape

((1225, 48), (1225, 48), (1225, 48))

In [127]:
def inspect_data(data):
    for i, col in enumerate(data.T):
        unique_values = np.unique(col)
        if len(unique_values) == 1:
            print(f"Column {i} is constant with value {unique_values[0]}.")
        elif len(unique_values) == 0:
            print(f"Column {i} is empty.")

for ts in ts_list:
    inspect_data(ts)

In [102]:
# remove first row of each ts
ts_list = [ts[1:] for ts in ts_list]

In [103]:
# convert to numeric each ts
ts_list = [ts.astype(float) for ts in ts_list]

# remove rows with NaNs
ts_list = [ts[~np.isnan(ts).any(axis=1)] for ts in ts_list]

# remove columns with empty values
ts_list = [ts[:, ~np.all(np.isnan(ts), axis=0)] for ts in ts_list]

In [105]:
ts_list[0].shape, ts_list[1].shape

(0, 0)

In [134]:
for i, ts in enumerate(ts_list):
    print(f"Inspecting time series {i+1}:")
    print(ts.dtype)  # Check the data type of the array
    print(ts[:5])  # Print the first few rows to inspect

Inspecting time series 1:
float64
[[ 0.5  1.2  0.3 -0.4  2.1  0.9  0.5  1.2  0.3 -0.4  2.1  0.9  0.5  1.2
   0.3 -0.4  2.1  0.9  0.5  1.2  0.3 -0.4  2.1  0.9  0.5  1.2  0.3 -0.4
   2.1  0.9  0.5  1.2  0.3 -0.4  2.1  0.9  0.5  1.2  0.3 -0.4  2.1  0.9
   0.5  1.2  0.3 -0.4  2.1  0.9]
 [ 0.6  1.1  0.5 -0.3  2.3  1.   0.6  1.1  0.5 -0.3  2.3  1.   0.6  1.1
   0.5 -0.3  2.3  1.   0.6  1.1  0.5 -0.3  2.3  1.   0.6  1.1  0.5 -0.3
   2.3  1.   0.6  1.1  0.5 -0.3  2.3  1.   0.6  1.1  0.5 -0.3  2.3  1.
   0.6  1.1  0.5 -0.3  2.3  1. ]
 [ 0.4  1.3  0.4 -0.5  2.   0.8  0.4  1.3  0.4 -0.5  2.   0.8  0.4  1.3
   0.4 -0.5  2.   0.8  0.4  1.3  0.4 -0.5  2.   0.8  0.4  1.3  0.4 -0.5
   2.   0.8  0.4  1.3  0.4 -0.5  2.   0.8  0.4  1.3  0.4 -0.5  2.   0.8
   0.4  1.3  0.4 -0.5  2.   0.8]
 [ 0.7  1.   0.6 -0.2  2.2  0.9  0.7  1.   0.6 -0.2  2.2  0.9  0.7  1.
   0.6 -0.2  2.2  0.9  0.7  1.   0.6 -0.2  2.2  0.9  0.7  1.   0.6 -0.2
   2.2  0.9  0.7  1.   0.6 -0.2  2.2  0.9  0.7  1.   0.6 -0.2  2.2  0.9
   0.

In [114]:
def check_std(data):
    for i, col in enumerate(data.T):
        if np.std(col) == 0:
            print(f"Warning: Column {i} has zero standard deviation.")

for ts in ts_list:
    check_std(ts)


In [115]:
def normalize_data(data):
    std_dev = np.std(data, axis=0)
    # Avoid division by zero
    std_dev[std_dev == 0] = 1
    return (data - np.mean(data, axis=0)) / std_dev

# Apply this normalization before passing the data to the model
ts_list = [normalize_data(ts) for ts in ts_list]

In [135]:
ts_dict = dict(zip(names, ts_list))
for key, value in ts_dict.items():
    print(key, value.shape)

# save first dim of each ts in a list named 'maxlags'
maxlags = [ts.shape[0] for ts in ts_list]

# save second dim of each ts in a list named 'n_variables'
n_variables = [ts.shape[1] for ts in ts_list]


ex.txt (1225, 48)
ex copy.txt (1225, 48)
ex copy 2.txt (1225, 48)


In [136]:
d2cwrapper = D2CWrapper(ts_list=ts_list, 
                        n_variables=n_variables[0], 
                        model=model, 
                        maxlags=maxlags[0], 
                        n_jobs=1, 
                        full=True, 
                        quantiles=True,
                        filename='d2c_results',
                        normalize=True, 
                        cmi='original', 
                        mb_estimator='original')



In [138]:
d2cwrapper.run()

ValueError: x and y must have length at least 2.

In [None]:
causal_df = d2cwrapper.get_causal_dfs()