In [1]:
import pickle
import os
import pandas as pd
from tqdm import tqdm
import numpy as np
import joblib

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, balanced_accuracy_score

from d2c.benchmark import D2CWrapper

from d2c.descriptors.loader import DataLoader

from sklearn.ensemble import RandomForestClassifier
from imblearn.ensemble import BalancedRandomForestClassifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score


In [5]:
# Load the pre-trained classifier. joblib.load unpickles the file, which can
# execute arbitrary code, so only point this at a model file you trust.
model = joblib.load(
    '/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/model.pkl'
)

# Read the temperature series as a NumPy array: skip the header row and keep
# only columns 1 and 2 (column 0 is not loaded).
ts = np.loadtxt(
    '/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/data/temperature/temperature.txt',
    delimiter=',',
    skiprows=1,
    usecols=(1, 2),
)

In [7]:
# Configure the D2C wrapper for the single temperature series using the
# pre-trained model loaded above.
d2cwrapper = D2CWrapper(
    ts_list=[ts],
    n_variables=2,  # `ts` was loaded with two columns
    model=model,
    maxlags=1,
    n_jobs=1,
    full=True,
    quantiles=True,
    filename='d2c_results',
    normalize=True,
    cmi='original',
    mb_estimator='original',
)

# Execute the configured analysis.
d2cwrapper.run()

# Retrieve the resulting causal tables; the entry for this series is read
# later as causal_df[0].
causal_df = d2cwrapper.get_causal_dfs()

Shape of X: (167, 3), Shape of Y: (167,)
Shape of X: (167, 3), Shape of Y: (167,)
Shape of X: (167, 3), Shape of Y: (167,)
Shape of X: (167, 3), Shape of Y: (167,)
Shape of X: (167, 3), Shape of Y: (167,)
Shape of X: (167, 3), Shape of Y: (167,)
Shape of X: (167, 3), Shape of Y: (167,)
Shape of X: (167, 3), Shape of Y: (167,)


In [8]:
# Display the object returned by d2cwrapper.get_causal_dfs().
causal_df

{0:    from  to effect p_value  probability  is_causal
 0     3   1   None    None         0.22      False
 1     2   0   None    None         0.52       True
 2     2   1   None    None         0.18      False
 3     3   0   None    None         0.24      False}

In [9]:
# Take the table stored under key 0 and order its rows by the 'from' and
# 'to' columns.
df = causal_df[0].sort_values(by=['from', 'to'])
df

Unnamed: 0,from,to,effect,p_value,probability,is_causal
1,2,0,,,0.52,True
2,2,1,,,0.18,False
3,3,0,,,0.24,False
0,3,1,,,0.22,False


In [10]:
# Save the sorted table without its index column. The results directory is
# created first because DataFrame.to_csv does not create missing parent
# folders and would raise OSError on a fresh checkout.
causal_df_path = '/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/data/temperature/results/causal_df.csv'
os.makedirs(os.path.dirname(causal_df_path), exist_ok=True)
df.to_csv(causal_df_path, index=False)

In [11]:
# Re-read the temperature file with pandas so that the header row (the
# variable names) is kept.
ts2 = pd.read_csv('/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/data/temperature/temperature.txt')

# Discard the first column; only the remaining columns are variables.
ts2 = ts2.iloc[:, 1:]

# Column headers of the remaining variables.
names = ts2.columns

# Number the variables from 1 upwards, following column order.
name_to_number = dict(zip(names, range(1, len(names) + 1)))

name_to_number

{'Indoor temperature': 1, 'Outdoor temperature': 2}

In [12]:
# Distinct node indices appearing in the 'from' column, in order of
# first appearance.
pd.unique(df['from'])

array([2, 3])

In [13]:
# Distinct node indices appearing in the 'to' column, in order of
# first appearance.
pd.unique(df['to'])

array([0, 1])

In [14]:
# Translate the node indices found in 'from' (2 and 3) into the 1-based
# variable numbers used by name_to_number.
# NOTE(review): presumably 2 and 3 are the lag-1 copies of variables 0 and 1
# (n_variables=2, maxlags=1) — confirm against D2CWrapper's indexing.
mapping = {2: 1, 3: 2}

# Apply the mapping to the untouched values in causal_df[0] instead of to
# df['from'] itself: replacing in place is not idempotent, because re-running
# the cell would turn an already-mapped 2 into 1. Column assignment aligns on
# the index, so df keeps its sorted row order.
df['from'] = causal_df[0]['from'].replace(mapping)

In [15]:
# Translate the node indices found in 'to' (0 and 1) into the 1-based
# variable numbers used by name_to_number.
mapping = {0: 1, 1: 2}

# Apply the mapping to the untouched values in causal_df[0] instead of to
# df['to'] itself: replacing in place is not idempotent, because re-running
# the cell would turn an already-mapped 1 into 2. Column assignment aligns on
# the index, so df keeps its sorted row order.
df['to'] = causal_df[0]['to'].replace(mapping)

In [16]:
# Display df after the 'from' and 'to' columns have been renumbered.
df

Unnamed: 0,from,to,effect,p_value,probability,is_causal
1,1,1,,,0.52,True
2,1,2,,,0.18,False
3,2,1,,,0.24,False
0,2,2,,,0.22,False


In [17]:
# Keep only the rows whose 'is_causal' flag equals True.
df.loc[df['is_causal'].eq(True)]

Unnamed: 0,from,to,effect,p_value,probability,is_causal
1,1,1,,,0.52,True


In [18]:
# Select the 'from' and 'to' columns of the rows flagged as causal.
# A single .loc call followed by .copy() gives an independent frame, so the
# column assignments below do not trigger SettingWithCopyWarning the way the
# chained df[mask][cols] selection does.
caus = df.loc[df['is_causal'] == True, ['from', 'to']].copy()

# Invert name_to_number to look variable names up by their number.
number_to_name = {number: name for name, number in name_to_number.items()}

# Replace the variable numbers with the variable names.
caus['from'] = caus['from'].replace(number_to_name)
caus['to'] = caus['to'].replace(number_to_name)

caus

Unnamed: 0,from,to
1,Indoor temperature,Indoor temperature


In [19]:
# Save caus to a CSV file without its index column. The results directory is
# created first because DataFrame.to_csv does not create missing parent
# folders and would raise OSError on a fresh checkout.
causal_relations_path = '/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/data/temperature/results/causal_relations.csv'
os.makedirs(os.path.dirname(causal_relations_path), exist_ok=True)
caus.to_csv(causal_relations_path, index=False)

In [20]:
# Load the ground-truth file as a DataFrame; its 'From -> To' column is
# compared against the predicted edges further down.
gt = pd.read_csv('/home/jpalombarini/td2c/notebooks/contributions/Real_data_validation/data/temperature/ground_truth.txt')
gt

Unnamed: 0,From -> To
0,Outdoor temperature -> Indoor temperature


In [21]:
# Collapse the 'from' and 'to' columns of caus into a single 'From -> To'
# label column, matching the format of the ground-truth file.
# The guard makes the cell safe to re-run: once caus has been converted it no
# longer has 'from'/'to' columns. Building a new frame avoids assigning into a
# filtered slice, and casting to str keeps the concatenation valid even when
# an endpoint is still numeric (e.g. no rows were selected).
if {'from', 'to'}.issubset(caus.columns):
    caus = (caus['from'].astype(str) + ' -> ' + caus['to'].astype(str)).to_frame('From -> To')
caus

Unnamed: 0,From -> To
1,Indoor temperature -> Indoor temperature


In [22]:
# Count the predicted edges that also appear in the ground truth, and report
# that count relative to the number of ground-truth edges.
n_correct = int(caus['From -> To'].isin(gt['From -> To']).sum())
n_truth = gt.shape[0]

# Guard against an empty ground-truth table, which would otherwise raise
# ZeroDivisionError.
pct_correct = round(n_correct / n_truth * 100, 2) if n_truth else 0.0

print(f'Numbers of correctly estimated causal paths: {n_correct} / {n_truth}')
print(f'Percentage of correctly estimated causal paths: {pct_correct}%')

Numbers of correctly estimated causal paths: 0 / 1
Percentage of correctly estimated causal paths: 0.0%
