In [1]:
import importlib
from metricas import metricas as m
importlib.reload(m)
import pandas as pd
import glob
import numpy as np
from scipy.spatial.distance import euclidean 

In [2]:
# Sorted paths of the generated CSV files, one list per sub-folder (A, B, C).
As, Bs, Cs = (
    sorted(glob.glob(pattern))
    for pattern in (
        "generated_csvs/A/*.csv",
        "generated_csvs/B/*.csv",
        "generated_csvs/C/*.csv",
    )
)

In [None]:
def compute_kld(fakes, original, bins=None):
    """Compute `m.kl_divergence` between `original` and each CSV in `fakes`.

    Parameters
    ----------
    fakes : iterable of str
        Paths to CSV files; each file must contain a 'Watts' column.
    original : array-like
        Reference samples, passed unchanged as the first argument of
        `m.kl_divergence`.
    bins : int, optional
        Forwarded as the `bins` keyword of `m.kl_divergence`. When omitted the
        keyword is not passed at all, so the metric's own default applies
        (assumes `m.kl_divergence` defines one -- TODO confirm).

    Returns
    -------
    list
        One divergence per file, rounded to 3 decimals, in the order of `fakes`.
    """
    # Several cells call this helper without `bins`; only forward the keyword
    # when the caller actually supplied it.
    extra = {} if bins is None else {"bins": bins}
    divergences = []
    for fake in fakes:
        values = pd.read_csv(fake)['Watts'].values
        kld = m.kl_divergence(original, samples=[values], **extra)[0]
        divergences.append(np.round(kld, decimals=3))
    return divergences
def compute_js(fakes, original, bins=None):
    """Compute `m.js_divergence` between `original` and each CSV in `fakes`.

    Parameters
    ----------
    fakes : iterable of str
        Paths to CSV files; each file must contain a 'Watts' column.
    original : numpy.ndarray
        Reference samples; reshaped to a single column before the call.
    bins : int, optional
        Forwarded as the `bins` keyword of `m.js_divergence`. When omitted the
        keyword is not passed at all, so the metric's own default applies
        (assumes `m.js_divergence` defines one -- TODO confirm).

    Returns
    -------
    list
        One value per file, rounded to 3 decimals, in the order of `fakes`.
    """
    # Several cells call this helper without `bins`; only forward the keyword
    # when the caller actually supplied it.
    extra = {} if bins is None else {"bins": bins}
    distances = []
    for fake in fakes:
        values = pd.read_csv(fake)['Watts'].values
        js = m.js_divergence(
            original.reshape(-1, 1), samples=[values.reshape(-1, 1)], **extra
        )[0]
        distances.append(np.round(js, decimals=3))
    return distances

def compute_w1(fakes, original):
    """Return `m.w_distance` from `original` to every CSV listed in `fakes`.

    Each file's 'Watts' column and `original` are reshaped to single-column
    arrays before the call; the first element of the result is kept and
    rounded to 3 decimals.
    """
    def _distance(csv_path):
        watts = pd.read_csv(csv_path)['Watts'].values
        w1 = m.w_distance(original.reshape(-1, 1), watts.reshape(-1, 1))[0]
        return np.round(w1, decimals=3)

    return [_distance(csv_path) for csv_path in fakes]


# A

In [3]:
# Load the original A dataset and keep its 'Watts' column as a NumPy array.
A = pd.read_csv("datasets/A.csv")
wattsA = A['Watts'].values

## KL-d

In [None]:
compute_kld(As, wattsA, bins=50)

[0.793, 0.729, 0.662]

## JS

In [None]:
compute_js(As,wattsA, 10)

[0.406, 0.395, 0.376]

In [None]:
compute_js(As,wattsA)

[0.441, 0.426, 0.408]

## $W_1$

In [None]:
compute_w1(As,wattsA)

[11.093, 10.835, 9.979]

# B

## KL-d

In [4]:
# Load the original B dataset and keep its 'Watts' column as a NumPy array.
B = pd.read_csv("datasets/B.csv")
wattsB = B['Watts'].values

In [None]:
# NOTE(review): wattsC is first assigned in the "# C" section further down, so
# on a fresh top-to-bottom run this cell raises NameError.
m.kl_divergence(wattsA, [wattsC], 300)

[1.5022796926563657]

In [None]:
compute_kld(Bs,wattsB)

[0.245, 0.283]

## JS

In [None]:
compute_js(Bs,wattsB)

[0.249, 0.275]

## $W_1$

In [None]:
compute_w1(Bs,wattsB)

[2.505, 3.588]

# C

In [5]:
# Load the original C dataset and keep its 'Watts' column as a NumPy array.
C = pd.read_csv("datasets/C.csv")
wattsC = C['Watts'].values

## KL-d

In [None]:
compute_kld(Cs,wattsC)

[0.946, 1.02]

## JS

In [None]:
compute_js(Cs,wattsC)

[0.469, 0.487]

## $W_1$

In [None]:
compute_w1(Cs,wattsC)

[6.453, 7.121]

In [None]:
wattsA

array([ 213.56098153,  545.51480718,  466.54735864, ...,  849.40541403,
        650.72824722, 1021.73125806])

In [None]:
from fastdtw import fastdtw

In [None]:
# NOTE(review): wattsA / wattsC are 1-D at this point, so `euclidean` is
# presumably invoked on individual values rather than vectors; some SciPy
# releases reject scalar input -- confirm this cell runs in the target
# environment. `distance` and `path` are not used by any later visible cell.
distance, path = fastdtw(wattsA, wattsC, dist=euclidean)

# Test KL-d

In [7]:
def _watts_from_csvs(paths):
    """Read every CSV in `paths` and return its 'Watts' column as an array."""
    return [pd.read_csv(path)['Watts'].values for path in paths]


# Original series, then the generated series grouped by source folder.
originais = [wattsA, wattsB, wattsC]
falsosA, falsosB, falsosC = map(_watts_from_csvs, (As, Bs, Cs))

In [49]:
falsos = falsosA + falsosB + falsosC

In [109]:
# Three separate but identically built comparison sets: the originals first,
# then the generated series of A, B and C.
fA, fB, fC = (
    [[wattsA, wattsB, wattsC], falsosA, falsosB, falsosC] for _ in range(3)
)

In [115]:
# Score every group in fA against originais[0]: the mean of m.kl_divergence
# evaluated in both argument orders, with 20 bins.
casas = ["A", "B", "C"]
reference = originais[0]
scoresA = []
for group in fA:
    forward = np.array(m.kl_divergence(reference, group, bins=20))
    backward = np.array(
        [m.kl_divergence(series, [reference], bins=20)[0] for series in group]
    )
    scores = (forward + backward) / 2
    scoresA.extend(scores)
    print(scores.round(decimals=2), end="|")

[0.   0.58 0.8 ]|[0.92 0.85 0.76]|[0.3  0.24]|[0.23 0.29]|

In [120]:
# Score every group in fB against originais[1]: the mean of m.kl_divergence
# evaluated in both argument orders, with 20 bins.
casas = ["A", "B", "C"]
reference = originais[1]
scoresB = []
for group in fB:
    forward = np.array(m.kl_divergence(reference, group, bins=20))
    backward = np.array(
        [m.kl_divergence(series, [reference], bins=20)[0] for series in group]
    )
    scores = (forward + backward) / 2
    scoresB.extend(scores)
    print(scores.round(decimals=2), end="|")

[0.58 0.   0.1 ]|[1.91 2.01 1.72]|[0.28 0.4 ]|[0.63 0.72]|

In [118]:
# Score every group in fC against originais[2]: the mean of m.kl_divergence
# evaluated in both argument orders, with 20 bins.
casas = ["A", "B", "C"]
scoresC = []
for f in fC:
    d1 = np.array(m.kl_divergence(originais[2], f, bins=20))
    d2 = np.array([m.kl_divergence(a, [originais[2]], bins=20)[0] for a in f])
    group_scores = (d1 + d2) / 2
    # Accumulate in a dedicated list. Adding `list(scores)` to the ndarray
    # itself is an element-wise sum (it doubled every value) and discarded the
    # results of the previous groups.
    scoresC.extend(group_scores)
    print(group_scores.round(decimals=2), end="|")

# The cell that builds `dict_scores` reads these results from `scores`.
scores = scoresC

[0.8 0.1 0. ]|[2.57 2.66 2.36]|[0.42 0.6 ]|[0.96 1.08]|

0.1, 0., 2.57, 2.66, 2.36, 0.42, 0.6, 0.96, 1.08

In [122]:
# One column per reference dataset; `scores` holds whatever the preceding
# cell left in that name.
dict_scores = dict(zip(['A', 'B', 'C'], [scoresA, scoresB, scores]))
dfscores = pd.DataFrame(dict_scores)

In [125]:
dfscores = dfscores.T

In [43]:
colunas = ["D", "E", "F", "G", "H", "I", "J"]

In [128]:
# After the transpose there is one column per compared series: the three
# originals followed by the seven generated ones. `colunas` only names the
# generated ones, so prepend the labels of the originals to get all 10.
dfscores.columns = casas + colunas

In [131]:
dfscores.round(decimals=2).T

Unnamed: 0,A,B,C
A,0.0,0.58,0.8
B,0.58,0.0,0.1
C,0.8,0.1,0.0
D,0.92,1.91,2.57
E,0.85,2.01,2.66
F,0.76,1.72,2.36
G,0.3,0.28,0.42
H,0.24,0.4,0.6
I,0.23,0.63,0.96
J,0.29,0.72,1.08


In [9]:
euclidean(wattsA, falsosA[0])

79490.6682222386

# Predictive Score

- Treinar uma RNN em cada casa
- Salvar o modelo
- Testar modelo em cada dataset sintético

In [None]:
# TODO: redo the code using the fake datasets as training data

In [3]:
from metricas import utils as u

2023-10-20 18:57:59.722106: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-20 18:57:59.959819: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-20 18:57:59.965778: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-10-20 18:57:59.965798: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if yo

In [4]:
def create_dataset(dataset, look_back=1):
    """Slice a 2-D series into sliding windows and their next-step targets.

    Parameters
    ----------
    dataset : numpy.ndarray
        2-D array indexed as ``dataset[row, :]`` (rows are consecutive steps).
    look_back : int, default 1
        Number of consecutive rows in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``dataX[i]`` is ``dataset[i:i + look_back, :]`` and ``dataY[i]`` is the
        row that immediately follows it, ``dataset[i + look_back, :]``.
    """
    # The last valid window starts at len(dataset) - look_back - 1, so there are
    # len(dataset) - look_back windows in total. The previous bound of
    # len(dataset) - look_back - 1 silently dropped the final sample.
    n_windows = len(dataset) - look_back
    dataX = [dataset[i:i + look_back, :] for i in range(n_windows)]
    dataY = [dataset[i + look_back, :] for i in range(n_windows)]
    return np.array(dataX), np.array(dataY)

In [5]:
# Reload the three original datasets and keep each 'Watts' column as a
# single-column 2-D array.
A, B, C = (
    pd.read_csv(path)
    for path in ("datasets/A.csv", "datasets/B.csv", "datasets/C.csv")
)
wattsA, wattsB, wattsC = (
    frame['Watts'].values.reshape(-1, 1) for frame in (A, B, C)
)

In [10]:
def save_models(casa, arrays_csvs, lookback=60):
    """Train one LSTM per CSV in `arrays_csvs` and save each under ``models/``.

    Parameters
    ----------
    casa : str
        Prefix of the saved model name; files are written to
        ``models/{casa}_{i}`` where ``i`` is the position in `arrays_csvs`.
    arrays_csvs : iterable of str
        Paths to CSV files with a 'Watts' column, used as training data.
    lookback : int, default 60
        Window length passed to `create_dataset` and, as `n_steps`, to
        `u.make_rnn` (presumably the network's input sequence length --
        TODO confirm against `metricas.utils`).
    """
    for i, a in enumerate(u.tqdm(arrays_csvs)):
        fake = pd.read_csv(a)["Watts"].values.reshape(-1, 1)
        X, y = create_dataset(fake, lookback)
        # Tie n_steps to the window length instead of repeating the literal 60.
        rnn = u.make_rnn(32, n_layers=2, n_steps=lookback, net_type='LSTM')
        # A single scaler, fitted on the flattened windows, is applied to both
        # the inputs and the targets.
        scalerTrain = u.MinMaxScaler().fit(X.reshape(-1, 1))
        trainX = scalerTrain.transform(X.reshape(-1, 1)).reshape(X.shape)
        trainY = scalerTrain.transform(y.reshape(-1, 1)).reshape(y.shape)
        opt = u.Adam(learning_rate=5e-4)
        rnn.compile(optimizer=opt, loss='mse')
        es = u.EarlyStopping(monitor='loss', patience=2)
        rnn.fit(trainX, trainY, batch_size=60, epochs=50, callbacks=[es], verbose=0)
        rnn.save(f"models/{casa}_{i}", save_format="h5")

In [13]:
save_models("casaC", Cs)

100%|██████████| 2/2 [05:13<00:00, 157.00s/it]


In [7]:
As

['generated_csvs/A/A_0.csv',
 'generated_csvs/A/A_1.csv',
 'generated_csvs/A/A_2.csv']

In [27]:
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_squared_error as mse

In [76]:
# NOTE(review): save_models in this notebook writes "models/{casa}_{i}", so
# these un-suffixed paths are not produced by any visible cell -- confirm the
# files exist. rnnA / rnnB / rnnC are not referenced by any later visible cell.
rnnA = load_model("models/casaA")
rnnB = load_model("models/casaB")
rnnC = load_model("models/casaC")

In [28]:
def score(modelos, originais=(), lookback=60):
    """Evaluate each saved model on each series and store the errors on disk.

    For every path in `modelos` the model is loaded, each series in
    `originais` is windowed with `create_dataset`, min-max scaled with a
    scaler fitted on that same series, and the mean absolute error of the
    model's predictions is recorded. The per-series errors of one model are
    saved to ``scores/{model_name}.npy``.

    Parameters
    ----------
    modelos : iterable of str
        Paths of saved models; the part after the last "/" names the output.
    originais : sequence of numpy.ndarray, optional
        Series to evaluate on; each is reshaped to a single column.
    lookback : int, default 60
        Window length passed to `create_dataset`.
    """
    import os

    # `model_path` rather than `m`, so the loop does not shadow the `metricas`
    # alias used elsewhere in the notebook.
    for model_path in modelos:
        rnn = load_model(model_path)
        scores = []
        for series in u.tqdm(originais, desc=f"Computing scores for model {model_path}"):
            series = series.reshape(-1, 1)
            X_teste, y_teste = create_dataset(series, lookback)
            scaler = u.MinMaxScaler().fit(X_teste.reshape(-1, 1))
            testX = scaler.transform(X_teste.reshape(-1, 1)).reshape(X_teste.shape)
            testY = scaler.transform(y_teste.reshape(-1, 1)).reshape(y_teste.shape)
            predY = rnn.predict(testX, verbose=0)
            scores.append(u.mean_absolute_error(testY, predY))
        nm_modelo = model_path.split("/")[-1]
        # np.save does not create missing directories.
        os.makedirs("scores", exist_ok=True)
        np.save(f"scores/{nm_modelo}.npy", np.array(scores))

In [25]:
modelos = sorted(glob.glob("models/*"))

In [29]:
score(modelos, originais=[wattsA, wattsB, wattsC])

Computing scores for model models/casaA_0: 100%|██████████| 3/3 [00:14<00:00,  4.74s/it]
Computing scores for model models/casaA_1: 100%|██████████| 3/3 [00:14<00:00,  4.98s/it]
Computing scores for model models/casaA_2: 100%|██████████| 3/3 [00:18<00:00,  6.08s/it]
Computing scores for model models/casaB_0: 100%|██████████| 3/3 [00:14<00:00,  4.86s/it]
Computing scores for model models/casaB_1: 100%|██████████| 3/3 [00:15<00:00,  5.05s/it]
Computing scores for model models/casaC_0: 100%|██████████| 3/3 [00:13<00:00,  4.52s/it]
Computing scores for model models/casaC_1: 100%|██████████| 3/3 [00:15<00:00,  5.09s/it]


In [33]:
f_scores = sorted(glob.glob("scores/*.npy"))

In [38]:
# Load every saved score vector, rounded to 3 decimals, in the order of f_scores.
vetor_scores = [np.load(score_file).round(decimals=3) for score_file in f_scores]

In [40]:
df_scores = pd.DataFrame(vetor_scores)

In [44]:
colunas

['D', 'E', 'F', 'G', 'H', 'I', 'J']

In [45]:
df_scores.columns = ["A", "B", "C"]

In [48]:
df_scores['index'] = colunas

In [52]:
df_scores.set_index(keys='index', inplace=True)

In [67]:
df_scores.loc[['D', 'E', 'F']].mean(axis=0).round(decimals=4)

A    0.0603
B    0.0663
C    0.0633
dtype: float64

In [68]:
df_scores.loc[['G', 'H']].mean(axis=0).round(decimals=4)

A    0.0455
B    0.0360
C    0.0250
dtype: float64

In [69]:
df_scores.loc[['I', 'J']].mean(axis=0).round(decimals=4)

A    0.0480
B    0.0540
C    0.0475
dtype: float64