In [1]:
import importlib
from metricas import metricas as m
importlib.reload(m)
import pandas as pd
import glob
import numpy as np
from dtw import dtw
from scipy.spatial.distance import euclidean 

In [2]:
# Paths of the generated (synthetic) CSV files for each house; sorting keeps
# the per-house file order stable from run to run.
As, Bs, Cs = (
    sorted(glob.glob(pattern))
    for pattern in (
        "generated_csvs/A/*.csv",
        "generated_csvs/B/*.csv",
        "generated_csvs/C/*.csv",
    )
)

In [None]:
def compute_kld(fakes, original, bins=None):
    """Compute the KL divergence of each generated CSV against a reference series.

    Parameters
    ----------
    fakes : iterable of str
        Paths to CSV files; each file must contain a ``Watts`` column.
    original : array-like
        Reference series, passed unchanged as the first argument of
        ``m.kl_divergence``.
    bins : optional
        Forwarded to ``m.kl_divergence`` as ``bins``. When omitted (``None``)
        no ``bins`` argument is passed at all, so the metric's own default
        applies -- assumes ``m.kl_divergence`` defines one; TODO confirm.

    Returns
    -------
    list
        One divergence per file, rounded to 3 decimals, in the order of ``fakes``.
    """
    # Only forward `bins` when the caller chose a value; several cells in this
    # notebook call the helper without it.
    extra = {} if bins is None else {"bins": bins}
    divergences = []
    for fake in fakes:
        values = pd.read_csv(fake)['Watts'].values
        # A single sample is sent, so the first element of the result is its divergence.
        kld = m.kl_divergence(original, samples=[values], **extra)[0]
        divergences.append(np.round(kld, decimals=3))
    return divergences
def compute_js(fakes, original, bins=None):
    """Compute the JS divergence of each generated CSV against a reference series.

    Parameters
    ----------
    fakes : iterable of str
        Paths to CSV files; each file must contain a ``Watts`` column.
    original : numpy.ndarray
        Reference series; reshaped to a single column before the call.
    bins : optional
        Forwarded to ``m.js_divergence`` as ``bins``. When omitted (``None``)
        no ``bins`` argument is passed at all, so the metric's own default
        applies -- assumes ``m.js_divergence`` defines one; TODO confirm.

    Returns
    -------
    list
        One value per file, rounded to 3 decimals, in the order of ``fakes``.
    """
    # Only forward `bins` when the caller chose a value; several cells in this
    # notebook call the helper without it.
    extra = {} if bins is None else {"bins": bins}
    distances = []
    for fake in fakes:
        values = pd.read_csv(fake)['Watts'].values
        # Both series are handed over as (n, 1) column arrays; one sample in,
        # so the first element of the result is the value of interest.
        js = m.js_divergence(
            original.reshape(-1, 1),
            samples=[values.reshape(-1, 1)],
            **extra
        )[0]
        distances.append(np.round(js, decimals=3))
    return distances

def compute_w1(fakes, original):
    """Return ``m.w_distance`` between the reference and each generated CSV.

    ``fakes`` is an iterable of CSV paths with a ``Watts`` column and
    ``original`` is the reference array. Both series are reshaped to a single
    column before the call; the first element of the metric's result is kept
    and rounded to 3 decimals. The output list follows the order of ``fakes``.
    """
    def _distance(csv_path):
        watts = pd.read_csv(csv_path)['Watts'].values
        result = m.w_distance(original.reshape(-1, 1), watts.reshape(-1, 1))
        return np.round(result[0], decimals=3)

    return [_distance(csv_path) for csv_path in fakes]


# A

In [3]:
# Reference series for house A: only the 'Watts' column is kept, as a NumPy array.
A = pd.read_csv("datasets/A.csv")
wattsA = A['Watts'].values

## KL-d

In [None]:
# KL divergence of each generated A file against the A reference, with bins=50.
compute_kld(As, wattsA, bins=50)

[0.793, 0.729, 0.662]

## JS

In [None]:
# JS divergence of each generated A file against the A reference, with bins=10.
compute_js(As,wattsA, 10)

[0.406, 0.395, 0.376]

In [None]:
# Same comparison without an explicit bin count: this call only works if
# compute_js accepts a missing `bins` argument.
compute_js(As,wattsA)

[0.441, 0.426, 0.408]

## $W_1$

In [None]:
# m.w_distance between each generated A file and the A reference.
compute_w1(As,wattsA)

[11.093, 10.835, 9.979]

# B

## KL-d

In [4]:
# Reference series for house B: only the 'Watts' column is kept, as a NumPy array.
B = pd.read_csv("datasets/B.csv")
wattsB = B['Watts'].values

In [None]:
# Cross-house check: KL divergence between the A and C references; the third
# positional argument (300) is presumably the bin count -- confirm against metricas.
# NOTE(review): wattsC is first assigned in the "C" section further down, so a
# fresh top-to-bottom run raises NameError here -- confirm the intended cell order.
m.kl_divergence(wattsA, [wattsC], 300)

[1.5022796926563657]

In [None]:
# KL divergence of each generated B file against the B reference. `bins` is
# not passed, so this call only works if compute_kld accepts a missing `bins`.
compute_kld(Bs,wattsB)

[0.245, 0.283]

## JS

In [None]:
# JS divergence of each generated B file against the B reference. `bins` is
# not passed, so this call only works if compute_js accepts a missing `bins`.
compute_js(Bs,wattsB)

[0.249, 0.275]

## $W_1$

In [None]:
# m.w_distance between each generated B file and the B reference.
compute_w1(Bs,wattsB)

[2.505, 3.588]

# C

In [5]:
# Reference series for house C: only the 'Watts' column is kept, as a NumPy array.
C = pd.read_csv("datasets/C.csv")
wattsC = C['Watts'].values

## KL-d

In [None]:
# KL divergence of each generated C file against the C reference. `bins` is
# not passed, so this call only works if compute_kld accepts a missing `bins`.
compute_kld(Cs,wattsC)

[0.946, 1.02]

## JS

In [None]:
# JS divergence of each generated C file against the C reference. `bins` is
# not passed, so this call only works if compute_js accepts a missing `bins`.
compute_js(Cs,wattsC)

[0.469, 0.487]

## $W_1$

In [None]:
# m.w_distance between each generated C file and the C reference.
compute_w1(Cs,wattsC)

[6.453, 7.121]

In [None]:
# Quick look at the house A reference array (NumPy abbreviates the middle of long arrays).
wattsA

array([ 213.56098153,  545.51480718,  466.54735864, ...,  849.40541403,
        650.72824722, 1021.73125806])

In [None]:
from fastdtw import fastdtw

In [None]:
# fastdtw between the A and C reference series, using SciPy's euclidean as the
# point-wise distance. Neither `distance` nor `path` is read again in the
# visible part of this notebook.
distance, path = fastdtw(wattsA, wattsC, dist=euclidean)

# Test KL-d

In [7]:
# Reference series, in house order A, B, C.
originais = [wattsA, wattsB, wattsC]

# 'Watts' column of every generated file, grouped per house and kept in the
# same order as the sorted path lists.
falsosA, falsosB, falsosC = (
    [pd.read_csv(f)['Watts'].values for f in paths]
    for paths in (As, Bs, Cs)
)

In [49]:
# Flat list of every generated series: house A's first, then B's, then C's.
# score() further down iterates this notebook-level list.
falsos = falsosA + falsosB + falsosC

In [109]:
# Comparison groups for the symmetric-KL cells below: the three references
# first, then the generated series of A, B and C. The three names receive
# separate but identically built lists.
fA, fB, fC = (
    [[wattsA, wattsB, wattsC], falsosA, falsosB, falsosC]
    for _ in range(3)
)

In [115]:
# Symmetrised KL divergence between the house A reference (originais[0]) and
# every series of every group in fA, averaged over both directions.
casas = ["A", "B", "C"]
scoresA = []
for i, f in enumerate(fA):
    # Direction 1: the reference first, the whole group as samples.
    d1 = np.array(m.kl_divergence(originais[0], f, bins=20))
    # Direction 2: each series of the group first, the reference as the only sample.
    d2 = np.array([m.kl_divergence(a, [originais[0]], bins=20)[0] for a in f])
    scores = (d1 + d2) / 2
    scoresA.extend(scores)
    print (scores.round(decimals=2), end="|")

[0.   0.58 0.8 ]|[0.92 0.85 0.76]|[0.3  0.24]|[0.23 0.29]|

In [120]:
# Symmetrised KL divergence between the house B reference (originais[1]) and
# every series of every group in fB, averaged over both directions.
casas = ["A", "B", "C"]
scoresB = []
for i, f in enumerate(fB):
    # Direction 1: the reference first, the whole group as samples.
    d1 = np.array(m.kl_divergence(originais[1], f, bins=20))
    # Direction 2: each series of the group first, the reference as the only sample.
    d2 = np.array([m.kl_divergence(a, [originais[1]], bins=20)[0] for a in f])
    scores = (d1 + d2) / 2
    scoresB.extend(scores)
    print (scores.round(decimals=2), end="|")

[0.58 0.   0.1 ]|[1.91 2.01 1.72]|[0.28 0.4 ]|[0.63 0.72]|

In [118]:
# Symmetrised KL divergence between the house C reference (originais[2]) and
# every series of every group in fC, averaged over both directions.
casas = ["A", "B", "C"]
scoresC = []
for i, f in enumerate(fC):
    # Direction 1: the reference first, the whole group as samples.
    d1 = np.array(m.kl_divergence(originais[2], f, bins=20))
    # Direction 2: each series of the group first, the reference as the only sample.
    d2 = np.array([m.kl_divergence(a, [originais[2]], bins=20)[0] for a in f])
    # The per-group result gets its own name: adding a list to this array
    # under the accumulator's name would sum element-wise instead of appending.
    sym_kl = (d1 + d2) / 2
    scoresC.extend(sym_kl)
    print (sym_kl.round(decimals=2), end="|")
# The table-building cell reads the house C values from `scores`.
scores = scoresC

[0.8 0.1 0. ]|[2.57 2.66 2.36]|[0.42 0.6 ]|[0.96 1.08]|

0.1, 0., 2.57, 2.66, 2.36, 0.42, 0.6, 0.96, 1.08

In [122]:
# One column per reference house; each column lists that house's symmetric-KL
# value against every compared series, in the order the loops produced them.
dict_scores = dict(A=scoresA, B=scoresB, C=scores)
dfscores = pd.DataFrame(dict_scores)

In [125]:
# Transpose so that each house becomes a row.
# NOTE: this rebinds dfscores to its own transpose, so running the cell a
# second time flips the frame back.
dfscores = dfscores.T

In [127]:
# Ten labels, one per compared series; assigned as dfscores' column names in the next cell.
colunas = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]

In [128]:
# Requires dfscores to have exactly 10 columns at this point, i.e. the
# transposed layout.
dfscores.columns = colunas

In [131]:
# Display a rounded, transposed copy; dfscores itself is not modified.
dfscores.round(decimals=2).T

Unnamed: 0,A,B,C
A,0.0,0.58,0.8
B,0.58,0.0,0.1
C,0.8,0.1,0.0
D,0.92,1.91,2.57
E,0.85,2.01,2.66
F,0.76,1.72,2.36
G,0.3,0.28,0.42
H,0.24,0.4,0.6
I,0.23,0.63,0.96
J,0.29,0.72,1.08


In [9]:
# Euclidean distance between the A reference and the first generated A series
# (an element-by-element comparison, so both arrays need the same length).
euclidean(wattsA, falsosA[0])

79490.6682222386

# Predictive Score

- Treinar uma RNN em cada casa
- Salvar o modelo
- Testar modelo em cada dataset sintético

In [None]:
# TODO: redo this code using the fake (generated) datasets as training data

In [12]:
from metricas import utils as u

In [14]:
def create_dataset(dataset, look_back=1):
    """Slice a 2-D series into supervised (window, next row) pairs.

    Parameters
    ----------
    dataset : numpy.ndarray
        Array indexed as ``dataset[row, column]``; rows are consecutive steps.
    look_back : int, default 1
        Number of consecutive rows in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X[i]`` is ``dataset[i:i + look_back]`` and ``y[i]``
        is the row right after that window. Every row that has ``look_back``
        predecessors is used as a target, including the last row.
    """
    dataset = np.asarray(dataset)
    # One window per row that can serve as a target.
    n_windows = len(dataset) - look_back
    if n_windows <= 0:
        # Too short for a single window: return empty arrays that still carry
        # the window/feature dimensions so later reshapes keep working.
        trailing = dataset.shape[1:]
        return (np.empty((0, look_back) + trailing, dtype=dataset.dtype),
                np.empty((0,) + trailing, dtype=dataset.dtype))
    dataX = [dataset[i:i + look_back, :] for i in range(n_windows)]
    dataY = [dataset[i + look_back, :] for i in range(n_windows)]
    return np.array(dataX), np.array(dataY)

In [44]:
# Turn each reference series into a single-column 2-D array, the layout
# create_dataset indexes with [rows, :].
wattsA, wattsB, wattsC = (
    serie.reshape(-1, 1) for serie in (wattsA, wattsB, wattsC)
)

In [100]:
lookback = 60
# Sliding windows over the house C series: `lookback` past rows -> the next row.
X, y = create_dataset(wattsC, lookback)
# n_steps follows the window length so the two values cannot drift apart
# (n_steps is presumably the network's input length -- confirm in metricas.utils).
rnn = u.make_rnn(32, n_layers=2, n_steps=lookback, net_type='LSTM')
# A single scaler, fitted on the input windows only, is applied to both the
# inputs and the targets.
scalerTrain = u.MinMaxScaler().fit(X.reshape(-1,1))
trainX = scalerTrain.transform(X.reshape(-1,1)).reshape(X.shape)
trainY = scalerTrain.transform(y.reshape(-1,1)).reshape(y.shape)
opt = u.Adam(learning_rate=5e-4)
rnn.compile(optimizer=opt, loss='mse')

In [101]:
# Early stopping watches the training loss ('loss') with patience=2; no
# validation data is passed to fit. (u.EarlyStopping is presumably the Keras
# callback re-exported by metricas.utils -- confirm.)
es = u.EarlyStopping(monitor='loss', patience=2)
hist = rnn.fit(trainX, trainY, batch_size=60, epochs=50, callbacks=[es])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50


In [103]:
# Persist the network trained above on the house C series. The "h5" format is
# requested explicitly; the target path itself has no file extension.
rnn.save("models/casaC", save_format="h5")

In [35]:
from tensorflow.keras.models import load_model

In [76]:
# Reload the three per-house networks from disk, in house order A, B, C.
rnnA, rnnB, rnnC = (
    load_model(path_modelo)
    for path_modelo in ("models/casaA", "models/casaB", "models/casaC")
)

In [51]:
from sklearn.metrics import mean_squared_error as mse

In [95]:
def score(modelo, datasets=None, lookback=60):
    """Evaluate a saved model on a collection of series.

    Parameters
    ----------
    modelo : str
        Name of the saved model; it is loaded from ``"models/" + modelo``.
    datasets : iterable of numpy.ndarray, optional
        Series to evaluate. Defaults to the notebook-level ``falsos`` list.
    lookback : int, default 60
        Window length handed to ``create_dataset``.

    Returns
    -------
    numpy.ndarray
        One ``u.mean_absolute_error`` value per series, in input order,
        computed on min-max scaled targets and predictions.
    """
    rnn = load_model("models/" + modelo)
    if datasets is None:
        datasets = falsos
    scores = []
    for f in u.tqdm(datasets, "Computing score..."):
        f = f.reshape(-1,1)
        X_teste, y_teste = create_dataset(f, lookback)
        # The scaler is re-fitted on each evaluated series (its input windows),
        # not reused from training.
        scaler = u.MinMaxScaler().fit(X_teste.reshape(-1,1))
        testX = scaler.transform(X_teste.reshape(-1,1)).reshape(X_teste.shape)
        testY = scaler.transform(y_teste.reshape(-1,1)).reshape(y_teste.shape)
        predY = rnn.predict(testX, verbose=0)
        scores.append(u.mean_absolute_error(testY, predY))
    return np.array(scores)

In [96]:
# Scores of the saved "casaA" model on every series in `falsos`.
# NOTE: this rebinds `scoresA`, which earlier held the symmetric-KL values.
scoresA = score("casaA")

Computing score...: 100%|██████████| 7/7 [00:18<00:00,  2.60s/it]


In [98]:
# Scores of the saved "casaB" model on every series in `falsos`.
# NOTE: this rebinds `scoresB`, which earlier held the symmetric-KL values.
scoresB = score("casaB")

Computing score...: 100%|██████████| 7/7 [00:17<00:00,  2.50s/it]


In [109]:
# Mean score per group of generated series (A, B, C). The group boundaries
# follow the layout of `falsos` (falsosA + falsosB + falsosC) instead of
# hard-coded indices.
_bounds = np.cumsum([len(falsosA), len(falsosB)])
tuple(grupo.mean().round(decimals=3) for grupo in np.split(scoresA, _bounds))

(0.113, 0.061, 0.075)

In [110]:
# Mean score per group of generated series (A, B, C). The group boundaries
# follow the layout of `falsos` (falsosA + falsosB + falsosC) instead of
# hard-coded indices.
_bounds = np.cumsum([len(falsosA), len(falsosB)])
tuple(grupo.mean().round(decimals=3) for grupo in np.split(scoresB, _bounds))

(0.11, 0.056, 0.077)

In [105]:
# Scores of the saved "casaC" model on every series in `falsos`.
scoresC = score("casaC")

Computing score...: 100%|██████████| 7/7 [00:21<00:00,  3.00s/it]


In [111]:
# Mean score per group of generated series (A, B, C). The group boundaries
# follow the layout of `falsos` (falsosA + falsosB + falsosC) instead of
# hard-coded indices.
_bounds = np.cumsum([len(falsosA), len(falsosB)])
tuple(grupo.mean().round(decimals=3) for grupo in np.split(scoresC, _bounds))

(0.156, 0.06, 0.078)

In [112]:
# Load the house A model again (same path as in the earlier model-loading cell).
rnnA = load_model("models/casaA")