In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = str(1)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
from dataloader import *
from model import *
from utilities import *
from retrain_fun import *
import random

random.seed(0)
np.random.seed(0)
jax.random.PRNGKey(0)
os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions --xla_gpu_autotune_level=2"
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"



In [3]:
data_aggregated, houses = data_preprocess(only_positive=True)

In [4]:
houses

array([3538, 5746, 7719, 7901, 8565, 9278, 8156, 8386, 9160, 9019,  661,
       1642, 2335, 2361, 2818, 3456, 4373, 7536, 7800, 7951])

In [5]:
train_list = [[2361, 7719, 9019, 2335, 7951, 5746, 8565, 9278, 8156, 8386, 9160, 661, 1642, 7536, 7800]]
test_list = [[4373, 7901, 3456, 3538, 2818]]

In [6]:
dict_bs_lr = {
    "air": (1, 0.0001),
    "refrigerator": (4096, 0.01),
    "furnace": (2048, 0.0001),
    "dishwasher": (4096, 0.0001),
    "clotheswasher": (2048, 0.0001),
}

In [7]:
appliance = "air"

In [8]:
train_houses = train_list[0]
test_houses = test_list[0]
train = data_aggregated[data_aggregated["dataid"].isin(train_houses)]
test = data_aggregated[data_aggregated["dataid"].isin(test_houses)]

In [9]:
n = 99
x_train, y_train = dataloader(appliance, train, "2018-03-01 00:00:00-06", "2018-04-30 23:59:00-06", n)
scaler_x = StandardScaler()
scaler_y = StandardScaler()
x_train = scaler_x.fit_transform(x_train)
y_train = scaler_y.fit_transform(y_train)
x_train = jnp.array(x_train).reshape(x_train.shape[0], n, 1)
y_train = jnp.array(y_train)
model = seq2point()
params = model.init(jax.random.PRNGKey(0), x_train, True)
params, losses = fit(
    model,
    params,
    x_train,
    y_train,
    False,
    batch_size=dict_bs_lr[f"{appliance}"][0],
    learning_rate=dict_bs_lr[f"{appliance}"][1],
    epochs=30,
)
x_test, y_test = dataloader(appliance, test, "2018-05-01 00:00:00-06", "2018-05-10 23:59:00-06", n)
x_test = scaler_x.transform(x_test)
x_test = jnp.array(x_test).reshape(x_test.shape[0], n, 1)
y_test = jnp.array(y_test)
y_hat = model.apply(params, x_test, True, rngs={"dropout": jax.random.PRNGKey(0)})
test_mean = scaler_y.inverse_transform(y_hat[0])
test_sigma = scaler_y.scale_ * y_hat[1]
print(f"RMSE : {rmse(y_test, test_mean):.4f}, MAE : {mae(y_test, test_mean):.4f}")

2023-05-08 00:47:20.387457: W external/org_tensorflow/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc:825] None of the algorithms provided by cuDNN heuristics worked; trying fallback algorithms.  Conv: (f32[1030660,40,99]{2,1,0}, u8[0]{0}) custom-call(f32[1030660,30,104]{2,1,0}, f32[40,30,6]{2,1,0}), window={size=6}, dim_labels=bf0_oi0->bf0, custom_call_target="__cudnn$convForward", backend_config="{\"conv_result_scale\":1,\"activation_mode\":\"0\",\"side_input_scale\":0}"
2023-05-08 00:47:32.227135: W external/org_tensorflow/tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 19.02GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


XlaRuntimeError: UNKNOWN: Failed to determine best cudnn convolution algorithm for:
%cudnn-conv.1 = (f32[1030660,50,99]{2,1,0}, u8[0]{0}) custom-call(f32[1030660,50,99]{2,1,0} %transpose, f32[50,50,5]{2,1,0} %transpose.1), window={size=5 pad=2_2}, dim_labels=bf0_oi0->bf0, custom_call_target="__cudnn$convForward", metadata={op_name="jit(conv_general_dilated)/jit(main)/conv_general_dilated[window_strides=(1,) padding=((2, 2),) lhs_dilation=(1,) rhs_dilation=(1,) dimension_numbers=ConvDimensionNumbers(lhs_spec=(0, 2, 1), rhs_spec=(2, 1, 0), out_spec=(0, 2, 1)) feature_group_count=1 batch_group_count=1 precision=None preferred_element_type=None]" source_file="/home/dhruv.patel/final_active/pos/model.py" source_line=25}, backend_config="{\"conv_result_scale\":1,\"activation_mode\":\"0\",\"side_input_scale\":0}"

Original error: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 20423845216 bytes.

To ignore this failure and try to use a fallback algorithm (which may have suboptimal performance), use XLA_FLAGS=--xla_gpu_strict_conv_algorithm_picker=false.  Please also file a bug for the root cause of failing autotuning.

In [10]:
dict_bs_lr["air"]

(1, 0.0001)

In [None]:
plt.plot(losses)

In [None]:
idx1 = 0
idx2 = -1
fig, ax = plt.subplots(2, 2, figsize=(18, 10))
ax = ax.ravel()
ax[0].plot(y_test[idx1:idx2], label="True")
ax[1].plot(test_mean[idx1:idx2], label=f"$\mu$ Predicted", color="orange")
ax[2].plot(y_test[idx1:idx2], label="True")
ax[2].plot(test_mean[idx1:idx2], label=f"$\mu$ Predicted", color="orange")
ax[3].plot(y_test[idx1:idx2], label="True", alpha=0.7)
ax[3].plot(test_sigma[idx1:idx2], label=f"$\sigma$ Predicted", color="green")
ax[0].legend(fontsize=15, bbox_to_anchor=(0.5, 1))
ax[1].legend(fontsize=15, bbox_to_anchor=(0.5, 1))
ax[2].legend(fontsize=15, bbox_to_anchor=(0.5, 1))
ax[3].legend(fontsize=15, bbox_to_anchor=(0.5, 1))