In [1]:
import os, sys, logging
import numpy as np
from tqdm import tqdm
import pandas as pd
import pickle
from graph_utils import get_nx_graph
from lsdlm import utils, lsdlm

import time
import argparse
from scipy import sparse
import networkx as nx
from epynet import Network
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import seaborn as sns

In [2]:
print('loading PEMS-BAY dataset...', end=' ')

# Sensor readings: the first CSV column is the timestamp index, the remaining
# columns are looked up as sensor ids further down.
df_raw = pd.read_csv('data/PEMS-BAY.csv', index_col=0)
df_raw.index = pd.to_datetime(df_raw.index)
# NOTE(review): without regex=True this only rewrites cells whose entire value
# is ';' -- it does not touch a ';' embedded in a longer string. Confirm intent.
df_raw = df_raw.replace(';', '.')
# Put the series on a regular 5-minute grid and fill every missing value
# (including rows inserted for absent timestamps) with 0.
# '5min' is the non-deprecated spelling of the legacy '5T' alias.
df_raw = df_raw.resample('5min').asfreq().fillna(0)

# NOTE: unpickling can execute arbitrary code -- only load a trusted file here.
with open('data/adj_mx_PEMS-BAY.pkl', 'rb') as f:
    sensor_ids, sensor_id_to_ind, adj_mx = pickle.load(f, encoding='latin1')

# Reorder the adjacency matrix so row/column k corresponds to the k-th column
# of df_raw. np.ix_ picks the (order x order) sub-grid in one vectorised step;
# the float cast keeps the float64 dtype the np.zeros-based loop used to yield.
# Assumes adj_mx is a dense array-like -- TODO confirm.
N = len(df_raw.columns)
order = [sensor_id_to_ind[str(sensor)] for sensor in df_raw.columns]
adj = np.asarray(adj_mx, dtype=float)[np.ix_(order, order)]

print('done.')

loading PEMS-BAY dataset... done.


In [3]:
# Per-column summary statistics, then each statistic averaged over all columns.
per_column_summary = df_raw.describe()
per_column_summary.mean(axis=1)

count    52128.000000
mean        62.605158
std          8.623266
min          0.000000
25%         62.347615
50%         65.372769
75%         67.035692
max         75.492000
dtype: float64

In [6]:
def train(training_dataset, weight_matrix, save_to='data/pretrained_PEMS-BAY.model', num_diff_periods=5):
    """Fit an ``lsdlm.DLM`` on ``training_dataset`` and save it to ``save_to``.

    Parameters
    ----------
    training_dataset
        Data handed unchanged to ``DLM.fit``.
    weight_matrix
        2-D array of edge weights. It is combined with its own transpose via
        an element-wise maximum, so it is expected to be square; the result is
        symmetric, i.e. the graph is treated as undirected.
    save_to
        Path handed to ``DLM.save_model``.
    num_diff_periods
        Forwarded to the ``DLM`` constructor. Defaults to 5, the value that
        used to be hard-coded here.

    Returns
    -------
    The fitted model object, so a caller may use it without reloading it
    from ``save_to``.
    """
    symmetric_weights = np.maximum(weight_matrix, weight_matrix.T)  # undirected graph
    model = lsdlm.DLM(adj_mx=symmetric_weights, num_diff_periods=num_diff_periods)
    print('model created... start to train...')
    model.fit(training_dataset)
    model.save_model(save_to)
    print('training finished!')
    return model

# Split into training and test sets (the helper reports an 8:2 ratio when run).
df_train, df_test = utils.split_dataset(df_raw)

# Write the textual repr of each split to disk for a quick manual look.
# str(DataFrame) is pandas' display form (possibly truncated), not a lossless
# export. The `with` block closes each file even if the write fails.
for dump_path, split_frame in (("test_df_train.txt", df_train),
                               ("test_df_test.txt", df_test)):
    with open(dump_path, "w+") as dump_file:
        dump_file.write(str(split_frame))

splitting dataset to training and test set (8:2 ratio)... done.


In [None]:
# Ask the preprocessing helper to turn 0.0 readings in the test set into NaN.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
df_test = utils.preprocess(df_test, replace={'from': 0.0, 'to': np.nan})
need_train = True

# NOTE(review): the file name says 2018_SCADA although the data is PEMS-BAY --
# confirm this is the intended path.
train_model_path = 'data/pretrained_2018_SCADA.model'
if need_train:
    # Training is slow (around 16 min), so the fitted model is saved to disk
    # and reloaded below; set need_train to False to reuse a saved model.
    train(save_to=train_model_path, training_dataset=df_train, weight_matrix=adj)

# NOTE: unpickling can execute arbitrary code -- only load a trusted file here.
with open(train_model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Time only the predict call, so the figure printed below excludes the RMSE work.
before = time.perf_counter()
df_pred = model.predict(df_test, step_ahead=3)
prediction_seconds = time.perf_counter() - before

print(f'RMSE: {np.sqrt(((df_test - df_pred) ** 2).mean().mean()):.2f}\n')
print(f'Total computation for prediction: {prediction_seconds:.2f} sec')