In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import networkx as nx
import matplotlib.pyplot as plt
import ast

In [4]:
# Project root prefix; empty means paths resolve against the current working directory.
PARENT = ""
# Dataset folder and its cache sub-folder. Both end with "/" so that file
# names can be appended directly.
DATA_FOLDER = f"{PARENT}data_WESCO/"
DATA_FOLDER_CACHE = f"{DATA_FOLDER}cache/"
# NOTE(review): T is not referenced by the visible cells; presumably the number
# of timesteps per sample — confirm.
T = 5

In [5]:
# Load the cached feature tensor X from the cache folder.
X = np.load(DATA_FOLDER_CACHE + 'new_X.npy')

In [6]:
def buildAdjacencyMatrix(rows, cols, walls_path=None):
    """Build the dense adjacency matrix of a ``rows`` x ``cols`` grid with walls removed.

    Every cell is connected to its horizontal and vertical neighbours with
    weight 1 (no diagonal edges are added). Each edge listed in the walls file
    is then removed from the graph.

    Parameters
    ----------
    rows, cols : int
        Grid dimensions.
    walls_path : str, optional
        Text file with one edge per line, written as a Python literal holding a
        pair of ``(row, col)`` node tuples, e.g. ``((0, 1), (0, 2))``. Lines
        containing ``#`` and blank lines are skipped. Defaults to
        ``DATA_FOLDER_CACHE + 'walls.txt'``.

    Returns
    -------
    numpy.ndarray
        ``(rows * cols, rows * cols)`` matrix in which grid cell ``(i, j)``
        occupies row/column ``i * cols + j``.

    Raises
    ------
    networkx.NetworkXError
        If a listed wall is not an edge of the grid.
    """
    grid_graph = nx.grid_2d_graph(rows, cols)  # 4-neighbour lattice over all cells

    for u, v in grid_graph.edges():
        grid_graph[u][v]['weight'] = 1

    if walls_path is None:
        walls_path = DATA_FOLDER_CACHE + 'walls.txt'

    with open(walls_path, 'r') as file:
        stripped_lines = [line.strip() for line in file]

    # Skip commented lines (anything containing "#") and blank lines; a blank
    # line would otherwise make ast.literal_eval raise a SyntaxError.
    edges_to_remove = [
        ast.literal_eval(line)
        for line in stripped_lines
        if line and "#" not in line
    ]

    for n1, n2 in edges_to_remove:
        grid_graph.remove_edge(n1, n2)

    # Pin the node order explicitly so that matrix index n always corresponds
    # to cell (n // cols, n % cols), independent of graph insertion order.
    node_order = [(i, j) for i in range(rows) for j in range(cols)]
    adj_matrix = nx.to_numpy_array(grid_graph, nodelist=node_order)
    return adj_matrix

In [7]:
# Preview the adjacency matrix of the 8 x 25 grid.
buildAdjacencyMatrix(rows=8, cols=25)

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [8]:
def getTimeSeriesDataFromCell(tensor, i, j):
    """Return the per-feature time series of grid cell ``(i, j)``.

    ``tensor`` is indexed as ``tensor[:, :, :, i, j]``: the last two axes select
    the grid position and the three leading axes are taken as
    (sample, time, feature). The result is reordered to (sample, feature, time).
    """
    per_cell = tensor[:, :, :, i, j]  # every sample, timestep and feature of this cell
    return per_cell.transpose(0, 2, 1)

def buildFeatureMatrix(tensor):
    """Rearrange a gridded tensor into per-node feature matrices.

    Parameters
    ----------
    tensor : numpy.ndarray
        5-D array laid out as (samples, timesteps, features, rows, cols).

    Returns
    -------
    numpy.ndarray
        float64 array of shape (samples, timesteps, rows * cols, features).
        Grid cell ``(i, j)`` becomes node ``i * cols + j``, so
        ``result[s, t, i * cols + j, f] == tensor[s, t, f, i, j]``.

    Notes
    -----
    The resulting shape is printed as a side effect.
    """
    assert len(tensor.shape) == 5, "expected (samples, timesteps, features, rows, cols)"
    samples, timesteps, features, rows, cols = tensor.shape
    nodes = rows * cols

    # A C-order reshape flattens the trailing (rows, cols) axes row-major, which
    # is exactly node n = i * cols + j; this replaces a Python loop over cells.
    per_node = np.reshape(tensor, (samples, timesteps, features, nodes))

    # Swap the last two axes to get (samples, timesteps, nodes, features) and
    # materialise as float64.
    featureMatrixes = np.transpose(per_node, (0, 1, 3, 2)).astype(np.float64)
    print(featureMatrixes.shape)
    return featureMatrixes

In [9]:
# NOTE(review): the earlier comment said this gets rid of the location slice,
# but the full-range slice below keeps every element of X — confirm whether a
# feature channel was meant to be dropped here.
X_no_loc = X[:, :, :, :, :]
samples, timesteps, features, rows, cols = X_no_loc.shape

# Take the grid size from the data itself so the adjacency matrix uses the same
# row-major node numbering (n = i * cols + j) as the feature matrices.
adjMatrix = buildAdjacencyMatrix(rows, cols)
featureMatrixes = buildFeatureMatrix(X_no_loc)  # (samples, timesteps, rows * cols, features)

(4409, 5, 200, 16)


In [10]:
# Save adj to pickle file.
import pickle

adjMatrix = adjMatrix.astype(np.float32)

# One id per graph node, taken from the matrix itself rather than a hard-coded count.
num_nodes = adjMatrix.shape[0]
node_ids = range(num_nodes)
node_id_to_index = {node: node for node in node_ids}

# DATA_FOLDER_CACHE already ends with "/", so no extra separator is needed.
adj_matrix_file = DATA_FOLDER_CACHE + "adj_mx.pkl"
with open(adj_matrix_file, 'wb') as f:
    # Payload layout: [node ids, node id -> matrix index, adjacency matrix].
    pickle.dump([node_ids, node_id_to_index, adjMatrix], f, protocol=2)

featureMatrixes = featureMatrixes.astype(np.float32)


In [11]:
# Shape check after the float32 cast: (samples, timesteps, nodes, features).
featureMatrixes.shape

(4409, 5, 200, 16)

In [12]:
# Load the cached target array y from the cache folder.
y = np.load(DATA_FOLDER_CACHE + 'new_y.npy')

In [13]:
## Recall notebook one: how were we defining y? Sensors 100, 101, 102, 103.
"""
y[:, 0] represents readings of sensor 100
...
y[:, 3] represents readings of sensor 103
"""
# Display the target shape (rows are samples, one column per sensor listed above).
y.shape

(4409, 4)

In [14]:
# Graph-node index of each sensor. Dict order follows the columns of y:
# column 0 goes to "davinci", column 1 to "metcalfe", and so on.
sensor_indices = {"davinci": 24, "metcalfe": 78, "showroom": 155, "theater": 161}

# Targets laid out over the whole graph as (samples, 1, nodes, 1); every entry
# stays zero except at the sensor nodes.
node_count = rows * cols
reshaped_y = np.zeros((samples, 1, node_count, 1))
for column, node in enumerate(sensor_indices.values()):
    reshaped_y[:, 0, node, 0] = y[:, column]
# NOTE(review): reshaped_y is not used by the visible cells that follow (the
# train/test split saves `y`) — confirm which of the two should be written out.


In [15]:
fS = featureMatrixes.shape
yS = y.shape

# Ordered split without shuffling: the first 75% of the samples are used for
# training and the remainder for testing.
TRAINING_SPLIT = 0.75
training_samples = int(featureMatrixes.shape[0] * TRAINING_SPLIT)

X_train, X_test = featureMatrixes[:training_samples], featureMatrixes[training_samples:]
y_train, y_test = y[:training_samples], y[training_samples:]
# NOTE(review): the validation split is the very same data as the test split.
X_val, y_val = X_test, y_test

train_filepath = DATA_FOLDER_CACHE + "new_model_train.npz"
test_filepath = DATA_FOLDER_CACHE + "new_model_test.npz"
val_filepath = DATA_FOLDER_CACHE + "new_model_val.npz"

# Each archive stores the features under key "x" and the targets under key "y".
for split_path, split_x, split_y in (
    (train_filepath, X_train, y_train),
    (test_filepath, X_test, y_test),
    (val_filepath, X_val, y_val),
):
    np.savez_compressed(split_path, x=split_x, y=split_y)