In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import networkx as nx
import matplotlib.pyplot as plt
import ast

In [2]:
# Locations of the dataset and of its cache directory, relative to this notebook.
PARENT = "../Sweep/"
DATA_FOLDER = f"{PARENT}data_WESCO/"
DATA_FOLDER_CACHE = f"{DATA_FOLDER}cache/"
# NOTE(review): T is not referenced in the cells visible here; presumably the number of timesteps — confirm.
T = 5

In [3]:
# Load the cached input tensor; np.load opens and closes the .npy file itself.
X = np.load(DATA_FOLDER_CACHE + 'new_X.npy')

In [4]:
# List the distinct values that occur anywhere in X.
np.unique(X)

array([  0.        ,   1.        ,   1.03125   ,   1.05357143,
         1.05555556,   1.0625    ,   1.07142857,   1.08333333,
         1.1       ,   1.10714286,   1.11538462,   1.11666667,
         1.125     ,   1.15384615,   1.16071429,   1.22727273,
         1.3       ,   1.93333333,   1.96666667,   1.98333333,
         2.        ,   2.05      ,   2.08333333,   2.125     ,
         2.13333333,   2.21428571,   2.21666667,   2.25      ,
         2.3125    ,   2.375     ,   2.43333333,   2.45      ,
         2.5       ,   2.51666667,   2.52777778,   2.55      ,
         2.59090909,   2.6       ,   2.61666667,   2.63333333,
         2.64285714,   2.65909091,   2.66071429,   2.6875    ,
         2.69230769,   2.70454545,   2.71666667,   2.73214286,
         2.73333333,   2.76666667,   2.76785714,   2.77083333,
         2.78333333,   2.78846154,   2.79545455,   2.8       ,
         2.81666667,   2.83333333,   2.85      ,   2.85416667,
         2.86363636,   2.86666667,   2.875     ,   2.88

In [5]:
def buildAdjacencyMatrix(rows, cols):
    """Build the dense adjacency matrix of a grid graph with wall edges removed.

    A ``rows`` x ``cols`` 2-D grid graph is created in which every cell is
    linked to its horizontal and vertical neighbours (diagonals are not
    connected) by an edge of weight 1. Every edge listed in
    ``DATA_FOLDER_CACHE + 'walls.txt'`` is then removed.

    walls.txt holds one Python literal per line that unpacks into the two
    end nodes of an edge. Lines containing ``#`` are treated as comments,
    and blank lines are ignored.

    Args:
        rows: number of grid rows.
        cols: number of grid columns.

    Returns:
        The adjacency matrix produced by ``nx.to_numpy_array`` (1.0 where an
        edge remains, 0.0 elsewhere).

    Raises:
        networkx.NetworkXError: if walls.txt lists a pair that is not an
            edge of the grid.
    """
    grid_graph = nx.grid_2d_graph(rows, cols)  ## connect all cells

    for u, v in grid_graph.edges():
        grid_graph[u][v]['weight'] = 1

    def add_walls(grid_graph):
        """Remove from ``grid_graph`` every edge listed in walls.txt."""
        with open(DATA_FOLDER_CACHE + 'walls.txt', 'r') as file:
            edges_to_remove = [
                ast.literal_eval(line.strip())
                for line in file
                # Skip comment lines and blank lines; an empty string would
                # make ast.literal_eval raise SyntaxError.
                if line.strip() and "#" not in line
            ]

        for n1, n2 in edges_to_remove:
            grid_graph.remove_edge(n1, n2)

    add_walls(grid_graph)

    # NOTE(review): the matrix rows/columns follow the graph's node iteration
    # order; buildFeatureMatrix numbers cells as i * cols + j — confirm both agree.
    adj_matrix = nx.to_numpy_array(grid_graph)
    return adj_matrix

In [6]:
# Display the adjacency matrix of the 8 x 25 grid (reads walls.txt from DATA_FOLDER_CACHE).
buildAdjacencyMatrix(8, 25)

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 0., 1., 0.]])

In [7]:
def getTimeSeriesDataFromCell(tensor, i, j):
    """Return the data of grid cell (i, j) with axes (sample, feature, time).

    ``tensor`` is indexed as (sample, time, feature, row, col); the two
    trailing axes are fixed to ``i`` and ``j`` and the time and feature
    axes are then swapped.
    """
    # Selecting the cell leaves (sample, time, feature); reorder to (sample, feature, time).
    return np.transpose(tensor[:, :, :, i, j], axes=(0, 2, 1))

def buildFeatureMatrix(tensor):
    """Rearrange a 5-D grid tensor into per-node feature series.

    The input axes are (samples, timesteps, features, rows, cols). Grid cell
    (i, j) becomes node ``i * cols + j``. The returned float64 array has axes
    (samples, timesteps, nodes, features); its shape is printed before
    returning.
    """
    assert len(tensor.shape) == 5
    n_samples, n_steps, n_feats, n_rows, n_cols = tensor.shape

    # Staging buffer laid out as (node, sample, feature, time).
    per_node = np.empty((n_rows * n_cols, n_samples, n_feats, n_steps))

    # np.ndindex walks the grid row by row, so the running counter equals i * cols + j.
    for node, (r, c) in enumerate(np.ndindex(n_rows, n_cols)):
        per_node[node] = getTimeSeriesDataFromCell(tensor, r, c)

    # (node, sample, feature, time) -> (sample, time, node, feature)
    result = np.transpose(per_node, (1, 3, 0, 2))
    print(result.shape)
    return result

In [8]:
# NOTE(review): despite its name, X_no_loc keeps every feature. The previous
# full slice X[:, :, :, :, :] removed nothing; if a location channel was meant
# to be dropped, that still has to be done here.
X_no_loc = X
samples, timesteps, features, rows, cols = X_no_loc.shape

adjMatrix = buildAdjacencyMatrix(8, 25)
# The graph must contain exactly one node per grid cell of the tensor.
assert adjMatrix.shape == (rows * cols, rows * cols), (
    f"adjacency matrix {adjMatrix.shape} does not match the {rows} x {cols} grid of X"
)

featureMatrixes = buildFeatureMatrix(X_no_loc)  # (samples, timesteps, nodes, features)

(109, 5, 200, 16)


In [9]:
# Save the adjacency matrix to a pickle file as [node ids, node id -> index, matrix].
import pickle

adjMatrix = adjMatrix.astype(np.float32)
# Derive the node count from the matrix instead of hard-coding 200.
num_nodes = adjMatrix.shape[0]

# DATA_FOLDER_CACHE already ends with '/', so no extra separator is added.
adj_matrix_file = DATA_FOLDER_CACHE + "adj_mx.pkl"
with open(adj_matrix_file, 'wb') as f:
    pickle.dump([range(num_nodes), {i: i for i in range(num_nodes)}, adjMatrix], f, protocol=2)

featureMatrixes = featureMatrixes.astype(np.float32)


In [10]:
# Shape check of the feature array.
featureMatrixes.shape

(109, 5, 200, 16)

In [11]:
# Load the cached target array; np.load opens and closes the .npy file itself.
y = np.load(DATA_FOLDER_CACHE + 'new_y.npy')

In [12]:
# Inspect the targets loaded from new_y.npy.
y

array([[2.08333333, 3.41666667, 4.3       , 7.81666667],
       [2.        , 3.75      , 4.23333333, 7.86666667],
       [2.        , 3.88333333, 4.18333333, 7.81666667],
       [2.        , 3.6       , 4.21666667, 7.96666667],
       [2.        , 3.76666667, 4.11666667, 7.78333333],
       [2.        , 3.7       , 4.08928571, 7.63333333],
       [2.        , 3.73333333, 4.1       , 7.6       ],
       [2.        , 3.78571429, 4.25      , 7.69230769],
       [2.        , 3.76666667, 4.2       , 7.7       ],
       [2.        , 3.73333333, 4.09615385, 7.78333333],
       [2.        , 3.9       , 4.325     , 7.98214286],
       [2.125     , 3.93333333, 4.31666667, 8.26666667],
       [2.125     , 3.8       , 4.31818182, 8.26666667],
       [2.125     , 3.86666667, 4.4       , 8.25      ],
       [2.52777778, 4.05      , 4.61363636, 8.55      ],
       [2.3125    , 3.93333333, 4.61363636, 8.46666667],
       [2.25      , 3.85      , 4.5625    , 8.31666667],
       [2.        , 3.8       ,

In [13]:
## recall notebook one: how were we defining y? 100, 101, 102, 103
"""
y[:, 0] represents readings of sensor 100
...
y[:, 3] represents readings of sensor 103
"""
y.shape

(109, 4)

In [14]:
# Confirm the container type and the shape of y.
print(type(y), y.shape)

<class 'numpy.ndarray'> (109, 4)


In [15]:
import torch

In [16]:
# Position of each sensor along the node axis (size rows * cols) of the target array.
# NOTE(review): the dict's insertion order must match the column order of `y` —
# confirm against the notebook that produced new_y.npy.
sensor_indices = {"davinci": 24, "metcalfe": 78, "showroom": 155, "theater": 161}

# This cell overwrites `y`, so it cannot be re-run on its own output; fail
# with a clear message instead of an obscure broadcasting error.
assert y.shape == (samples, len(sensor_indices)), (
    f"expected y of shape {(samples, len(sensor_indices))}, got {y.shape}; "
    "reload new_y.npy before re-running this cell"
)

# Nodes without a sensor keep uniform-random placeholder values in [0, 1),
# now drawn from a fixed seed so the saved dataset is reproducible.
# NOTE(review): these placeholders look like real targets to any loss that
# does not mask non-sensor nodes — confirm the training code masks them.
Y_FILL_SEED = 0
reshaped_y = np.random.default_rng(Y_FILL_SEED).random((samples, 1, rows * cols, 1))

# Fill in the values for the 4 sensors
for i, sensor_index in enumerate(sensor_indices.values()):
    reshaped_y[:, 0, sensor_index, 0] = y[:, i]

y = reshaped_y


In [17]:
# Shape of y after it was expanded to one entry per grid node.
y.shape

(109, 1, 200, 1)

In [18]:
# Keep both shapes at hand for inspection.
fS, yS = featureMatrixes.shape, y.shape

# Write features and targets together into one compressed archive in the cache folder.
sweep_filepath = DATA_FOLDER_CACHE + "new_model_sweep.npz"
np.savez_compressed(sweep_filepath, x=featureMatrixes, y=y)



In [19]:
# Dump the per-node targets; entries of nodes without a sensor are the random fill values.
print (y)

[[[[0.04464755]
   [0.47528876]
   [0.869404  ]
   ...
   [0.46832446]
   [0.27199824]
   [0.62478208]]]


 [[[0.78705541]
   [0.03062705]
   [0.23340947]
   ...
   [0.37745789]
   [0.66164798]
   [0.86791789]]]


 [[[0.83310078]
   [0.07465532]
   [0.48558846]
   ...
   [0.86057543]
   [0.9233857 ]
   [0.89804312]]]


 ...


 [[[0.58944806]
   [0.53968264]
   [0.82194403]
   ...
   [0.19064048]
   [0.6808573 ]
   [0.88862016]]]


 [[[0.95557348]
   [0.92825013]
   [0.651971  ]
   ...
   [0.47684722]
   [0.73914499]
   [0.83754739]]]


 [[[0.77760126]
   [0.00287546]
   [0.23476768]
   ...
   [0.53555159]
   [0.07592176]
   [0.0187343 ]]]]


In [20]:
# NOTE(review): prints the same y as the preceding cell — duplicate output.
print(y)


[[[[0.04464755]
   [0.47528876]
   [0.869404  ]
   ...
   [0.46832446]
   [0.27199824]
   [0.62478208]]]


 [[[0.78705541]
   [0.03062705]
   [0.23340947]
   ...
   [0.37745789]
   [0.66164798]
   [0.86791789]]]


 [[[0.83310078]
   [0.07465532]
   [0.48558846]
   ...
   [0.86057543]
   [0.9233857 ]
   [0.89804312]]]


 ...


 [[[0.58944806]
   [0.53968264]
   [0.82194403]
   ...
   [0.19064048]
   [0.6808573 ]
   [0.88862016]]]


 [[[0.95557348]
   [0.92825013]
   [0.651971  ]
   ...
   [0.47684722]
   [0.73914499]
   [0.83754739]]]


 [[[0.77760126]
   [0.00287546]
   [0.23476768]
   ...
   [0.53555159]
   [0.07592176]
   [0.0187343 ]]]]


In [21]:
# NOTE(review): X_train is not assigned in any cell visible above; this cell raises NameError on a fresh run.
X_train

NameError: name 'X_train' is not defined

In [None]:
# NOTE(review): y_train is not assigned in any cell visible above; the recorded output is presumably left over from an earlier kernel session.
y_train

array([[[[0.1891728 ],
         [0.75371179],
         [0.44078092],
         ...,
         [0.91728714],
         [0.84057797],
         [0.29911774]]],


       [[[0.61685681],
         [0.6908601 ],
         [0.24803723],
         ...,
         [0.27482839],
         [0.06420326],
         [0.20211824]]],


       [[[0.02461357],
         [0.26726842],
         [0.40391369],
         ...,
         [0.35253896],
         [0.68537169],
         [0.39941173]]],


       ...,


       [[[0.62518643],
         [0.60583608],
         [0.44199101],
         ...,
         [0.39785744],
         [0.63458037],
         [0.27628439]]],


       [[[0.46723715],
         [0.89986301],
         [0.17561518],
         ...,
         [0.56812285],
         [0.59516551],
         [0.2504576 ]]],


       [[[0.04027226],
         [0.91904892],
         [0.56531024],
         ...,
         [0.15117749],
         [0.1361319 ],
         [0.04152281]]]])

In [None]:
# NOTE(review): training_samples is not assigned in any cell visible above; define it before running this cell.
featureMatrixes[:training_samples]


array([[[[ 17.,   0.,  93., ...,  28.,   1.,   1.],
         [ 17.,   0.,  93., ...,  27.,   1.,   0.],
         [ 17.,   0.,  93., ...,  26.,   1.,   0.],
         ...,
         [ 17.,   0.,  93., ...,  26.,   1.,   1.],
         [ 17.,   0.,  93., ...,  27.,   1.,   0.],
         [ 17.,   0.,  93., ...,  28.,   1.,   1.]],

        [[  3.,  45.,  94., ...,  28.,   1.,   1.],
         [  3.,  45.,  94., ...,  27.,   1.,   0.],
         [  3.,  45.,  94., ...,  26.,   1.,   0.],
         ...,
         [  3.,  45.,  94., ...,  26.,   1.,   1.],
         [  3.,  45.,  94., ...,  27.,   1.,   0.],
         [  3.,  45.,  94., ...,  28.,   1.,   1.]],

        [[  4.,   0.,  94., ...,  28.,   1.,   1.],
         [  4.,   0.,  94., ...,  27.,   1.,   0.],
         [  4.,   0.,  94., ...,  26.,   1.,   0.],
         ...,
         [  4.,   0.,  94., ...,  26.,   1.,   1.],
         [  4.,   0.,  94., ...,  27.,   1.,   0.],
         [  4.,   0.,  94., ...,  28.,   1.,   1.]],

        [[  4., 