In [246]:
import pandas as pd
import numpy as np
import json
from tqdm import tqdm
import networkx as nx
import ast


In [247]:
PARENT = ""
DATA_FOLDER = PARENT + "data_WESCO/"
DATA_FOLDER_CACHE = DATA_FOLDER + 'cache/'
VENUE_CONFIG_FILE = DATA_FOLDER_CACHE + "WESCO_config.json"
VENUE = "WESCO"

In [248]:
def read_json_config(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

config = read_json_config(VENUE_CONFIG_FILE)

In [249]:
sensors_unparsed = config[VENUE]["placements"]
sensors = {int(key): value for key, value in sensors_unparsed.items()}
sensors

{100: {'row': 3, 'column': 3},
 101: {'row': 6, 'column': 5},
 102: {'row': 6, 'column': 11},
 103: {'row': 0, 'column': 24}}

In [250]:
df = pd.read_csv(DATA_FOLDER_CACHE + VENUE + '.csv', index_col=0)
#df = df.dropna()

In [251]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4409 entries, 0 to 4408
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           4409 non-null   object 
 1   pm2_5_avg_100  4409 non-null   float64
 2   pm2_5_avg_101  4409 non-null   float64
 3   pm2_5_avg_102  4409 non-null   float64
 4   pm2_5_avg_103  4409 non-null   float64
 5   status_100     4409 non-null   int64  
 6   status_101     4409 non-null   int64  
 7   status_102     4409 non-null   int64  
 8   status_103     4409 non-null   int64  
dtypes: float64(4), int64(4), object(1)
memory usage: 344.5+ KB


In [252]:
#new features from Date -> minute, hour, month, year, dayofyear (1-365)

In [253]:
df['Date'] = pd.to_datetime(df['Date'])
df['hour'] = df['Date'].dt.hour
df['minute'] = df['Date'].dt.minute
df['dayoftheyear'] = df['Date'].dt.dayofyear
df['hour'] = df['hour'].astype('int')
df['minute'] = df['minute'].astype('int')
df['dayoftheyear'] = df['dayoftheyear'].astype('int')
df = df.sort_values(['Date']) #already sorted
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4409 entries, 0 to 4408
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           4409 non-null   datetime64[ns]
 1   pm2_5_avg_100  4409 non-null   float64       
 2   pm2_5_avg_101  4409 non-null   float64       
 3   pm2_5_avg_102  4409 non-null   float64       
 4   pm2_5_avg_103  4409 non-null   float64       
 5   status_100     4409 non-null   int64         
 6   status_101     4409 non-null   int64         
 7   status_102     4409 non-null   int64         
 8   status_103     4409 non-null   int64         
 9   hour           4409 non-null   int64         
 10  minute         4409 non-null   int64         
 11  dayoftheyear   4409 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(7)
memory usage: 447.8 KB


In [254]:
dataStartDate = sorted(df['Date'])[0]
dataEndDate = sorted(df['Date'])[-1]
print(f"Dataset has data from {dataStartDate} to {dataEndDate}")

Dataset has data from 2024-04-02 17:00:00 to 2024-07-24 22:45:00


In [255]:
h, w = config[VENUE]["height"], config[VENUE]["width"]
vents = config[VENUE]["vents"]


In [256]:
mask_hvac = np.zeros((h, w), dtype=int)

for v in vents:
    mask_hvac[vents[v].get("row"), vents[v].get("column")] = 1 if vents[v].get('direction') == "down" else 2

In [257]:
mask_hvac

array([[1, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        2, 0, 2],
       [0, 1, 1, 2, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0],
       [1, 0, 0, 0, 0, 2, 1, 0, 2, 0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 1, 0, 0,
        0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0],
       [1, 1, 1, 2, 0, 0, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 0, 1, 2, 1, 0, 0,
        0, 0, 0],
       [0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        2, 1, 2],
       [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 2, 0, 0,
        1, 0, 2],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
        1, 0, 1]])

In [258]:
mapCells = []

for i in range(h):
    for j in range(w):
        mapCells.append(
            {
                "row": i,
                "column": j
            }
        )

In [259]:
def add_walls(grid_graph):
    with open(DATA_FOLDER_CACHE + 'walls.txt', 'r') as file:
        lines = file.readlines()
        filtered_lines = [l for l in lines if "#" not in l] 
        edges_to_remove = [ast.literal_eval(line.strip()) for line in filtered_lines]

    for n1, n2 in edges_to_remove:
        grid_graph.remove_edge(n1, n2)

grid_graph = nx.grid_2d_graph(8, 25)  

for u, v in grid_graph.edges():
        grid_graph[u][v]['weight'] = 1

add_walls(grid_graph)

In [260]:
def get_distance(source, target):
    
    try:
        distance = nx.shortest_path_length(grid_graph, source=source, target=target, weight='weight')
    except nx.NetworkXNoPath:
        distance = 100

    return distance

In [261]:
sensorDistanceMap = { }

for i in [1,2,3,4]:
    sensorDistanceMap[i] = {
        'sensorID': np.zeros((h, w), dtype='int'),
        'distance': np.zeros((h, w), dtype='int'),
    }

for cell in mapCells:
    distances = []
    for currentSensorID, sensor in sensors.items():
        distances.append((
                get_distance(
                    (sensor['row'], sensor['column']),
                    (cell['row'], cell['column'])
                ),
                currentSensorID
            ))
    distances.sort()        
    k = 1
    for currentSensorDistance, currentSensorID in distances:
        sensorDistanceMap[k]['sensorID'][cell['row']][cell['column']] = currentSensorID
        sensorDistanceMap[k]['distance'][cell['row']][cell['column']] = currentSensorDistance
        k += 1

In [262]:
sensorDistanceMap

{1: {'sensorID': array([[100, 100, 100, 100, 100, 100, 100, 100, 100, 102, 102, 102, 102,
          102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103],
         [100, 100, 100, 100, 100, 100, 100, 100, 100, 102, 102, 102, 102,
          102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103],
         [100, 100, 100, 100, 100, 100, 100, 100, 100, 102, 102, 102, 102,
          102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103, 103],
         [100, 100, 100, 100, 101, 101, 101, 101, 101, 102, 102, 102, 102,
          102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103, 103],
         [100, 100, 100, 100, 101, 101, 101, 101, 101, 102, 102, 102, 102,
          102, 102, 102, 102, 102, 102, 103, 103, 103, 103, 103, 103],
         [100, 100, 100, 100, 101, 101, 101, 101, 101, 102, 102, 102, 102,
          102, 102, 102, 102, 102, 102, 102, 102, 102, 103, 103, 103],
         [100, 100, 100, 100, 101, 101, 101, 101, 101, 102, 102, 102, 102,
          102, 102, 102, 102, 102,

In [263]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4409 entries, 0 to 4408
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           4409 non-null   datetime64[ns]
 1   pm2_5_avg_100  4409 non-null   float64       
 2   pm2_5_avg_101  4409 non-null   float64       
 3   pm2_5_avg_102  4409 non-null   float64       
 4   pm2_5_avg_103  4409 non-null   float64       
 5   status_100     4409 non-null   int64         
 6   status_101     4409 non-null   int64         
 7   status_102     4409 non-null   int64         
 8   status_103     4409 non-null   int64         
 9   hour           4409 non-null   int64         
 10  minute         4409 non-null   int64         
 11  dayoftheyear   4409 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(7)
memory usage: 447.8 KB


In [264]:
y = df[['pm2_5_avg_100', 'pm2_5_avg_101', 'pm2_5_avg_102','pm2_5_avg_103']].to_numpy()


In [265]:
X = df.drop(columns=['Date'])
X_columns = X.columns 
X = X.to_numpy() 


In [266]:
X

array([[  0.        ,   1.23333333,   1.03333333, ...,  17.        ,
          0.        ,  93.        ],
       [  0.        ,   1.        ,   0.7       , ...,   3.        ,
         45.        ,  94.        ],
       [  0.        ,   0.98333333,   0.76666667, ...,   4.        ,
          0.        ,  94.        ],
       ...,
       [  1.14285714,   2.2       ,   3.05      , ...,  22.        ,
         15.        , 206.        ],
       [  1.14285714,   2.2       ,   3.11666667, ...,  22.        ,
         30.        , 206.        ],
       [  1.14285714,   2.2       ,   3.05      , ...,  22.        ,
         45.        , 206.        ]])

In [267]:
X.shape, y.shape

((4409, 11), (4409, 4))

In [268]:
X_columns

Index(['pm2_5_avg_100', 'pm2_5_avg_101', 'pm2_5_avg_102', 'pm2_5_avg_103',
       'status_100', 'status_101', 'status_102', 'status_103', 'hour',
       'minute', 'dayoftheyear'],
      dtype='object')

In [269]:
cubeFeatures = ['hour', 'minute', 'day_of_the_year', 
            'pm25_1_closest_concentration', 'pm25_1_closest_distance', 'status_closest_1',
            'pm25_2_closest_concentration', 'pm25_2_closest_distance', 'status_closest_2', 
            'pm25_3_closest_concentration', 'pm25_3_closest_distance', 'status_closest_3',
            'pm25_4_closest_concentration', 'pm25_4_closest_distance', 'status_closest_4',
            'hvac_locations']

def indexOfFeatureInCube(feature): 
    return cubeFeatures.index(feature)

def indexOfFeature(feature):
    return list(X_columns).index(feature)

In [270]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4409 entries, 0 to 4408
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           4409 non-null   datetime64[ns]
 1   pm2_5_avg_100  4409 non-null   float64       
 2   pm2_5_avg_101  4409 non-null   float64       
 3   pm2_5_avg_102  4409 non-null   float64       
 4   pm2_5_avg_103  4409 non-null   float64       
 5   status_100     4409 non-null   int64         
 6   status_101     4409 non-null   int64         
 7   status_102     4409 non-null   int64         
 8   status_103     4409 non-null   int64         
 9   hour           4409 non-null   int64         
 10  minute         4409 non-null   int64         
 11  dayoftheyear   4409 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(7)
memory usage: 447.8 KB


In [271]:

def oneDimensionToSpace(space_X, sampleIndex, reading):
        
    for featureIndex, feature in enumerate(cubeFeatures):

        if feature == 'hour':
            space_X[sampleIndex, featureIndex] = np.ones((h,w)) * reading[indexOfFeature('hour')]
        elif feature == 'minute':
            space_X[sampleIndex, featureIndex] = np.ones((h,w)) * reading[indexOfFeature('minute')]
        elif feature == 'day_of_the_year':
            space_X[sampleIndex, featureIndex] = np.ones((h,w)) * reading[indexOfFeature('dayoftheyear')]

        elif feature in ['pm25_1_closest_concentration', 'pm25_2_closest_concentration', 'pm25_3_closest_concentration', 'pm25_4_closest_concentration']:
            
            nthClosest = int(feature[5]) # 5th char of 'feature' is '1', '2', (nth closest)
            nthClosestSensorIDMap = sensorDistanceMap[nthClosest]['sensorID']
            statusFeatureIndex = cubeFeatures.index(f"status_closest_{nthClosest}")

            for cell in mapCells:
                row, col = cell['row'], cell['column']
                
                cellSensorID = nthClosestSensorIDMap[row][col]
                rowX = X[sampleIndex]
                readingPM25 = rowX[indexOfFeature('pm2_5_avg_' + str(cellSensorID))]
                statusSensor = rowX[indexOfFeature('status_' + str(cellSensorID))]
                space_X[sampleIndex, featureIndex, row, col] = readingPM25
                space_X[sampleIndex, statusFeatureIndex, row, col] = statusSensor

        elif feature in ["status_closest_1", "status_closest_2", "status_closest_3", "status_closest_4" ]:
            continue # 

        

In [272]:
space_X_filepath = DATA_FOLDER_CACHE + '/space_X.npy'

n = df.shape[0]
c = len(cubeFeatures) # channels == FEATURES

try:
    space_X = np.load(space_X_filepath)
    print('File loaded from cache')
except FileNotFoundError:
    space_X = np.empty((n, c, h, w))
    space_X[:] = np.nan

    # Add HVAC vents locations
    hvacFeatureIndex = cubeFeatures.index(f"hvac_locations")
    space_X[:, hvacFeatureIndex] = mask_hvac
    
    # Add the PM2.5 closest distance features
    for k in [1, 2, 3, 4]:
        featureIndex = cubeFeatures.index(f"pm25_{k}_closest_distance")
        space_X[:, featureIndex] = sensorDistanceMap[k]['distance']

    for i, reading in tqdm(enumerate(X), total=len(X)):
        oneDimensionToSpace(space_X, i, reading)
    
    np.save(space_X_filepath, space_X)

File loaded from cache


In [273]:
assert(X.shape[0] == y.shape[0])

In [274]:
space_X

array([[[[ 17.,  17.,  17., ...,  17.,  17.,  17.],
         [ 17.,  17.,  17., ...,  17.,  17.,  17.],
         [ 17.,  17.,  17., ...,  17.,  17.,  17.],
         ...,
         [ 17.,  17.,  17., ...,  17.,  17.,  17.],
         [ 17.,  17.,  17., ...,  17.,  17.,  17.],
         [ 17.,  17.,  17., ...,  17.,  17.,  17.]],

        [[  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         ...,
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.],
         [  0.,   0.,   0., ...,   0.,   0.,   0.]],

        [[ 93.,  93.,  93., ...,  93.,  93.,  93.],
         [ 93.,  93.,  93., ...,  93.,  93.,  93.],
         [ 93.,  93.,  93., ...,  93.,  93.,  93.],
         ...,
         [ 93.,  93.,  93., ...,  93.,  93.,  93.],
         [ 93.,  93.,  93., ...,  93.,  93.,  93.],
         [ 93.,  93.,  93., ...,  93.,  93.,  93.]],

        ...,

  

In [275]:
space_X.shape

(4409, 16, 8, 25)

In [276]:
t = 5 # 1 hour if sampling is 15 min

CHUNK_SIZE = space_X.shape[0]  
i = 0
    
new_X_filepath = DATA_FOLDER_CACHE + f"/new_X.npy"
new_X = np.empty((CHUNK_SIZE, t, c, h, w))

countPreviousReadings = 0
new_index = 0

with tqdm(total=CHUNK_SIZE) as progress_bar:
    while i < CHUNK_SIZE and new_index < CHUNK_SIZE:
        #countPreviousReadings = 0  # Reset every iteration? Might be an issue
        print(f"i: {i}, new_index: {new_index}, countPreviousReadings: {countPreviousReadings}, t-1: {t-1}")

        if countPreviousReadings >= t - 1:
            print("I executed this")
            new_X[new_index] = np.array([space_X[i-t+1 : i+1]])
            new_index += 1
            progress_bar.update(1)

        countPreviousReadings += 1
        print(f"Updated countPreviousReadings: {countPreviousReadings}")
        i += 1

np.save(new_X_filepath, new_X)

print("Files saved")


  0%|          | 0/4409 [00:00<?, ?it/s]

i: 0, new_index: 0, countPreviousReadings: 0, t-1: 4
Updated countPreviousReadings: 1
i: 1, new_index: 0, countPreviousReadings: 1, t-1: 4
Updated countPreviousReadings: 2
i: 2, new_index: 0, countPreviousReadings: 2, t-1: 4
Updated countPreviousReadings: 3
i: 3, new_index: 0, countPreviousReadings: 3, t-1: 4
Updated countPreviousReadings: 4
i: 4, new_index: 0, countPreviousReadings: 4, t-1: 4
I executed this
Updated countPreviousReadings: 5
i: 5, new_index: 1, countPreviousReadings: 5, t-1: 4
I executed this
Updated countPreviousReadings: 6
i: 6, new_index: 2, countPreviousReadings: 6, t-1: 4
I executed this
Updated countPreviousReadings: 7
i: 7, new_index: 3, countPreviousReadings: 7, t-1: 4
I executed this
Updated countPreviousReadings: 8
i: 8, new_index: 4, countPreviousReadings: 8, t-1: 4
I executed this
Updated countPreviousReadings: 9
i: 9, new_index: 5, countPreviousReadings: 9, t-1: 4
I executed this
Updated countPreviousReadings: 10
i: 10, new_index: 6, countPreviousReadings:

 28%|██▊       | 1213/4409 [00:00<00:00, 12128.45it/s]

Updated countPreviousReadings: 675
i: 675, new_index: 671, countPreviousReadings: 675, t-1: 4
I executed this
Updated countPreviousReadings: 676
i: 676, new_index: 672, countPreviousReadings: 676, t-1: 4
I executed this
Updated countPreviousReadings: 677
i: 677, new_index: 673, countPreviousReadings: 677, t-1: 4
I executed this
Updated countPreviousReadings: 678
i: 678, new_index: 674, countPreviousReadings: 678, t-1: 4
I executed this
Updated countPreviousReadings: 679
i: 679, new_index: 675, countPreviousReadings: 679, t-1: 4
I executed this
Updated countPreviousReadings: 680
i: 680, new_index: 676, countPreviousReadings: 680, t-1: 4
I executed this
Updated countPreviousReadings: 681
i: 681, new_index: 677, countPreviousReadings: 681, t-1: 4
I executed this
Updated countPreviousReadings: 682
i: 682, new_index: 678, countPreviousReadings: 682, t-1: 4
I executed this
Updated countPreviousReadings: 683
i: 683, new_index: 679, countPreviousReadings: 683, t-1: 4
I executed this
Updated co

 55%|█████▌    | 2426/4409 [00:00<00:00, 11414.71it/s]

Updated countPreviousReadings: 2349
i: 2349, new_index: 2345, countPreviousReadings: 2349, t-1: 4
I executed this
Updated countPreviousReadings: 2350
i: 2350, new_index: 2346, countPreviousReadings: 2350, t-1: 4
I executed this
Updated countPreviousReadings: 2351
i: 2351, new_index: 2347, countPreviousReadings: 2351, t-1: 4
I executed this
Updated countPreviousReadings: 2352
i: 2352, new_index: 2348, countPreviousReadings: 2352, t-1: 4
I executed this
Updated countPreviousReadings: 2353
i: 2353, new_index: 2349, countPreviousReadings: 2353, t-1: 4
I executed this
Updated countPreviousReadings: 2354
i: 2354, new_index: 2350, countPreviousReadings: 2354, t-1: 4
I executed this
Updated countPreviousReadings: 2355
i: 2355, new_index: 2351, countPreviousReadings: 2355, t-1: 4
I executed this
Updated countPreviousReadings: 2356
i: 2356, new_index: 2352, countPreviousReadings: 2356, t-1: 4
I executed this
Updated countPreviousReadings: 2357
i: 2357, new_index: 2353, countPreviousReadings: 235

 85%|████████▌ | 3761/4409 [00:00<00:00, 12266.85it/s]

Updated countPreviousReadings: 3089
i: 3089, new_index: 3085, countPreviousReadings: 3089, t-1: 4
I executed this
Updated countPreviousReadings: 3090
i: 3090, new_index: 3086, countPreviousReadings: 3090, t-1: 4
I executed this
Updated countPreviousReadings: 3091
i: 3091, new_index: 3087, countPreviousReadings: 3091, t-1: 4
I executed this
Updated countPreviousReadings: 3092
i: 3092, new_index: 3088, countPreviousReadings: 3092, t-1: 4
I executed this
Updated countPreviousReadings: 3093
i: 3093, new_index: 3089, countPreviousReadings: 3093, t-1: 4
I executed this
Updated countPreviousReadings: 3094
i: 3094, new_index: 3090, countPreviousReadings: 3094, t-1: 4
I executed this
Updated countPreviousReadings: 3095
i: 3095, new_index: 3091, countPreviousReadings: 3095, t-1: 4
I executed this
Updated countPreviousReadings: 3096
i: 3096, new_index: 3092, countPreviousReadings: 3096, t-1: 4
I executed this
Updated countPreviousReadings: 3097
i: 3097, new_index: 3093, countPreviousReadings: 309

100%|█████████▉| 4405/4409 [00:00<00:00, 12070.68it/s]

Updated countPreviousReadings: 4189
i: 4189, new_index: 4185, countPreviousReadings: 4189, t-1: 4
I executed this
Updated countPreviousReadings: 4190
i: 4190, new_index: 4186, countPreviousReadings: 4190, t-1: 4
I executed this
Updated countPreviousReadings: 4191
i: 4191, new_index: 4187, countPreviousReadings: 4191, t-1: 4
I executed this
Updated countPreviousReadings: 4192
i: 4192, new_index: 4188, countPreviousReadings: 4192, t-1: 4
I executed this
Updated countPreviousReadings: 4193
i: 4193, new_index: 4189, countPreviousReadings: 4193, t-1: 4
I executed this
Updated countPreviousReadings: 4194
i: 4194, new_index: 4190, countPreviousReadings: 4194, t-1: 4
I executed this
Updated countPreviousReadings: 4195
i: 4195, new_index: 4191, countPreviousReadings: 4195, t-1: 4
I executed this
Updated countPreviousReadings: 4196
i: 4196, new_index: 4192, countPreviousReadings: 4196, t-1: 4
I executed this
Updated countPreviousReadings: 4197
i: 4197, new_index: 4193, countPreviousReadings: 419




Files saved


In [277]:
new_X

array([[[[[ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          ...,
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.]],

         [[  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          ...,
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.]],

         [[ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          ...,
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.

In [278]:
new_y_filepath = DATA_FOLDER_CACHE + f"/new_y.npy"
np.save(new_y_filepath, y)

In [279]:
y.shape

(4409, 4)

In [280]:
new_X.shape

(4409, 5, 16, 8, 25)

In [281]:
len(new_X), len(y)

(4409, 4409)

In [282]:
new_X

array([[[[[ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          ...,
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.],
          [ 17.,  17.,  17., ...,  17.,  17.,  17.]],

         [[  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          ...,
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.],
          [  0.,   0.,   0., ...,   0.,   0.,   0.]],

         [[ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          ...,
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.],
          [ 93.,  93.,  93., ...,  93.,  93.,  93.

In [285]:
y

array([[0.        , 1.23333333, 1.03333333, 1.03333333],
       [0.        , 1.        , 0.7       , 1.15      ],
       [0.        , 0.98333333, 0.76666667, 1.25      ],
       ...,
       [1.14285714, 2.2       , 3.05      , 5.875     ],
       [1.14285714, 2.2       , 3.11666667, 6.01666667],
       [1.14285714, 2.2       , 3.05      , 5.96666667]])