In [5]:
import numpy as np
import pandas as pd
import h5py # For HDF5 files
import os

# Location of the raw pems-bay.h5 file, relative to the current working directory.
file_path = os.path.join('..', 'data', 'raw', 'pems-bay.h5')

# Defined up front so the name always exists after this cell runs: it stays
# None on every failure path instead of being undefined (or stale from an
# earlier successful run).
traffic_data = None

print(f"Attempting to load data from: {file_path}")

try:
    # Open read-only; the context manager closes the file on exit.
    with h5py.File(file_path, 'r') as hf:
        if 'speed' not in hf:
            print("The 'speed' key was not found. The dataset structure may be different.")
        else:
            # 'speed' is a group, not the dataset itself, so list its members
            # to show where the actual data array lives.
            speed_group = hf['speed']
            print(f"Keys inside the 'speed' group: {list(speed_group.keys())}")

            if 'block0_values' not in speed_group:
                print("The 'block0_values' key was not found inside the 'speed' group. The data structure may be different.")
            else:
                # Read the whole dataset into an in-memory NumPy array with
                # [()]. Keeping only the h5py Dataset handle would leave
                # traffic_data unusable once the file is closed at the end
                # of the `with` block.
                traffic_data = speed_group['block0_values'][()]
                print(f"Shape of traffic_data array: {traffic_data.shape}")
                print(f"Data type: {traffic_data.dtype}")
                print("\nFirst 5 sensors' data for the first 5 time steps:")
                print(traffic_data[:5, :5])

except FileNotFoundError:
    print(f"Error: The file {file_path} was not found. Please check the file path and name.")
except Exception as e:
    # Deliberately broad: this is an exploratory cell, so any other failure is
    # reported as a message rather than raised as a traceback.
    print(f"An error occurred: {e}")

Attempting to load data from: ..\data\raw\pems-bay.h5
Keys inside the 'speed' group: ['axis0', 'axis1', 'block0_items', 'block0_values']
Shape of traffic_data array: (52116, 325)
Data type: float64

First 5 sensors' data for the first 5 time steps:
[[71.4 67.8 70.5 67.4 68.8]
 [71.6 67.5 70.6 67.5 68.7]
 [71.6 67.6 70.2 67.4 68.7]
 [71.1 67.5 70.3 68.  68.5]
 [71.7 67.8 70.2 68.1 68.4]]
