In [1]:
import torch
import numpy as np 
import h5py
from sklearn.model_selection import train_test_split

In [2]:
# Relative path of the UT_HAR_OG data directory used throughout this notebook.
dataset_path = "../data/UT_HAR_OG"
# Earlier loading path via serialized tensors, kept for reference only;
# the next cell reads the HDF5 files instead.
#X = torch.load(dataset_path+"/data/X.pt")
#y = torch.load(dataset_path+"/label/y.pt")

In [3]:
# Load the complete feature ("X") and label ("y") datasets from their HDF5 files.
# `f[key][:]` already materializes the dataset as a fresh NumPy array, so
# `torch.as_tensor` wraps that buffer directly (same dtype, same values);
# `torch.tensor` would allocate and fill a second full copy of it.
with h5py.File(dataset_path + "/X.h5", "r") as f:
    X = torch.as_tensor(f["X"][:])

with h5py.File(dataset_path + "/y.h5", "r") as f:
    y = torch.as_tensor(f["y"][:])

In [4]:
# Inspect the dimensions of the loaded feature tensor.
X.shape

torch.Size([110793, 10, 180])

In [5]:
# Inspect the dimensions of the loaded label tensor; its length should match
# the first dimension of X.
y.shape

torch.Size([110793])

In [6]:
# Stage 1 of the split: hold out 30% of all samples as the test set.
# Stratifying on the labels keeps the class proportions equal in both parts,
# and the fixed seed makes the split reproducible.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [7]:
# Stage 2 of the split: take 20% of the remaining train+val pool as the
# validation set (0.2 * 0.7 = 14% of all samples), again stratified on the
# labels and with the same fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.2,
    random_state=42,
    stratify=y_train_val,
)

In [8]:
def print_class_distribution(y, label):
    """Print how many samples of each class occur in ``y``.

    Args:
        y: Array-like of class labels; it is handed to ``np.unique``.
        label: Name of the split, used in the header line.

    The output is a header line, one line per distinct class (in the sorted
    order returned by ``np.unique``), and a trailing blank line.
    """
    classes, frequencies = np.unique(y, return_counts=True)
    report = [f"{label} distribution:"]
    for idx in range(len(classes)):
        report.append(f"Class {classes[idx]}: {frequencies[idx]} samples")
    # The empty last entry reproduces the blank separator line after the report.
    report.append("")
    print("\n".join(report))

# Report the per-class sample counts of every split, in train/val/test order.
for split_labels, split_name in (
    (y_train, "Train"),
    (y_val, "Validation"),
    (y_test, "Test"),
):
    print_class_distribution(split_labels, split_name)


Train distribution:
Class 0: 50296 samples
Class 1: 1552 samples
Class 2: 1080 samples
Class 3: 1187 samples
Class 4: 2790 samples
Class 5: 1009 samples
Class 6: 753 samples
Class 7: 3377 samples

Validation distribution:
Class 0: 12574 samples
Class 1: 388 samples
Class 2: 270 samples
Class 3: 297 samples
Class 4: 697 samples
Class 5: 252 samples
Class 6: 188 samples
Class 7: 845 samples

Test distribution:
Class 0: 26945 samples
Class 1: 831 samples
Class 2: 579 samples
Class 3: 636 samples
Class 4: 1494 samples
Class 5: 540 samples
Class 6: 404 samples
Class 7: 1809 samples



In [9]:
# Inspect the dimensions of the training features after both splits.
X_train.shape

torch.Size([62044, 10, 180])

In [10]:
# Test reshaping
# Layout constants describing how this notebook interprets one sample:
# num_antennas * num_subcarriers = 90 entries per half of the last axis.
num_antennas = 3
num_subcarriers = 30
num_time_slices = 10
# Prototype on a small batch: the first 4 training samples.
wifi_csi_frame = X_train[:4]
wifi_csi_frame.shape

torch.Size([4, 10, 180])

In [11]:
# Print the full last-axis vector at index [0, 0] (first sample in the batch,
# first position along axis 1) to eyeball its layout before slicing it.
print(wifi_csi_frame[0,0])

tensor([15.7798, 16.7691, 17.0895, 16.8123, 16.5654, 15.9669, 15.2322, 15.7261,
        15.8166, 14.8620, 15.1192, 14.8450, 14.1004, 15.0191, 14.1287, 14.1404,
        14.3330, 13.3037, 12.9813, 11.4433, 11.7646, 11.1718, 10.8239, 11.9856,
        11.0407, 11.7461, 12.7092, 11.2321, 12.2308, 10.5441, 21.1960, 22.0276,
        22.1279, 21.8224, 20.8091, 19.8956, 18.9261, 18.8355, 18.4101, 17.5706,
        17.9691, 18.5169, 17.7886, 19.1970, 18.4071, 18.5938, 19.1925, 19.8019,
        20.3752, 19.9934, 19.9413, 20.6682, 20.1691, 20.7630, 19.5423, 20.2392,
        21.3045, 20.0699, 20.6395, 19.1388, 14.6596, 16.8031, 18.6339, 19.8437,
        20.4787, 21.5536, 21.9554, 22.9991, 23.4690, 23.0957, 23.4438, 23.2856,
        22.5349, 23.0822, 21.9371, 21.8803, 22.3824, 22.3439, 22.7056, 22.4349,
        22.9355, 24.1681, 24.2219, 25.2495, 24.6788, 25.0087, 26.0917, 24.9216,
        24.7743, 23.0023, 14.3883, 14.2185, 13.9090, 13.7718, 13.7459, 13.7247,
        13.9097, 13.9690, 14.0660, 14.14

In [12]:
# The first num_antennas * num_subcarriers entries of the last axis are taken
# as the amplitude values; all leading axes are kept unchanged.
amplitudes = wifi_csi_frame[..., :num_antennas * num_subcarriers]
amplitudes[0, 0]

tensor([15.7798, 16.7691, 17.0895, 16.8123, 16.5654, 15.9669, 15.2322, 15.7261,
        15.8166, 14.8620, 15.1192, 14.8450, 14.1004, 15.0191, 14.1287, 14.1404,
        14.3330, 13.3037, 12.9813, 11.4433, 11.7646, 11.1718, 10.8239, 11.9856,
        11.0407, 11.7461, 12.7092, 11.2321, 12.2308, 10.5441, 21.1960, 22.0276,
        22.1279, 21.8224, 20.8091, 19.8956, 18.9261, 18.8355, 18.4101, 17.5706,
        17.9691, 18.5169, 17.7886, 19.1970, 18.4071, 18.5938, 19.1925, 19.8019,
        20.3752, 19.9934, 19.9413, 20.6682, 20.1691, 20.7630, 19.5423, 20.2392,
        21.3045, 20.0699, 20.6395, 19.1388, 14.6596, 16.8031, 18.6339, 19.8437,
        20.4787, 21.5536, 21.9554, 22.9991, 23.4690, 23.0957, 23.4438, 23.2856,
        22.5349, 23.0822, 21.9371, 21.8803, 22.3824, 22.3439, 22.7056, 22.4349,
        22.9355, 24.1681, 24.2219, 25.2495, 24.6788, 25.0087, 26.0917, 24.9216,
        24.7743, 23.0023])

In [13]:
# Everything after the first num_antennas * num_subcarriers entries of the
# last axis is taken as the phase values; all leading axes are kept unchanged.
# NOTE(review): the values recorded for this slice (roughly 12-15) lie outside
# [-pi, pi] -- confirm that these columns really hold (processed) phases.
phases = wifi_csi_frame[..., num_antennas * num_subcarriers:]
phases[0, 0]

tensor([14.3883, 14.2185, 13.9090, 13.7718, 13.7459, 13.7247, 13.9097, 13.9690,
        14.0660, 14.1434, 14.2629, 14.3596, 14.5057, 14.4884, 14.9826, 15.0107,
        14.9374, 14.9695, 14.9747, 15.0123, 14.9571, 14.9180, 14.9427, 14.9251,
        14.9197, 14.7495, 14.6023, 14.3267, 14.0418, 14.3883, 13.0333, 12.7555,
        12.3799, 12.1520, 11.9134, 11.8546, 11.9498, 11.9844, 12.0688, 12.1502,
        12.3399, 12.4086, 12.4327, 12.5484, 13.0068, 13.0633, 13.1170, 13.1746,
        13.2497, 13.1945, 13.2458, 13.2144, 13.2667, 13.3088, 13.2674, 13.2535,
        13.0888, 12.9233, 12.7574, 13.0333, 13.4517, 13.3349, 13.1073, 12.9542,
        12.7864, 12.6720, 12.7260, 12.7004, 12.6493, 12.6444, 12.6552, 12.6584,
        12.7387, 12.7700, 13.2346, 13.3165, 13.4232, 13.5453, 13.6351, 13.7469,
        13.8447, 13.8824, 13.9372, 13.9246, 13.9125, 13.8189, 13.6341, 13.4350,
        13.2022, 13.4517])

In [14]:
# Pair each amplitude with the phase at the same position by stacking the two
# tensors along a new trailing axis of size 2.
csi_data = torch.stack([amplitudes, phases], dim=-1)
csi_data.shape

torch.Size([4, 10, 90, 2])

In [15]:
# Display the stacked tensor to check that each trailing pair holds one value
# from `amplitudes` followed by the matching value from `phases`.
csi_data

tensor([[[[15.7798, 14.3883],
          [16.7691, 14.2185],
          [17.0895, 13.9090],
          ...,
          [24.9216, 13.4350],
          [24.7743, 13.2022],
          [23.0023, 13.4517]],

         [[16.1613, 14.5834],
          [17.5160, 14.3152],
          [17.5887, 14.0327],
          ...,
          [25.0349, 13.7834],
          [24.4863, 13.5458],
          [23.3070, 13.8259]],

         [[15.4553, 14.6143],
          [16.8243, 14.3964],
          [16.4779, 14.0871],
          ...,
          [25.1859, 13.9241],
          [24.6345, 13.6991],
          [23.6263, 13.9892]],

         ...,

         [[15.5519, 14.5681],
          [16.7517, 14.4255],
          [17.0895, 14.0761],
          ...,
          [25.3157, 13.7260],
          [24.9542, 13.4787],
          [23.6344, 13.7598]],

         [[15.3938, 14.8327],
          [16.1837, 14.6586],
          [16.5684, 14.3993],
          ...,
          [24.6157, 13.9069],
          [24.2885, 13.6860],
          [22.6916, 13.9562]],



In [16]:
# Move axis 1 of the stacked tensor to the end: (0, 1, 2, 3) -> (0, 2, 3, 1).
# The tensor is rebuilt from `amplitudes`/`phases` rather than permuting
# `csi_data` in place: a cell of the form `csi_data = csi_data.permute(...)`
# is not idempotent, so re-running it would permute the already-permuted
# tensor and silently change the layout.
csi_data = torch.stack((amplitudes, phases), dim=-1).permute(0, 2, 3, 1)
csi_data.shape

torch.Size([4, 90, 2, 10])

In [17]:
# Merge the two leading axes so that every remaining (2, num_time_slices)
# slice becomes its own entry along the first axis. num_time_slices is the
# constant defined with the other layout constants earlier in the notebook.
csi_data = csi_data.reshape(-1, 2, num_time_slices)
csi_data.shape

torch.Size([360, 2, 10])

In [18]:
# NOTE(review): notebook convention is to keep all imports in the first cell;
# this project import is left in place so the cell stays self-contained.
from src.ut_har.ut_har import make_dataset, make_dataloader

# Build the dataset splits through the project helper. test_split=0 is passed,
# so presumably no samples are held out for testing here -- the train and
# validation sizes reported later in this notebook add up to the full sample
# count (TODO confirm against make_dataset).
train_dataset, val_dataset, test_dataset = make_dataset(
    dataset_path,
    normalize=True,
    val_split=0.2,
    test_split=0,
)

  self.X = torch.tensor(X,dtype=torch.float32)
  self.y = torch.tensor(y,dtype=torch.uint8)


In [19]:
# torch.manual_seed seeds the global RNG and returns the default generator,
# which is then handed to both loaders.
rng_generator = torch.manual_seed(42)
train_dataloader = make_dataloader(train_dataset, is_training=True, batch_size=24, generator=rng_generator)
# This loader iterates the *validation* split, so it is named accordingly.
val_dataloader = make_dataloader(val_dataset, is_training=False, batch_size=8, generator=rng_generator)
# Backward-compatible alias: later cells still refer to `test_dataloader`.
test_dataloader = val_dataloader

In [20]:
# Number of samples in the training split returned by make_dataset.
len(train_dataset)

88634

In [21]:
# Number of batches times the training batch size (24, as passed to
# make_dataloader). Presumably len() of the loader counts batches; in the
# recorded run the product is slightly below len(train_dataset), which
# suggests the incomplete final batch is not counted -- TODO confirm in
# make_dataloader.
len(train_dataloader)*24

88632

In [22]:
# Number of samples in the validation split returned by make_dataset.
len(val_dataset)

22159

In [23]:
# `test_dataloader` wraps the validation split (it was built from val_dataset),
# with batch size 8. In the recorded run the product is slightly below
# len(val_dataset), which suggests the incomplete final batch is not counted
# even for evaluation -- NOTE(review): confirm in make_dataloader, since that
# would leave a few validation samples unevaluated.
len(test_dataloader)*8

22152