# Data exploration of the California (bay) dataset

In [1]:
import pandas as pd
import numpy as np
import torch

## Data tensor

In [2]:
# Read the raw measurement array from disk and copy it into a float32 tensor.
data = torch.tensor(
    np.load("STGAN/bay/data/data.npy"),
    dtype=torch.float32,
)
# Show the dimensions; presumably (time steps, nodes, lanes, features) -- TODO confirm against the dataset docs.
data.shape

torch.Size([36288, 365, 6, 2])

In [3]:
# Speed readings (second feature, index 1) over the first 20 time steps,
# taken at the 1st node / VDS (index 0) in the 5th lane (index 4).
data[0:20, 0, 4, 1]

tensor([64.5000, 64.5000, 64.5000, 64.1000, 64.3000, 64.6000, 64.3000, 64.2000,
        64.2000, 64.5000, 64.5000, 64.3000, 64.5000, 64.5000, 64.5000, 64.5000,
        64.6000, 64.5000, 64.5000, 64.5000])

## Node subgraph tensor

In [4]:
# Read the per-node subgraph array from disk and copy it into a float32 tensor.
node_subgraph = torch.tensor(
    np.load("STGAN/bay/data/node_subgraph.npy"),
    dtype=torch.float32,
)
# Show the dimensions; presumably one square matrix per node -- TODO confirm.
node_subgraph.shape

torch.Size([365, 9, 9])

In [5]:
# Adjacency matrix belonging to the 1st node (VDS): the full 2-D slice at index 0.
node_subgraph[0]

tensor([[0.0000, 0.0000, 0.1762, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.6749, 0.0000, 0.0000, 0.7380, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5756, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.4522, 0.0000],
        [0.0000, 1.9735, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.3191, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.6184, 0.0000, 0.0000, 0.0000, 0.0000]])

## Time features

In [6]:
# Input features: 6.
# Length of each time-feature row: 7 (weekday) + 24 (hour) = 31 columns.
# The file is space-separated and has no header row; preview the first 30 rows.
pd.read_csv(
    "STGAN/bay/data/time_features.txt",
    sep=" ",
    header=None,
).head(30)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0


## Node distances

In [7]:
# Pairwise distances between nodes: entry (i, j) is the distance from node i to node j.
# In the displayed rows the diagonal is zero and the matrix looks symmetric.
# The file is space-separated and has no header row.
pd.read_csv(
    "STGAN/bay/data/node_dist.txt",
    sep=" ",
    header=None,
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,355,356,357,358,359,360,361,362,363,364
0,0.000000,17.216931,12.906703,0.731851,8.065370,0.122341,12.232376,4.436142,12.621671,4.954814,...,6.510454,6.046029,4.085960,4.068960,5.293240,5.309704,4.213317,15.263784,1.457637,1.837744
1,17.216931,0.000000,15.102320,17.118768,10.233535,17.221801,8.941150,17.303801,11.745898,13.113872,...,21.590964,21.073550,18.227021,17.366461,12.454082,12.461867,16.551916,30.898036,18.067004,18.654256
2,12.906703,15.102320,0.000000,12.236342,7.906738,12.805448,6.212664,9.111969,3.359711,8.899362,...,19.412324,18.952656,16.719821,16.355212,13.053929,13.098990,16.034593,19.275919,12.376887,12.765059
3,0.731851,17.118768,12.236342,0.000000,7.710249,0.612201,11.790166,3.705226,12.053926,4.540235,...,7.176917,6.724241,4.817694,4.783444,5.501792,5.523600,4.886365,14.944950,1.044484,1.583536
4,8.065370,10.233535,7.906738,7.710249,0.000000,8.023681,4.350945,7.098461,5.812703,3.198203,...,13.983614,13.459903,10.719803,10.080465,5.636866,5.677748,9.501791,20.665371,8.472855,9.053564
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,5.309704,12.461867,13.098990,5.523600,5.677748,5.361106,9.874227,7.796041,11.456473,4.456571,...,9.245767,8.717504,5.841536,5.024620,0.045467,0.000000,4.270752,20.459167,6.567722,7.078488
361,4.213317,16.551916,16.034593,4.886365,9.501791,4.332118,13.839272,8.485439,15.007494,7.193034,...,5.039528,4.521792,1.722587,0.837104,4.293197,4.270752,0.000000,18.766914,5.619813,5.834670
362,15.263784,30.898036,19.275919,14.944950,20.665371,15.188660,23.167761,13.620895,21.581898,17.919852,...,16.736569,16.779165,17.566639,18.178403,20.435296,20.459167,18.766914,0.000000,13.905737,13.432270
363,1.457637,18.067004,12.376887,1.044484,8.472855,1.355014,12.421787,3.449162,12.469617,5.275116,...,7.177318,6.771419,5.266552,5.389737,6.546074,6.567722,5.619813,13.905737,0.000000,0.590255


## Adjacent nodes

In [8]:
# 9 nearest neighbours for each node, one row per node.
# NOTE(review): in the displayed rows column 0 equals the row's own node index,
# so the list appears to include the node itself -- confirm.
# The file is space-separated and has no header row.
pd.read_csv(
    "STGAN/bay/data/node_adjacent.txt",
    sep=" ",
    header=None,
)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,132,244,19,211,325,5,157,95
1,1,119,308,311,20,209,67,53,22
2,2,100,126,246,109,339,13,26,91
3,3,72,149,212,245,220,29,59,134
4,4,45,118,161,236,249,137,150,9
...,...,...,...,...,...,...,...,...,...
360,360,257,359,210,258,319,348,33,351
361,361,264,358,263,357,262,254,261,260
362,362,30,279,282,288,181,280,154,155
363,363,305,364,197,170,192,108,167,189
