# Data exploration of the NYC dataset

In [1]:
import pandas as pd
import numpy as np
import torch

## Data tensor

In [2]:
# Read the saved NumPy array from disk and copy it into a float32 tensor,
# then show its shape as the cell output.
data = torch.tensor(
    np.load("STGAN/nyc/data/data.npy"),
    dtype=torch.float32,
)
data.shape

torch.Size([16848, 862, 2, 2])

In [3]:
# Preview 30 consecutive entries (indices 1000-1029) along the first axis,
# at index 50 of the second axis and index 0 of both trailing axes.
# NOTE(review): the second axis has 862 entries, matching the node count of the
# other files, so it is presumably the node axis - confirm.
data[1000:1030, 50, 0, 0]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.])

## Node subgraph tensor

In [4]:
# Read the saved per-node subgraph array and copy it into a float32 tensor,
# then show its shape as the cell output.
node_subgraph = torch.tensor(
    np.load("STGAN/nyc/data/node_subgraph.npy"),
    dtype=torch.float32,
)
node_subgraph.shape

torch.Size([862, 9, 9])

In [5]:
# Show the full 2-D matrix stored for the first entry (index 0).
# NOTE(review): the non-zero values coincide with node_dist entries between
# node 0 and the indices listed for it in node_adjacent.txt, so this looks like
# a distance-weighted local adjacency matrix - confirm.
node_subgraph[0]

tensor([[   0.0000,  611.7753,  681.0370,  761.8346,  830.3263,  914.2402,
            0.0000,    0.0000, 1195.8546],
        [ 611.7753,    0.0000,    0.0000,  419.4126,    0.0000,    0.0000,
            0.0000,    0.0000,    0.0000],
        [ 681.0370,    0.0000,    0.0000,    0.0000,  255.4936,  535.2710,
          246.0284,    0.0000,    0.0000],
        [ 761.8346,  419.4126,    0.0000,    0.0000,    0.0000,    0.0000,
            0.0000,    0.0000,    0.0000],
        [ 830.3263,    0.0000,  255.4936,    0.0000,    0.0000,    0.0000,
          198.0068,    0.0000,    0.0000],
        [ 914.2402,    0.0000,  535.2710,    0.0000,    0.0000,    0.0000,
          624.4265,  641.7573, 1246.0446],
        [   0.0000,    0.0000,  246.0284,    0.0000,  198.0068,  624.4265,
            0.0000,    0.0000,    0.0000],
        [   0.0000,    0.0000,    0.0000,    0.0000,    0.0000,  641.7573,
            0.0000,    0.0000,  738.2078],
        [1195.8546,    0.0000,    0.0000,    0.0000,    

## Time feature

In [6]:
# Preview the first 30 rows of the space-delimited time-feature file (no header row).
pd.read_csv("STGAN/nyc/data/time_features.txt", delimiter=" ", header=None).head(30)
# Length of the time feature: 7 (weekday) + 24 (hour) + 8 (weather) = 39 columns,
# i.e. columns 0-38 in the output below. The weekday and hour parts alone make up
# 7 + 24 = 31 columns, so the weather part presumably starts at column 31 - confirm.
# Input features: 6 (not verifiable from this file - TODO confirm against the model config).

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29,30,31,32,33,34,35,36,37,38
0,0,0,1,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,0,0
5,0,0,1,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,0,0
6,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
7,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
8,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
9,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


## Node distances

In [7]:
# Pairwise distances between nodes, read from a space-delimited file without a
# header row. In the displayed rows the table is symmetric with a zero diagonal.
pd.read_csv(
    "STGAN/nyc/data/node_dist.txt",
    sep=" ",
    header=None,
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,852,853,854,855,856,857,858,859,860,861
0,0.000000,761.834610,1195.854603,611.775283,830.326316,681.037050,914.240165,1140.157699,1499.605505,919.948319,...,49747.889330,44746.891110,45395.959075,48987.595869,48910.235111,48618.230290,50068.799868,50059.254411,50051.328804,49551.436719
1,761.834610,0.000000,1922.922194,419.412620,1022.913921,1070.884423,1527.826247,1887.271647,2261.383784,1208.899987,...,50454.826187,45369.044958,46033.832759,49678.579912,49594.891983,49296.504371,50767.397422,50751.141046,50735.829025,50229.464943
2,1195.854603,1922.922194,0.000000,1799.973411,1783.952253,1535.207561,1246.044503,738.207842,615.169660,1748.288273,...,48898.183520,44119.763399,44731.129533,48184.322225,48124.509614,47849.869390,49243.621990,49253.210742,49265.769298,48783.507593
3,611.775283,419.412620,1799.973411,0.000000,603.508370,671.713449,1167.741164,1615.414867,2034.887328,790.866051,...,50097.945490,44971.074390,45640.962546,49310.632465,49223.138119,48921.193381,50404.424312,50383.738590,50363.961033,49854.002866
4,830.326316,1022.913921,1783.952253,603.508370,0.000000,255.493626,760.318821,1358.182998,1827.979455,198.006762,...,49583.719489,44397.664097,45074.878653,48780.430672,48687.438790,48380.364389,49881.379448,49854.293466,49828.066502,49312.932311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857,48618.230290,49296.504371,47849.869390,48921.193381,48380.364389,48251.048691,47768.682630,47506.771845,47280.136860,48183.051886,...,4638.904989,7738.999378,5959.220934,1907.871858,966.572825,0.000000,3428.206280,2494.129660,1699.022954,933.745506
858,50068.799868,50767.397422,49243.621990,50404.424312,49881.379448,49738.467334,49242.360457,48943.402613,48690.120344,49685.497636,...,1460.526083,11124.775226,9335.505261,1629.290941,2469.426795,3428.206280,0.000000,1086.426608,2225.881976,3210.008034
859,50059.254411,50751.141046,49253.210742,50383.738590,49854.293466,49716.034421,49224.459035,48938.141570,48693.889777,49657.826992,...,2526.660380,10233.128225,8452.634093,1081.309834,1595.496236,2494.129660,1086.426608,0.000000,1139.571116,2148.226396
860,50051.328804,50735.829025,49265.769298,50363.961033,49828.066502,49694.761433,49208.195968,48935.291017,48700.641252,49631.087036,...,3655.293234,9327.320896,7567.924262,1448.486563,1141.260598,1699.022954,2225.881976,1139.571116,0.000000,1076.630878


## Adjacent nodes

In [8]:
# Load the space-delimited adjacency list (no header row); one row per node.
pd.read_csv("STGAN/nyc/data/node_adjacent.txt", delimiter=" ", header=None)
# Each row holds 9 node indices: the node itself first (column 0), followed by
# 8 neighbours. For node 0 the neighbours are ordered by increasing distance
# according to node_dist.txt; presumably this holds for every row - confirm.

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0,3,5,1,4,6,9,7,2
1,1,3,0,4,5,9,6,7,2
2,2,8,7,0,6,5,10,9,4
3,3,1,4,0,5,9,6,7,2
4,4,9,5,3,6,0,1,7,2
...,...,...,...,...,...,...,...,...,...
857,857,861,856,846,839,860,848,855,851
858,858,859,843,852,855,834,860,848,822
859,859,855,858,860,843,856,861,848,834
860,860,861,859,856,855,857,858,848,843
