# Air Traffic Input Data Exploration and Construction for Using GraphStorm

In [1]:
import os
import json
import pandas as pd

## Explore the Input Data for GraphStorm `gconstruct` Command

In [2]:
# Airport node table; the path is reused later when building the gconstruct config.
airport_path = './airport.parquet'
airport_node_df = pd.read_parquet(path=airport_path)

In [3]:
# Preview seven random airports; the fixed seed keeps the displayed rows
# identical across "Restart Kernel -> Run All".
airport_node_df.sample(n=7, random_state=42)

Unnamed: 0,iata_code,latitude_deg,longitude_deg,inventory_amounts
378,AER,43.449902,39.9566,"[34.1535, 151.0298, 297.535, 376.4357, 389.930..."
343,ESG,-22.033912,-60.618964,"[34.3388, 152.4774, 302.5459, 376.1521, 393.61..."
39,EDI,55.950145,-3.372288,"[25.8268, 147.9907, 288.1349, 355.7757, 373.01..."
82,TFS,28.0445,-16.5725,"[33.2797, 140.5403, 284.6276, 353.8395, 376.77..."
71,TMS,0.378175,6.71215,"[33.1588, 144.1395, 290.6109, 356.0667, 367.04..."
79,FUE,28.4527,-13.8638,"[32.5105, 152.8554, 302.787, 370.7813, 390.586..."
84,ROB,6.23379,-10.3623,"[34.6886, 154.6892, 297.1849, 378.3564, 396.90..."


In [4]:
# Demand edge table; the path is reused later when building the gconstruct config.
demand_edge_path = './demand_edge.parquet'
demand_edge_df = pd.read_parquet(path=demand_edge_path)

In [5]:
# Preview seven random demand edges; the fixed seed keeps the displayed rows
# identical across "Restart Kernel -> Run All".
demand_edge_df.sample(n=7, random_state=42)

Unnamed: 0,src_code,dst_code,demands
108834,GCM,DLM,"[0.0, 0.0, 0.99, 0.0, 0.0, 0.0, 0.0, 0.0, 0.51..."
202856,DQM,DRW,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.82,..."
87607,TLS,SCQ,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.72,..."
24108,MSP,TOS,"[0.0, 0.49, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18..."
2516,SMF,YOW,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, ..."
202731,SDF,DRW,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.57, 0.0,..."
35883,SFO,FIH,"[0.0, 0.96, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


In [6]:
# Traffic edge table; the path is reused later when building the gconstruct config.
traffic_edge_path = 'traffic_edge.parquet'
traffic_edge_df = pd.read_parquet(path=traffic_edge_path)

In [7]:
# Preview seven random traffic edges; the fixed seed keeps the displayed rows
# identical across "Restart Kernel -> Run All".
traffic_edge_df.sample(n=7, random_state=42)

Unnamed: 0,src_code,dst_code,capacity,traffics
6379,RSW,OAK,0.5625,"[0.0227, 0.0227, 0.0227, 0.0227, 0.0227, 0.022..."
1175,CKG,PVG,1.5,"[0.0367, 0.0367, 0.0367, 0.0367, 0.0367, 0.036..."
3138,IND,TFS,0.0625,"[0.0625, 0.0625, 0.0625, 0.0125, 0.0125, 0.012..."
7789,THR,IND,0.25,"[0.25, 0.145, 0.145, 0.145, 0.145, 0.145, 0.14..."
5253,OMA,SDF,1.0,"[0.045, 0.045, 0.045, 0.045, 0.045, 0.045, 0.0..."
2604,HAK,SAN,0.25,"[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, ..."
7224,SJC,SRQ,1.5,"[0.0388, 0.0388, 0.0388, 0.0388, 0.0388, 0.038..."


### Prepare the JSON file for `gconstruct` command

In [8]:
# Root of the gconstruct configuration; node and edge definitions are added later.
air_traffic_json = dict(version="gconstruct-v0.1")

Node objects

In [9]:
# Declare the single node type for gconstruct. Airports are read from the
# parquet file at `airport_path` and identified by their `iata_code` column.
airport = {
    "node_type": "airport",
    "format": {"name": "parquet"},
    "files": [airport_path],
    "node_id_col": "iata_code",
    # Each (source column, feature name) pair becomes one feature entry.
    "features": [
        {"feature_col": column, "feature_name": name}
        for column, name in (
            ("latitude_deg", "latitude"),
            ("longitude_deg", "longitude"),
            ("inventory_amounts", "inventory_amounts"),
        )
    ],
    # Regression target with an 80/10/10 train/validation/test split.
    "labels": [
        {
            "label_col": "inventory_amounts",
            "task_type": "regression",
            "split_pct": [0.8, 0.1, 0.1],
        }
    ],
}

nodes = [airport]

Edge objects

In [10]:
# Declare the two airport-to-airport edge types for gconstruct. Both tables
# use `src_code` / `dst_code` as endpoint columns, and every edge feature keeps
# the name of the column it is read from.
ap_demand_ap = {
    "relation": ["airport", "demand", "airport"],
    "format": {"name": "parquet"},
    "files": [demand_edge_path],
    "source_id_col": "src_code",
    "dest_id_col": "dst_code",
    "features": [
        {"feature_col": column, "feature_name": column}
        for column in ("demands",)
    ],
}
ap_traffic_ap = {
    "relation": ["airport", "traffic", "airport"],
    "format": {"name": "parquet"},
    "files": [traffic_edge_path],
    "source_id_col": "src_code",
    "dest_id_col": "dst_code",
    "features": [
        {"feature_col": column, "feature_name": column}
        for column in ("capacity", "traffics")
    ],
}
edges = [ap_demand_ap, ap_traffic_ap]

In [11]:
# Attach the node and edge definitions to the config (nodes first, then edges).
air_traffic_json.update(nodes=nodes, edges=edges)

In [12]:
# Serialize the assembled configuration to config.json in the working directory.
with open("config.json", "w") as config_file:
    json.dump(air_traffic_json, config_file, indent=4)

## Run the GraphStorm `gconstruct` Command to Process the Air Traffic Data for Use with GraphStorm

In [1]:
# Install GraphStorm first, then the PyTorch and DGL builds that match the
# instance type. Only the CPU commands at the bottom are active by default.
!pip install graphstorm

# If using GPU instances: uncomment the torch/dgl pair that matches the
# installed CUDA version and skip the CPU commands below.
# for CUDA 11
# !pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# !pip install dgl==1.1.3+cu118 -f https://data.dgl.ai/wheels/cu118/repo.html

# for CUDA 12
# !pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# !pip install dgl==1.1.3+cu121 -f https://data.dgl.ai/wheels/cu121/repo.html

# If using CPU instances (default): CPU-only torch wheels plus the matching DGL build.
!pip install torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install dgl==1.1.3 -f https://data.dgl.ai/wheels-internal/repo.html

Looking in indexes: https://download.pytorch.org/whl/cpu
Looking in links: https://data.dgl.ai/wheels-internal/repo.html


In [2]:
# Run GraphStorm's graph construction module on the config.json written above.
# Output goes to gs_1p/ as a single partition, with the graph named "air_traffic".
!python -m graphstorm.gconstruct.construct_graph \
           --conf-file config.json \
           --output-dir gs_1p/ \
           --num-parts 1 \
           --graph-name air_traffic

INFO:root:The graph has 1 node types and 2 edge types.
INFO:root:Node type airport has 471 nodes
INFO:root:Edge type ('airport', 'demand', 'airport') has 221370 edges
INFO:root:Edge type ('airport', 'traffic', 'airport') has 8530 edges
INFO:root:Node type airport has features: ['latitude', 'longitude', 'inventory_amounts', 'train_mask', 'val_mask', 'test_mask'].
INFO:root:Train/val/test on airport with mask train_mask, val_mask, test_mask: 376, 47, 47
INFO:root:Note: Custom train, validate, test mask information for nodes are not collected.
INFO:root:Edge type ('airport', 'demand', 'airport') has features: ['demands'].
INFO:root:Edge type ('airport', 'traffic', 'airport') has features: ['capacity', 'traffics'].
The graph has 1 node types and balance among 4 types
Converting to homogeneous graph takes 0.003s, peak mem: 4.975 GB
Save partitions: 0.007 seconds, peak memory: 6.743 GB
There are 229900 edges in the graph and 0 edge cuts for 1 partitions.
INFO:root:Graph construction generate