In [1]:
import dgl
import math
import torch
import numpy as np
import networkx as nx
from os import path
from pathlib import Path
from copy import deepcopy
from dgl.data import DGLDataset
from ToyDGLDataset import ToyDGLDataset
from GraphDatasetInfo import (DistributionType, Distribution, GraphSubdatasetInfo, GraphDatasetInfo)


Using backend: pytorch


In [2]:
# Shared configuration used by every dataset-generation cell below.
toyname = 'Toy5'
# NOTE(review): absolute, machine-specific output root; adjust before running elsewhere.
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets', toyname)

# Number of graphs generated per graph class ("few" vs. "many" graphs variants).
graphCountSmall = 100
graphCountLarge = 2000

# --- Node-count distributions: "few nodes per graph" variants ---
fewNodesPerGraph0 = Distribution(
    minimum=1, maximum=10,
    mean=6, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

fewNodesPerGraph1 = Distribution(
    minimum=5, maximum=20,
    mean=13, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

fewNodesPerGraphSameDist = Distribution(
    minimum=1, maximum=20,
    mean=10, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# --- Node-count distributions: "many nodes per graph" variants ---
manyNodesPerGraph0 = Distribution(
    minimum=10, maximum=120,
    mean=30, standardDeviation=10,
    distributionType=DistributionType.truncnorm)

manyNodesPerGraph1 = Distribution(
    minimum=10, maximum=120,
    mean=65, standardDeviation=10,
    distributionType=DistributionType.truncnorm)

# Same parameters as manyNodesPerGraph1, but a separate instance.
manyNodesPerGraphSameDist = Distribution(
    minimum=10, maximum=120,
    mean=65, standardDeviation=10,
    distributionType=DistributionType.truncnorm)

# Node feature name -> position inside the node feature list.
nFeatMapping = {
    featName: featIndex
    for featIndex, featName in enumerate(('P_t', 'Eta', 'Phi', 'Mass', 'Type'))
}

# Default per-feature distributions, ordered according to nFeatMapping.
defaultNodeFeat = [
    # index 0 -> P_t
    Distribution(10, 100, DistributionType.uniform),
    # index 1 -> Eta
    Distribution(-10, 10, DistributionType.uniform),
    # index 2 -> Phi
    Distribution(0, 2 * math.pi, DistributionType.uniform),
    # index 3 -> Mass
    Distribution(0.001, 1, DistributionType.uniform),
    # index 4 -> Type
    Distribution(0, 2, DistributionType.uniform, roundToNearestInt=True),
]

# Edge feature name -> position inside the edge feature list.
eFeatMapping = {
    featName: featIndex
    for featIndex, featName in enumerate(('DeltaEta', 'DeltaPhi', 'RapiditySquared'))
}

# One placeholder per edge feature (DeltaEta, DeltaPhi, RapiditySquared);
# no distribution is configured for any of them here.
edgeFeat = [None] * 3

# Labels of the two graph classes and the train/valid/test split fractions.
graphlabel = [0, 1]
splitPercentages = {'train': 0.6, 'valid': 0.2, 'test': 0.2}

In [3]:
# Variant: few graphs per class, few nodes per graph, and the same
# node-count distribution for both graph classes.
datasetname = '_'.join([toyname, 'fewGraphs_fewNodes_SameNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountSmall,
    nodesPerGraph=fewNodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change only name, label and the Eta distribution.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

# The description can be loaded back from the JSON file like this:
# graphdatasetInfo = GraphDatasetInfo.LoadFromJsonfile(path.join(outputFolder, f'{graphdatasetInfo.name}.json'))

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 100/100 [00:00<00:00, 801.47it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 100/100 [00:00<00:00, 915.46it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 200
Number of all nodes in all graphs: 1895
Number of all edges in all graphs: 16244
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [4]:
# Variant: few graphs per class, few nodes per graph, and a different
# node-count distribution for each graph class.
datasetname = '_'.join([toyname, 'fewGraphs_fewNodes_DifferentNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountSmall,
    nodesPerGraph=fewNodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change name, label, node-count distribution and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = fewNodesPerGraph1
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 100/100 [00:00<00:00, 826.42it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 100/100 [00:00<00:00, 981.55it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 200
Number of all nodes in all graphs: 1817
Number of all edges in all graphs: 17466
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [5]:
# Variant: few graphs per class, many nodes per graph, and the same
# node-count distribution for both graph classes.
datasetname = '_'.join([toyname, 'fewGraphs_manyNodes_SameNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountSmall,
    nodesPerGraph=manyNodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change only name, label and the Eta distribution.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 100/100 [00:01<00:00, 94.06it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 100/100 [00:01<00:00, 96.67it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 200
Number of all nodes in all graphs: 12995
Number of all edges in all graphs: 852792
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [6]:
# Variant: few graphs per class, many nodes per graph, and a different
# node-count distribution for each graph class.
datasetname = '_'.join([toyname, 'fewGraphs_manyNodes_DifferentNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountSmall,
    nodesPerGraph=manyNodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change name, label, node-count distribution and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = manyNodesPerGraph1
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 100/100 [00:01<00:00, 96.69it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 100/100 [00:00<00:00, 338.41it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 200
Number of all nodes in all graphs: 9533
Number of all edges in all graphs: 522768
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [7]:
# Variant: many graphs per class, few nodes per graph, and the same
# node-count distribution for both graph classes.
datasetname = '_'.join([toyname, 'manyGraphs_fewNodes_SameNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountLarge,
    nodesPerGraph=fewNodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change only name, label and the Eta distribution.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 2000/2000 [00:02<00:00, 948.18it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 2000/2000 [00:02<00:00, 984.00it/s] 


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 4000
Number of all nodes in all graphs: 38050
Number of all edges in all graphs: 328282
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [8]:
# Variant: many graphs per class, few nodes per graph, and a different
# node-count distribution for each graph class.
datasetname = '_'.join([toyname, 'manyGraphs_fewNodes_DifferentNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountLarge,
    nodesPerGraph=fewNodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change name, label, node-count distribution and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = fewNodesPerGraph1
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 2000/2000 [00:02<00:00, 898.31it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 2000/2000 [00:01<00:00, 1119.33it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 4000
Number of all nodes in all graphs: 35942
Number of all edges in all graphs: 340212
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [9]:
# Variant: many graphs per class, many nodes per graph, and the same
# node-count distribution for both graph classes.
datasetname = '_'.join([toyname, 'manyGraphs_manyNodes_SameNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountLarge,
    nodesPerGraph=manyNodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change only name, label and the Eta distribution.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 2000/2000 [00:18<00:00, 105.70it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 2000/2000 [00:18<00:00, 108.69it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 4000
Number of all nodes in all graphs: 258777
Number of all edges in all graphs: 16887080
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [10]:
# Variant: many graphs per class, many nodes per graph, and a different
# node-count distribution for each graph class.
datasetname = '_'.join([toyname, 'manyGraphs_manyNodes_DifferentNodeCountDistributions'])
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(exist_ok=True, parents=True)

# ---- Graph class 0 ----
# Work on a copy so the shared default feature list is not modified.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'

graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountLarge,
    nodesPerGraph=manyNodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    edgeFeat=edgeFeat,
)
# Disabled P_t override for class 0, kept as an inert string literal.
"""
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)
"""
# Class 0 draws Eta with mean=1.
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# ---- Graph class 1 ----
# Clone class 0, then change name, label, node-count distribution and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = manyNodesPerGraph1
# Disabled P_t override for class 1, kept as an inert string literal.
"""
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = Distribution(
    distributionType=DistributionType.truncnorm, 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)
"""
# Class 1 draws Eta with mean=0.
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Distribution(
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1,
    distributionType=DistributionType.truncnorm)

# Note the order: class 1 first, then class 0.
subdatasets = [graphsubdatasetInfo1, graphsubdatasetInfo0]

graphdatasetInfo = GraphDatasetInfo(
    graphSubDatasetInfos=subdatasets,
    splitPercentages=splitPercentages,
    name=datasetname,
)

# Write the dataset description as JSON, then construct the DGL dataset
# with the same folder passed as save_dir.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 2000/2000 [00:19<00:00, 103.51it/s]
(2/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 2000/2000 [00:05<00:00, 381.88it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 4000
Number of all nodes in all graphs: 190522
Number of all edges in all graphs: 10480506
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

# Inspect the first graph of the most recently built `graphdatasetInfo`:
# print its node/edge data, draw it with networkx, convert it to a DGL
# graph and compare both representations.

# NOTE(review): `rng` is not used in the visible part of the notebook; kept
# in case later cells rely on it.
rng = np.random.default_rng(seed=42)
nxgraphs = graphdatasetInfo.ToNetworkxGraphList()

print(f'Graphs in the dataset: {len(nxgraphs)}')
print('Node features of the first graph in the graph list: ')
for node in nxgraphs[0].nodes(data=True):
    print(node)

print("Fully connected graph with edge features: ")
print(nxgraphs[0].edges.data())

# Draw the first graph using a spring layout.
# NOTE(review): `plt` (matplotlib.pyplot) is not imported by the import cell
# visible in this notebook; `import matplotlib.pyplot as plt` has to be added
# there before this cell can run on a fresh kernel.
pos = nx.spring_layout(nxgraphs[0])
options = {
    "node_color": "#A0CBE2",
    "width": 0.5,
    "with_labels": True,
    "node_size": 600
}
plt.figure(1, figsize=(10, 10))
nx.draw(nxgraphs[0], pos, **options)

# Convert the first networkx graph to DGL, carrying node and edge attributes.
# The original referenced `nodeFeatures` / `edgeFeatures`, which are not
# defined anywhere in the visible notebook; the feature-name mappings from
# the configuration cell are used instead.
# NOTE(review): assumes the networkx node/edge attributes are keyed by these
# feature names - verify against ToNetworkxGraphList.
# NOTE(review): dgl.from_networkx appears to accept edge_attrs only for
# directed networkx graphs - confirm that ToNetworkxGraphList returns DiGraphs.
dglgraph = dgl.from_networkx(
    nxgraphs[0],
    node_attrs=list(nFeatMapping.keys()),
    edge_attrs=list(eFeatMapping.keys()))
print(f'Node count - nx: {nxgraphs[0].number_of_nodes()}, dgl: {dglgraph.num_nodes()}')
print(f'Edge count - nx: {nxgraphs[0].number_of_edges()}, dgl: {dglgraph.num_edges()}')

print(f'Node Features \n nx: {nxgraphs[0].nodes(data=True)[0]},\n dgl: {dglgraph.ndata}')

# This comparison used to run before `dglgraph` was assigned, which raises
# NameError on a fresh kernel; it now runs after the conversion.
print(f'Edge Features \n nx: {list(nxgraphs[0].edges.data())[0]},\n dgl: {dglgraph.edata}')

print(dglgraph)