In [1]:
import dgl
import math
import torch
import numpy as np
import networkx as nx
from os import path
from pathlib import Path
from copy import deepcopy
from dgl.data import DGLDataset
from Distribution import Distribution
from ToyDGLDataset_v2 import ToyDGLDataset_v2
from GraphDatasetInfo import (GraphSubdatasetInfo, GraphDatasetInfo)


Using backend: pytorch


In [2]:
# Shared configuration for every toy dataset generated in this notebook.

# Seed the global NumPy and PyTorch generators so that Restart & Run All
# regenerates the same datasets.
# NOTE(review): this assumes Distribution / ToyDGLDataset_v2 draw from one of
# these global generators -- confirm against their implementations; if they use
# Python's `random` module or a private generator, seed that as well.
randomSeed = 42
np.random.seed(randomSeed)
torch.manual_seed(randomSeed)

# Number of graphs generated for each graph class (sub-dataset).
graphCountPerClass = 50000

# --- Node-count distributions -------------------------------------------------
# NodesPerGraph0 / NodesPerGraph1 are used by the "_1" dataset variants, where the
# two classes differ in node count; NodesPerGraphSameDist is shared by both
# classes in the "_0" variants.
NodesPerGraph0 = Distribution(
    distributionType='truncnorm',
    minimum=2, maximum=10,
    mean=6, standardDeviation=1)

NodesPerGraph1 = Distribution(
    distributionType='truncnorm',
    minimum=5, maximum=20,
    mean=13, standardDeviation=1)

NodesPerGraphSameDist = Distribution(
    distributionType='truncnorm',
    minimum=2, maximum=20,
    mean=10, standardDeviation=1)

# --- Node features ------------------------------------------------------------
# Feature name -> position in the per-node feature list below.
nFeatMapping = {'P_t': 0, 'Eta': 1, 'Phi': 2, 'Mass': 3, 'Type': 4}

# Default per-node feature distributions, ordered according to nFeatMapping.
# Dataset cells deep-copy this list and then swap individual entries.
defaultNodeFeat = [
    Distribution(10, 100, 'uniform'), # index 0 -> P_t
    Distribution(-10, 10, 'uniform'), # index 1 -> Eta
    Distribution(0, 2 * math.pi, 'uniform'), # index 2 -> Phi
    Distribution(0.001, 1, 'uniform'), # index 3 -> Mass
    # NOTE(review): if roundToNearestInt rounds a uniform draw on [0, 2], the
    # values 0 and 2 would each be half as likely as 1 -- confirm this is intended.
    Distribution(0, 2, 'uniform', roundToNearestInt=True) # index 4 -> Type
]

# Edge / graph feature name -> index mappings handed to GraphSubdatasetInfo.
eFeatMapping = {'DeltaEta': 0, 'DeltaPhi': 1, 'RapiditySquared': 2}
gFeatMapping = {'NodeCount': 0, 'TotalP_t': 1}

# Labels of the two graph classes and the train/valid/test fractions (sum to 1).
graphlabel = [0, 1]
splitPercentages = {'train': 0.7, 'valid': 0.2, 'test': 0.1}

# --- Class-specific P_t distributions -------------------------------------------
# "VeryDiff": class 1 is restricted to a narrow uniform band; class 0 keeps the
# default uniform(10, 100) from defaultNodeFeat in the cells that use it.
P_t_VeryDiffGraphClass1 = Distribution(
    distributionType='uniform',
    minimum=60, maximum=80)

# "SlightlyDiff": same bounds and spread for both classes, means 60 vs. 70.
P_t_SlightlyDiffGraphClass0 = Distribution(
    distributionType='truncnorm',
    minimum=10, maximum=120,
    mean=60, standardDeviation=5)

P_t_SlightlyDiffGraphClass1 = Distribution(
    distributionType='truncnorm',
    minimum=10, maximum=120,
    mean=70, standardDeviation=5)

# --- Class-specific Eta distributions -------------------------------------------
# "SlightlyDiff": means 0 vs. 1 with standard deviation 1.
Eta_SlightlyDiffGraphClass0 = Distribution(
    distributionType='truncnorm',
    minimum=-10, maximum=10,
    mean=0, standardDeviation=1)

Eta_SlightlyDiffGraphClass1 = Distribution(
    distributionType='truncnorm',
    minimum=-10, maximum=10,
    mean=1, standardDeviation=1)

# "VeryDiff": means -5 vs. 5 with standard deviation 1.
Eta_VeryDiffGraphClass0 = Distribution(
    distributionType='truncnorm',
    minimum=-10, maximum=10,
    mean=-5, standardDeviation=1)

Eta_VeryDiffGraphClass1 = Distribution(
    distributionType='truncnorm',
    minimum=-10, maximum=10,
    mean=5, standardDeviation=1)

In [3]:
# Toy0_v2, variant _0 (baseline): both classes draw their node count from
# NodesPerGraphSameDist and keep the default node features, so the two
# sub-datasets differ only in name and label.
toyname = 'Toy0_v2'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v2', toyname)
datasetname = toyname + '_0'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0 works on its own copy of the default node-feature distributions.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)

# Class 1 starts as a deep copy of class 0; only name and label are changed.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

# The stored description can be read back later with:
#graphdatasetInfo = GraphDatasetInfo.LoadFromJsonfile(path.join(outputFolder, f'{graphdatasetInfo.name}.json'))

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:24<00:00, 2031.31it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:24<00:00, 2018.47it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950210
Number of all edges in all graphs: 8186288
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [4]:
# Toy0_v2, variant _1: default node features for both classes; the classes differ
# in their node-count distribution (NodesPerGraph0 vs. NodesPerGraph1).
# Relies on toyname / rootOutputFolder set by the preceding "_0" cell.
datasetname = toyname + '_1'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0 works on its own copy of the default node-feature distributions.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)

# Class 1 starts as a deep copy of class 0 and gets its own node-count distribution.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = NodesPerGraph1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:19<00:00, 2555.03it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:29<00:00, 1667.75it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 899785
Number of all edges in all graphs: 8528868
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [5]:
# Toy1_v2, variant _0: same node-count distribution for both classes; class 1
# swaps its P_t distribution for P_t_VeryDiffGraphClass1 while class 0 keeps the
# default P_t from defaultNodeFeat.
toyname = 'Toy1_v2'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v2', toyname)
datasetname = toyname + '_0'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0 works on its own copy of the default node-feature distributions.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)

# Class 1 starts as a deep copy of class 0, then overrides P_t.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_VeryDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:24<00:00, 2062.50it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:24<00:00, 2060.64it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950149
Number of all edges in all graphs: 8185618
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [6]:
# Toy1_v2, variant _1: classes differ in node-count distribution
# (NodesPerGraph0 vs. NodesPerGraph1) and class 1 additionally uses
# P_t_VeryDiffGraphClass1; class 0 keeps the default P_t.
# Relies on toyname / rootOutputFolder set by the preceding "_0" cell.
datasetname = toyname + '_1'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0 works on its own copy of the default node-feature distributions.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)

# Class 1 starts as a deep copy of class 0, then overrides node count and P_t.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = NodesPerGraph1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_VeryDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:19<00:00, 2593.18it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:30<00:00, 1625.33it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 900026
Number of all edges in all graphs: 8534608
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [7]:
# Toy2_v2, variant _0: same node-count distribution for both classes; the classes
# differ only in P_t (P_t_SlightlyDiffGraphClass0 vs. P_t_SlightlyDiffGraphClass1).
toyname = 'Toy2_v2'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v2', toyname)
datasetname = toyname + '_0'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with P_t replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides P_t.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:49<00:00, 1001.06it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:48<00:00, 1024.48it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950422
Number of all edges in all graphs: 8190976
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [8]:
# Toy2_v2, variant _1: classes differ in node-count distribution
# (NodesPerGraph0 vs. NodesPerGraph1) and in P_t
# (P_t_SlightlyDiffGraphClass0 vs. P_t_SlightlyDiffGraphClass1).
# Relies on toyname / rootOutputFolder set by the preceding "_0" cell.
datasetname = toyname + '_1'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with P_t replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides node count and P_t.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = NodesPerGraph1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:44<00:00, 1113.85it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:52<00:00, 944.38it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 899813
Number of all edges in all graphs: 8530744
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [9]:
# Toy3_v2, variant _0: same node-count distribution for both classes; the classes
# differ in P_t and Eta, both using the "SlightlyDiff" distribution pairs.
toyname = 'Toy3_v2'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v2', toyname)
datasetname = toyname + '_0'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with P_t and Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides P_t and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [01:13<00:00, 681.86it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [01:11<00:00, 700.12it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 949846
Number of all edges in all graphs: 8180270
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [10]:
# Toy3_v2, variant _1: classes differ in node-count distribution
# (NodesPerGraph0 vs. NodesPerGraph1) and in P_t and Eta, both using the
# "SlightlyDiff" distribution pairs.
# Relies on toyname / rootOutputFolder set by the preceding "_0" cell.
datasetname = toyname + '_1'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with P_t and Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides node count, P_t and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = NodesPerGraph1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [01:08<00:00, 726.10it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [01:17<00:00, 641.25it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 899881
Number of all edges in all graphs: 8533008
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [11]:
# Toy4_v2, variant _0: same node-count distribution for both classes; the classes
# differ in P_t ("SlightlyDiff" pair) and in Eta ("VeryDiff" pair).
toyname = 'Toy4_v2'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v2', toyname)
datasetname = toyname + '_0'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with P_t and Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides P_t and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [01:13<00:00, 681.30it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [01:14<00:00, 670.45it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950403
Number of all edges in all graphs: 8190358
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [12]:
# Toy4_v2, variant _1: classes differ in node-count distribution
# (NodesPerGraph0 vs. NodesPerGraph1), in P_t ("SlightlyDiff" pair) and in
# Eta ("VeryDiff" pair).
# Relies on toyname / rootOutputFolder set by the preceding "_0" cell.
datasetname = toyname + '_1'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with P_t and Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides node count, P_t and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = NodesPerGraph1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [01:08<00:00, 726.15it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [01:18<00:00, 639.08it/s]


Calculating and saving histograms...
Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 900010
Number of all edges in all graphs: 8531668
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [13]:
# Toy5_v2, variant _0: same node-count distribution for both classes; the classes
# differ only in Eta (Eta_SlightlyDiffGraphClass0 vs. Eta_SlightlyDiffGraphClass1).
# P_t stays at the default for both.
toyname = 'Toy5_v2'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v2', toyname)
datasetname = toyname + '_0'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:51<00:00, 980.31it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:50<00:00, 983.91it/s] 


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950244
Number of all edges in all graphs: 8187288
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [14]:
# Toy5_v2, variant _1: classes differ in node-count distribution
# (NodesPerGraph0 vs. NodesPerGraph1) and in Eta ("SlightlyDiff" pair).
# P_t stays at the default for both.
# Relies on toyname / rootOutputFolder set by the preceding "_0" cell.
datasetname = toyname + '_1'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides node count and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = NodesPerGraph1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_SlightlyDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:46<00:00, 1081.03it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:57<00:00, 863.00it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 899466
Number of all edges in all graphs: 8523448
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [15]:
# Toy6_v2, variant _0: same node-count distribution for both classes; the classes
# differ only in Eta (Eta_VeryDiffGraphClass0 vs. Eta_VeryDiffGraphClass1).
# P_t stays at the default for both.
toyname = 'Toy6_v2'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v2', toyname)
datasetname = toyname + '_0'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraphSameDist,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:51<00:00, 973.37it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:50<00:00, 984.68it/s] 


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950455
Number of all edges in all graphs: 8191650
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [16]:
# Toy6_v2, variant _1: classes differ in node-count distribution
# (NodesPerGraph0 vs. NodesPerGraph1) and in Eta ("VeryDiff" pair).
# P_t stays at the default for both.
# Relies on toyname / rootOutputFolder set by the preceding "_0" cell.
datasetname = toyname + '_1'
outputFolder = path.join(rootOutputFolder, datasetname)
Path(outputFolder).mkdir(parents=True, exist_ok=True)

# Class 0: copy of the default node features with Eta replaced.
nodeFeat = deepcopy(defaultNodeFeat)
name = f'GraphClass{graphlabel[0]}'
graphsubdatasetInfo0 = GraphSubdatasetInfo(
    name=name,
    label=graphlabel[0],
    graphCount=graphCountPerClass,
    nodesPerGraph=NodesPerGraph0,
    nodeFeatMapping=nFeatMapping,
    nodeFeat=nodeFeat,
    edgeFeatMapping=eFeatMapping,
    graphFeatMapping=gFeatMapping,
)
graphsubdatasetInfo0.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass0

# Class 1 starts as a deep copy of class 0, then overrides node count and Eta.
graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
name = f'GraphClass{graphlabel[1]}'
graphsubdatasetInfo1.name = name
graphsubdatasetInfo1.label = graphlabel[1]
graphsubdatasetInfo1.nodesPerGraph = NodesPerGraph1
graphsubdatasetInfo1.nodeFeat[nFeatMapping['Eta']] = Eta_VeryDiffGraphClass1

subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]
graphdatasetInfo = GraphDatasetInfo(
    name=datasetname,
    splitPercentages=splitPercentages,
    graphSubDatasetInfos=subdatasets,
)

# Store the dataset description as JSON in the output folder, then build the dataset.
graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
dataset = ToyDGLDataset_v2(
    name=datasetname,
    info=graphdatasetInfo,
    shuffleDataset=True,
    save_dir=outputFolder,
)

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:45<00:00, 1110.28it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:56<00:00, 890.57it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 900412
Number of all edges in all graphs: 8538846
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

In [17]:
# Sanity check: take the first (graph, label) pair of whichever dataset cell ran
# last and show its graph-level data, first the whole mapping, then the 'feat' entry.
graph, label = dataset[0]
print(graph.gdata, graph.gdata['feat'], sep='\n')

{'feat': tensor([12.0000, 47.6482], dtype=torch.float64)}
tensor([12.0000, 47.6482], dtype=torch.float64)
