In [1]:
import dgl
import math
import torch
import numpy as np
import networkx as nx
from os import path
from pathlib import Path
from copy import deepcopy
from dgl.data import DGLDataset
from Distribution import Distribution
from ToyDGLDataset_v2 import ToyDGLDataset_v2
from GraphDatasetInfo import (GraphSubdatasetInfo, GraphDatasetInfo)


Using backend: pytorch


In [2]:
# ---------------------------------------------------------------------------
# Configuration shared by the dataset-generation cells of this notebook.
# ---------------------------------------------------------------------------

# Passed as `graphCount` to every GraphSubdatasetInfo (one sub-dataset per class).
graphCountPerClass = 50000


def _makeTruncnorm(minimum, maximum, mean, standardDeviation):
    """Return a Distribution of type 'truncnorm' with the given bounds and moments."""
    return Distribution(
        distributionType='truncnorm',
        minimum=minimum, maximum=maximum,
        mean=mean, standardDeviation=standardDeviation)


# Node-count distributions: two class-specific ones and one common to both classes.
NodesPerGraph0 = _makeTruncnorm(minimum=2, maximum=10, mean=6, standardDeviation=1)
NodesPerGraph1 = _makeTruncnorm(minimum=5, maximum=20, mean=13, standardDeviation=1)
NodesPerGraphSameDist = _makeTruncnorm(minimum=2, maximum=20, mean=10, standardDeviation=1)

# Feature name -> position of that feature in the node feature list.
nFeatMapping = {
    featName: featIndex
    for featIndex, featName in enumerate(('P_t', 'Eta', 'Phi', 'Mass', 'Type'))
}

# Default per-node feature distributions; positions follow nFeatMapping.
defaultNodeFeat = [
    Distribution(10, 100, 'uniform'),                       # nFeatMapping['P_t']  == 0
    Distribution(-10, 10, 'uniform'),                       # nFeatMapping['Eta']  == 1
    Distribution(0, 2 * math.pi, 'uniform'),                # nFeatMapping['Phi']  == 2
    Distribution(0.001, 1, 'uniform'),                      # nFeatMapping['Mass'] == 3
    Distribution(0, 2, 'uniform', roundToNearestInt=True),  # nFeatMapping['Type'] == 4
]

# Feature name -> position for edge-level and graph-level features.
eFeatMapping = {
    featName: featIndex
    for featIndex, featName in enumerate(('DeltaEta', 'DeltaPhi', 'RapiditySquared'))
}
gFeatMapping = {
    featName: featIndex
    for featIndex, featName in enumerate(('NodeCount', 'TotalP_t'))
}

# Labels of the two graph classes and the train/valid/test split fractions.
graphlabel = [0, 1]
splitPercentages = dict(train=0.7, valid=0.2, test=0.1)

# Alternative P_t distributions that can replace the default P_t entry.
P_t_VeryDiffGraphClass1 = Distribution(
    distributionType='uniform',
    minimum=60, maximum=80)
P_t_SlightlyDiffGraphClass0 = _makeTruncnorm(minimum=10, maximum=120, mean=60, standardDeviation=5)
P_t_SlightlyDiffGraphClass1 = _makeTruncnorm(minimum=10, maximum=120, mean=70, standardDeviation=5)

# Alternative Eta distributions: means close together ("Slightly") or far apart ("Very").
Eta_SlightlyDiffGraphClass0 = _makeTruncnorm(minimum=-10, maximum=10, mean=0, standardDeviation=1)
Eta_SlightlyDiffGraphClass1 = _makeTruncnorm(minimum=-10, maximum=10, mean=1, standardDeviation=1)
Eta_VeryDiffGraphClass0 = _makeTruncnorm(minimum=-10, maximum=10, mean=-5, standardDeviation=1)
Eta_VeryDiffGraphClass1 = _makeTruncnorm(minimum=-10, maximum=10, mean=5, standardDeviation=1)

In [3]:
# Generate one two-class toy dataset per entry of `percentageDiffs`.
# Both classes share every node-feature distribution except P_t: class 0 uses
# P_t_SlightlyDiffGraphClass0, class 1 uses a copy of P_t_SlightlyDiffGraphClass1
# whose mean is the class-0 mean scaled by (1 + p/100).
#
# NOTE(review): no random seed is set anywhere in this notebook, so regenerated
# datasets differ between runs (the node/edge totals in the output vary).
# Seed the RNG used by Distribution if reproducibility is required.
toyname = 'Toy2_v3'
# NOTE(review): absolute, user-specific output location - adjust before running elsewhere.
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v3', toyname)

# Relative shift, in percent, of the class-1 P_t mean with respect to the class-0 mean.
percentageDiffs = [0, 1, 2, 3, 4, 5, 7, 10, 15]

for p in percentageDiffs:
    datasetname = f'{toyname}_{p}percentDiff'
    outputFolder = path.join(rootOutputFolder, datasetname)
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    # --- class 0 ---------------------------------------------------------
    name = f'GraphClass{graphlabel[0]}'

    # Private copy so the defaults in `defaultNodeFeat` are never modified.
    nodeFeat = deepcopy(defaultNodeFeat)

    graphsubdatasetInfo0 = GraphSubdatasetInfo(
        name=name, label=graphlabel[0],
        graphCount=graphCountPerClass, nodesPerGraph=NodesPerGraphSameDist,
        nodeFeatMapping=nFeatMapping, nodeFeat=nodeFeat,
        edgeFeatMapping=eFeatMapping, graphFeatMapping=gFeatMapping)
    graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0

    # --- class 1: identical to class 0 except for name, label and P_t ------
    name = f'GraphClass{graphlabel[1]}'
    graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
    graphsubdatasetInfo1.name = name
    graphsubdatasetInfo1.label = graphlabel[1]

    # Work on a copy of the template distribution. Mutating the module-level
    # P_t_SlightlyDiffGraphClass1 in place would leave it with the last loop
    # value after this cell ran, and every graphsubdatasetInfo1 created here
    # would alias the same (changing) object.
    # Assumes Distribution reads `mean` when sampling - TODO confirm.
    P_t_Class1 = deepcopy(P_t_SlightlyDiffGraphClass1)
    P_t_Class1.mean = P_t_SlightlyDiffGraphClass0.mean * (1.0 + p/100)
    graphsubdatasetInfo1.nodeFeat[nFeatMapping['P_t']] = P_t_Class1

    subdatasets = [graphsubdatasetInfo0, graphsubdatasetInfo1]

    graphdatasetInfo = GraphDatasetInfo(
        name=datasetname,
        splitPercentages=splitPercentages,
        graphSubDatasetInfos=subdatasets
    )

    # Store the dataset description in the output folder, then build the dataset
    # itself (the cell output shows graph generation and caching happen here).
    graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
    dataset = ToyDGLDataset_v2(name=datasetname, info=graphdatasetInfo, shuffleDataset=True, save_dir=outputFolder)

# After the loop `dataset` refers to the dataset of the last percentage only.
# A saved description can be loaded back from its json file like below:
#graphdatasetInfo = GraphDatasetInfo.LoadFromJsonfile(path.join(outputFolder, f'{graphdatasetInfo.name}.json'))

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:47<00:00, 1047.13it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:46<00:00, 1071.49it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950111
Number of all edges in all graphs: 8185536
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 104/50000 [00:00<00:48, 1037.62it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:46<00:00, 1065.88it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:46<00:00, 1064.15it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 949531
Number of all edges in all graphs: 8175044
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 105/50000 [00:00<00:47, 1042.72it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:49<00:00, 1005.20it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:49<00:00, 1010.63it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950348
Number of all edges in all graphs: 8189098
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 102/50000 [00:00<00:49, 1016.39it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:51<00:00, 977.44it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:50<00:00, 986.75it/s] 


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950112
Number of all edges in all graphs: 8185812
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 97/50000 [00:00<00:51, 966.34it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:50<00:00, 989.65it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:49<00:00, 1008.32it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950087
Number of all edges in all graphs: 8184922
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 94/50000 [00:00<00:53, 930.38it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:50<00:00, 997.60it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:49<00:00, 1002.33it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950017
Number of all edges in all graphs: 8184026
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 64/50000 [00:00<01:18, 636.74it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:51<00:00, 979.02it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:51<00:00, 971.00it/s] 


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 950138
Number of all edges in all graphs: 8185974
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 82/50000 [00:00<01:01, 812.06it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:52<00:00, 959.72it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:51<00:00, 977.83it/s] 


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 949756
Number of all edges in all graphs: 8179092
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 66/50000 [00:00<01:16, 655.75it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:51<00:00, 966.26it/s] 
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:51<00:00, 972.00it/s] 


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 949694
Number of all edges in all graphs: 8176344
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

<Figure size 720x504 with 0 Axes>

In [4]:
# Look at the first sample of `dataset` (the one built last by the generation loop):
# show the whole graph-level data dict, then only its 'feat' entry.
graph, label = dataset[0]
print(graph.gdata, graph.gdata['feat'], sep='\n')

{'feat': tensor([ 8.0000, 82.1605], dtype=torch.float64)}
tensor([ 8.0000, 82.1605], dtype=torch.float64)


In [5]:
# Sanity check: print the multiplier 1 + p/100 for every percentage difference.
percentageDiffs = [0, 1, 2, 3, 4, 5, 7, 10, 15]
for p in percentageDiffs:
    meanScale = 1 + p/100
    print(meanScale)

1.0
1.01
1.02
1.03
1.04
1.05
1.07
1.1
1.15
