In [1]:
import dgl
import math
import torch
import numpy as np
import networkx as nx
from os import path
from pathlib import Path
from copy import deepcopy
from dgl.data import DGLDataset
from Distribution import Distribution
from ToyDGLDataset_v2 import ToyDGLDataset_v2
from GraphDatasetInfo import (GraphSubdatasetInfo, GraphDatasetInfo)


Using backend: pytorch


In [2]:
graphCountPerClass = 50000

NodesPerGraph0 = Distribution(
    distributionType='truncnorm', 
    minimum=2, maximum=20, 
    mean=2, standardDeviation=1)

NodesPerGraph1 = Distribution(
    distributionType='truncnorm', 
    minimum=2, maximum=20, 
    mean=13, standardDeviation=1)

NodesPerGraphSameDist = Distribution(
    distributionType='truncnorm', 
    minimum=2, maximum=20, 
    mean=10, standardDeviation=1)

nFeatMapping = {'P_t': 0, 'Eta': 1, 'Phi': 2, 'Mass': 3, 'Type': 4}
defaultNodeFeat = [
    Distribution(10, 100, 'uniform'), # index 0 -> P_t
    Distribution(-10, 10, 'uniform'), # index 1 -> Eta
    Distribution(0, 2 * math.pi, 'uniform'), # index 2 -> Phi
    Distribution(0.001, 1, 'uniform'), # index 3 -> Mass
    Distribution(0, 2, 'uniform', roundToNearestInt=True) # index 4 -> Type
]

eFeatMapping = {'DeltaEta': 0, 'DeltaPhi': 1, 'RapiditySquared': 2}
gFeatMapping = {'NodeCount': 0, 'TotalP_t': 1}


graphlabel = [0, 1]
splitPercentages = {'train': 0.7, 'valid': 0.2, 'test': 0.1}

P_t_VeryDiffGraphClass1 = Distribution(
    distributionType='uniform', 
    minimum=60, maximum=80)

P_t_SlightlyDiffGraphClass0 = Distribution(
    distributionType='truncnorm', 
    minimum=10, maximum=120, 
    mean=60, standardDeviation=5)

P_t_SlightlyDiffGraphClass1 = Distribution(
    distributionType='truncnorm', 
    minimum=10, maximum=120, 
    mean=70, standardDeviation=5)

Eta_SlightlyDiffGraphClass0 = Distribution(
    distributionType='truncnorm', 
    minimum=-10, maximum=10, 
    mean=0, standardDeviation=1)

Eta_SlightlyDiffGraphClass1 = Distribution(
    distributionType='truncnorm', 
    minimum=-10, maximum=10, 
    mean=1, standardDeviation=1)

Eta_VeryDiffGraphClass0 = Distribution(
    distributionType='truncnorm', 
    minimum=-10, maximum=10, 
    mean=-5, standardDeviation=1)

Eta_VeryDiffGraphClass1 = Distribution(
    distributionType='truncnorm', 
    minimum=-10, maximum=10, 
    mean=5, standardDeviation=1)

In [3]:
toyname = 'Toy0_v3_1'
rootOutputFolder = path.join('/home/andrew/GNN_Sandbox/GraphToyDatasets_v3', toyname)

def generateDataset(nodeMeanDiff):
    datasetname = f'{toyname}_{nodeMeanDiff}NodeCountMeanDiff'
    outputFolder = path.join(rootOutputFolder, datasetname)
    Path(outputFolder).mkdir(parents=True, exist_ok=True)

    name = f'GraphClass{graphlabel[0]}'

    nodeFeat = deepcopy(defaultNodeFeat)

    graphsubdatasetInfo0 = GraphSubdatasetInfo(
        name=name, label=graphlabel[0], 
        graphCount=graphCountPerClass, nodesPerGraph=deepcopy(NodesPerGraph1), 
        nodeFeatMapping=nFeatMapping, nodeFeat=nodeFeat, 
        edgeFeatMapping=eFeatMapping, graphFeatMapping=gFeatMapping)
    graphsubdatasetInfo0.nodesPerGraph.mean -= nodeMeanDiff
    #graphsubdatasetInfo0.nodeFeat[nFeatMapping['P_t']] = P_t_SlightlyDiffGraphClass0

    name = f'GraphClass{graphlabel[1]}'
    graphsubdatasetInfo1 = deepcopy(graphsubdatasetInfo0)
    graphsubdatasetInfo1.name = name
    graphsubdatasetInfo1.label = graphlabel[1]
    graphsubdatasetInfo1.nodesPerGraph = deepcopy(NodesPerGraph1)
    #P_t_SlightlyDiffGraphClass1.mean = P_t_SlightlyDiffGraphClass0.mean * (1.0 + p/100)

    subdatasets = []
    subdatasets.append(graphsubdatasetInfo0)
    subdatasets.append(graphsubdatasetInfo1)

    graphdatasetInfo = GraphDatasetInfo(
        name=datasetname,
        splitPercentages=splitPercentages,
        graphSubDatasetInfos=subdatasets
    )

    graphdatasetInfo.SaveToJsonfile(outputFolder, f'{graphdatasetInfo.name}.json')
    dataset = ToyDGLDataset_v2(name=datasetname, info=graphdatasetInfo, shuffleDataset=True, save_dir=outputFolder)
    return datasetname + " done"

import time
now = time.time()

import multiprocessing as mp
Processes = 4 # too many will fill RAM and freeze the operating system. 
with mp.Pool(Processes) as pool:
    NodeCountMeanDiff = [(0,), (1,), (2,), (3,), (4,), (5,), (7,), (9,), (11,)]
    results = [pool.apply_async(generateDataset, ncmd) for ncmd in NodeCountMeanDiff]
    for r in results:
        print('\t', r.get())

end = time.time()
elapsed = end - now
print(f'{elapsed} seconds elapsed')
#can be loaded from json file like below:
#graphdatasetInfo = GraphDatasetInfo.LoadFromJsonfile(path.join(outputFolder, f'{graphdatasetInfo.name}.json'))

(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:24<00:00, 2000.24it/s]
(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:26<00:00, 1858.83it/s]
(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:28<00:00, 1754.38it/s]
(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:30<00:00, 1639.18it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:30<00:00, 1641.85it/s]
(2/2) Generating graphs from SubDataset GraphClass1:  94%|█████████▎| 46821/50000 [00:28<00:01, 1609.75it/s]

Calculating and saving histograms...


(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:30<00:00, 1640.69it/s]
(2/2) Generating graphs from SubDataset GraphClass1:  89%|████████▉ | 44423/50000 [00:26<00:03, 1639.76it/s]

Calculating and saving histograms...


(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:30<00:00, 1641.31it/s]
(2/2) Generating graphs from SubDataset GraphClass1:  94%|█████████▍| 47185/50000 [00:28<00:01, 1703.41it/s]

Calculating and saving histograms...


(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:30<00:00, 1658.02it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 1100184
Number of all edges in all graphs: 11339586
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.


  fig.savefig(outputFilePath)
(1/2) Generating graphs from SubDataset GraphClass0:  18%|█▊        | 9111/50000 [00:04<00:17, 2333.38it/s]

Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000


(1/2) Generating graphs from SubDataset GraphClass0:  19%|█▊        | 9345/50000 [00:04<00:17, 2320.06it/s]

Number of all nodes in all graphs: 1149538


(1/2) Generating graphs from SubDataset GraphClass0:  20%|█▉        | 9814/50000 [00:04<00:17, 2330.31it/s]

Number of all edges in all graphs: 12274086
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


  fig.savefig(outputFilePath)
(1/2) Generating graphs from SubDataset GraphClass0:  37%|███▋      | 18522/50000 [00:08<00:13, 2287.88it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0:   8%|▊         | 4120/50000 [00:01<00:19, 2368.36it/s]]

Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000


(1/2) Generating graphs from SubDataset GraphClass0:   9%|▊         | 4361/50000 [00:01<00:19, 2379.70it/s]]

Number of all nodes in all graphs: 1200135


(1/2) Generating graphs from SubDataset GraphClass0:  10%|▉         | 4846/50000 [00:02<00:18, 2398.79it/s]]

Number of all edges in all graphs: 13337554
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:  29%|██▉       | 14375/50000 [00:06<00:18, 1963.06it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:22<00:00, 2198.22it/s]
  fig.savefig(outputFilePath)
(1/2) Generating graphs from SubDataset GraphClass0:  53%|█████▎    | 26402/50000 [00:09<00:08, 2708.04it/s]

Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000


(2/2) Generating graphs from SubDataset GraphClass1:  12%|█▏        | 5832/50000 [00:03<00:28, 1567.08it/s]]

Number of all nodes in all graphs: 1250321


(1/2) Generating graphs from SubDataset GraphClass0:  54%|█████▍    | 27221/50000 [00:10<00:08, 2640.96it/s]

Number of all edges in all graphs: 14491114
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:  99%|█████████▉| 49516/50000 [00:21<00:00, 2393.23it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:21<00:00, 2332.03it/s]
(2/2) Generating graphs from SubDataset GraphClass1:  29%|██▉       | 14596/50000 [00:08<00:21, 1664.89it/s]

	 Toy0_v3_1_0NodeCountMeanDiff done
	 Toy0_v3_1_1NodeCountMeanDiff done
	 Toy0_v3_1_2NodeCountMeanDiff done
	 Toy0_v3_1_3NodeCountMeanDiff done


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:19<00:00, 2601.92it/s]
(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:17<00:00, 2806.07it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:30<00:00, 1636.92it/s]
(2/2) Generating graphs from SubDataset GraphClass1:  58%|█████▊    | 28952/50000 [00:17<00:12, 1667.55it/s]

Calculating and saving histograms...


(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:31<00:00, 1610.22it/s]


Calculating and saving histograms...


(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:30<00:00, 1627.55it/s]
(2/2) Generating graphs from SubDataset GraphClass1:  56%|█████▌    | 27871/50000 [00:16<00:12, 1725.34it/s]

Calculating and saving histograms...


(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:29<00:00, 1673.60it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 899452
Number of all edges in all graphs: 8519656
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


  fig.savefig(outputFilePath)
  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 1050027
Number of all edges in all graphs: 10484330
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


  fig.savefig(outputFilePath)


Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0:   0%|          | 0/50000 [00:00<?, ?it/s]

Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000


(1/2) Generating graphs from SubDataset GraphClass0:   1%|          | 304/50000 [00:00<00:16, 3036.00it/s]

Number of all nodes in all graphs: 999954


(1/2) Generating graphs from SubDataset GraphClass0:   2%|▏         | 911/50000 [00:00<00:16, 3002.88it/s]

Number of all edges in all graphs: 9733186
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:   8%|▊         | 3988/50000 [00:01<00:15, 3060.64it/s]

Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000


(1/2) Generating graphs from SubDataset GraphClass0:   9%|▊         | 4302/50000 [00:01<00:14, 3082.04it/s]

Number of all nodes in all graphs: 803341


(1/2) Generating graphs from SubDataset GraphClass0:  10%|▉         | 4925/50000 [00:01<00:14, 3075.61it/s]

Number of all edges in all graphs: 7747836
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']


(1/2) Generating graphs from SubDataset GraphClass0:  17%|█▋        | 8296/50000 [00:02<00:13, 3079.54it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0:  23%|██▎       | 11627/50000 [00:03<00:12, 3019.64it/s]

	 Toy0_v3_1_4NodeCountMeanDiff done


(1/2) Generating graphs from SubDataset GraphClass0:  27%|██▋       | 13444/50000 [00:04<00:12, 3013.73it/s]

Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0:  34%|███▎      | 16780/50000 [00:05<00:11, 2927.09it/s]

	 Toy0_v3_1_5NodeCountMeanDiff done
	 Toy0_v3_1_7NodeCountMeanDiff done
Done saving data into cached files.


(1/2) Generating graphs from SubDataset GraphClass0:  40%|███▉      | 19800/50000 [00:06<00:10, 2847.66it/s]

	 Toy0_v3_1_9NodeCountMeanDiff done


(1/2) Generating graphs from SubDataset GraphClass0: 100%|██████████| 50000/50000 [00:16<00:00, 2954.31it/s]
(2/2) Generating graphs from SubDataset GraphClass1: 100%|██████████| 50000/50000 [00:28<00:00, 1747.75it/s]


Calculating and saving histograms...


  fig.savefig(outputFilePath)


Num Graph classes: 2
Graph classes: [0, 1]
Number of graphs: 100000
Number of all nodes in all graphs: 743360
Number of all edges in all graphs: 7419598
Dim node features: 5
Node feature keys: ['P_t', 'Eta', 'Phi', 'Mass', 'Type']
Dim edge features: 3
Edge feature keys: ['DeltaEta', 'DeltaPhi', 'RapiditySquared']
Done saving data into cached files.
	 Toy0_v3_1_11NodeCountMeanDiff done
502.7153697013855 seconds elapsed


In [4]:
graph, label = dataset[0]
print(graph.gdata)
print(graph.gdata['feat'])

NameError: name 'dataset' is not defined

In [None]:
percentageDiffs = [0, 1, 2, 3, 4, 5, 7, 10, 15]
for p in percentageDiffs:
    print(1 + p/100)