Added benchmark results. Fixed bug in loader.

aimat-lab · Feb 27, 2024 · bce2ffd · bce2ffd
1 parent 70f70ea
commit bce2ffd
Show file tree

Hide file tree

Showing 8 changed files with 532 additions and 10 deletions.
diff --git a/kgcnn/io/loader.py b/kgcnn/io/loader.py
@@ -189,7 +189,7 @@ def generator():
                 if assignment_to_id[i] is None:
                     values = np.array(array_list, dtype=inputs[i]["dtype"])
                     if padded_disjoint:
-                        out = pad_at_axis(values, (1, 0), axis=0)
+                        values = pad_at_axis(values, (1, 0), axis=0)
                     out[i] = values
                 else:
                     values = np.concatenate(array_list, axis=0)
@@ -207,14 +207,17 @@ def generator():
                         counts = np.concatenate([np.array([num_pad_required], dtype=counts.dtype), counts], axis=0)
                         out_counts[i] = counts
 
-                    if out[pos_count[ids]] is None:
-                        out[pos_count[ids]] = counts
-                    if out[pos_batch_id[ids]] is None:
-                        out[pos_batch_id[ids]] = np.repeat(
-                            np.arange(len(counts), dtype="int64"), repeats=counts)
-                    if out[pos_subgraph_id[ids]] is None:
-                        out[pos_subgraph_id[ids]] = np.concatenate(
-                            [np.arange(x, dtype="int64") for x in counts], axis=0)
+                    if ids in pos_count:
+                        if out[pos_count[ids]] is None:
+                            out[pos_count[ids]] = counts
+                    if ids in pos_batch_id:
+                        if out[pos_batch_id[ids]] is None:
+                            out[pos_batch_id[ids]] = np.repeat(
+                                np.arange(len(counts), dtype="int64"), repeats=counts)
+                    if ids in pos_subgraph_id:
+                        if out[pos_subgraph_id[ids]] is None:
+                            out[pos_subgraph_id[ids]] = np.concatenate(
+                                [np.arange(x, dtype="int64") for x in counts], axis=0)
 
             # Indices
             for i in inputs.keys():

diff --git a/kgcnn/losses/losses.py b/kgcnn/losses/losses.py
@@ -9,15 +9,21 @@
 @ks.saving.register_keras_serializable(package='kgcnn', name='MeanAbsoluteError')
 class MeanAbsoluteError(Loss):
 
-    def __init__(self, reduction="sum_over_batch_size", name="mean_absolute_error", dtype=None):
+    def __init__(self, reduction="sum_over_batch_size", name="mean_absolute_error",
+                 padded_disjoint: bool = False, dtype=None):
         super(MeanAbsoluteError, self).__init__(reduction=reduction, name=name, dtype=dtype)
+        self.padded_disjoint = padded_disjoint  # When True, `call` drops index 0 of both tensors.
 
     def call(self, y_true, y_pred):
+        if self.padded_disjoint:  # Presumably index 0 is the padding entry of padded-disjoint batches - TODO confirm.
+            y_true = y_true[1:]
+            y_pred = y_pred[1:]
         out = mean_absolute_error(y_true, y_pred)
         return out
 
     def get_config(self):
         config = super(MeanAbsoluteError, self).get_config()
+        config.update({"padded_disjoint": self.padded_disjoint})  # Keep the flag in the serialized config.
         return config
 
 
@@ -53,6 +59,45 @@ def get_config(self):
         return config
 
 
+@ks.saving.register_keras_serializable(package='kgcnn', name='DisjointForceMeanAbsoluteError')
+class DisjointForceMeanAbsoluteError(Loss):
+    """Mean absolute error for force targets in disjoint layout. Dummy class: not working as intended yet.
+
+    Tensors are treated as shape `([N], 3, S)`; the node ids still need to be passed in here somehow.
+    """
+
+    def __init__(self, reduction="sum_over_batch_size", name="force_mean_absolute_error",
+                 squeeze_states: bool = True, find_padded_atoms: bool = True, dtype=None):
+        super(DisjointForceMeanAbsoluteError, self).__init__(reduction=reduction, name=name, dtype=dtype)
+        self.squeeze_states = squeeze_states
+        self.find_padded_atoms = find_padded_atoms
+
+    def call(self, y_true, y_pred):
+        # Shape: ([N], 3, S). Entries of y_true that are ~0 along axis 1 are treated as padding and masked out.
+        if self.find_padded_atoms:
+            check_nonzero = ops.logical_not(
+                ops.all(ops.isclose(y_true, ops.convert_to_tensor(0., dtype=y_true.dtype)), axis=1))
+            y_pred = y_pred * ops.cast(ops.expand_dims(check_nonzero, axis=1), dtype=y_pred.dtype)
+            row_count = ops.sum(ops.cast(check_nonzero, dtype="int32"), axis=0)
+            row_count = ops.where(row_count < 1, 1, row_count)  # Prevent divide by 0.
+            norm = 1 / ops.cast(row_count, dtype=y_true.dtype)
+        else:
+            norm = 1 / ops.cast(ops.shape(y_true)[0], dtype=y_true.dtype)  # Same dtype as the masked branch.
+
+        diff = ops.abs(y_true-y_pred)
+        out = ops.mean(diff, axis=1)  # Average over axis 1 (size 3 per the shape note).
+        out = ops.sum(out, axis=0)*norm  # Sum over axis 0, scaled by 1 / row count.
+        if not self.squeeze_states:  # Additionally average over the last axis.
+            out = ops.mean(out, axis=-1)
+
+        return out
+
+    def get_config(self):
+        config = super(DisjointForceMeanAbsoluteError, self).get_config()
+        config.update({"find_padded_atoms": self.find_padded_atoms, "squeeze_states": self.squeeze_states})
+        return config
+
+
 @ks.saving.register_keras_serializable(package='kgcnn', name='BinaryCrossentropyNoNaN')
 class BinaryCrossentropyNoNaN(ks.losses.BinaryCrossentropy):
 

diff --git a/...lts/MatProjectGapDataset/Megnet_make_crystal_model/Megnet_MatProjectGapDataset_score.yaml b/...lts/MatProjectGapDataset/Megnet_make_crystal_model/Megnet_MatProjectGapDataset_score.yaml
@@ -0,0 +1,157 @@
+OS: posix_linux
+backend: tensorflow
+cuda_available: 'True'
+data_unit: eV
+date_time: '2024-02-24 06:26:37'
+device_id: '[LogicalDevice(name=''/device:CPU:0'', device_type=''CPU''), LogicalDevice(name=''/device:GPU:0'',
+  device_type=''GPU'')]'
+device_memory: '[]'
+device_name: '[{}, {''compute_capability'': (8, 0), ''device_name'': ''NVIDIA A100
+  80GB PCIe''}]'
+epochs:
+- 1000
+- 1000
+- 1000
+- 1000
+- 1000
+execute_folds:
+- 4
+kgcnn_version: 4.0.1
+learning_rate:
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+loss:
+- 0.015490625984966755
+- 0.015005970373749733
+- 0.013733846135437489
+- 0.015239803120493889
+- 0.013305951841175556
+max_learning_rate:
+- 0.0005000000237487257
+- 0.0005000000237487257
+- 0.0005000000237487257
+- 0.0005000000237487257
+- 0.0005000000237487257
+max_loss:
+- 0.4093307554721832
+- 0.4096440374851227
+- 0.40621218085289
+- 0.40501534938812256
+- 0.4052284359931946
+max_scaled_mean_absolute_error:
+- 0.6538791060447693
+- 0.655254602432251
+- 0.6496603488922119
+- 0.6482604742050171
+- 0.6476054787635803
+max_scaled_root_mean_squared_error:
+- 1.0478416681289673
+- 1.0511925220489502
+- 1.046431541442871
+- 1.0417505502700806
+- 1.0431451797485352
+max_val_loss:
+- 0.2568034529685974
+- 0.25980663299560547
+- 0.2764630913734436
+- 0.2725690007209778
+- 0.2516973912715912
+max_val_scaled_mean_absolute_error:
+- 0.40973129868507385
+- 0.41456735134124756
+- 0.44212689995765686
+- 0.4353194534778595
+- 0.4004021883010864
+max_val_scaled_root_mean_squared_error:
+- 0.7799133062362671
+- 0.7795096039772034
+- 0.8122066259384155
+- 0.8199198842048645
+- 0.7473491430282593
+min_learning_rate:
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+- 5.549999968934571e-06
+min_loss:
+- 0.012300114147365093
+- 0.012306654825806618
+- 0.011712302453815937
+- 0.01248718798160553
+- 0.011630120687186718
+min_scaled_mean_absolute_error:
+- 0.019647523760795593
+- 0.019690606743097305
+- 0.01873534917831421
+- 0.019993698224425316
+- 0.018590165302157402
+min_scaled_root_mean_squared_error:
+- 0.11326158046722412
+- 0.11402382701635361
+- 0.10926864296197891
+- 0.11012609302997589
+- 0.11126671731472015
+min_val_loss:
+- 0.12271781265735626
+- 0.12614120543003082
+- 0.1241195946931839
+- 0.12728427350521088
+- 0.12087533622980118
+min_val_scaled_mean_absolute_error:
+- 0.1960776001214981
+- 0.20183902978897095
+- 0.19857794046401978
+- 0.20317614078521729
+- 0.19241274893283844
+min_val_scaled_root_mean_squared_error:
+- 0.4753977060317993
+- 0.4847562611103058
+- 0.4655359387397766
+- 0.47441938519477844
+- 0.45533809065818787
+model_class: make_crystal_model
+model_name: Megnet
+model_version: '2023-12-05'
+multi_target_indices: null
+number_histories: 5
+scaled_mean_absolute_error:
+- 0.024746032431721687
+- 0.02400919795036316
+- 0.021968580782413483
+- 0.0244015920907259
+- 0.02126884274184704
+scaled_root_mean_squared_error:
+- 0.11563872545957565
+- 0.11580397933721542
+- 0.11044206470251083
+- 0.11158757656812668
+- 0.11241202801465988
+seed: 42
+time_list:
+- 1 day, 15:37:34.754554
+- 1 day, 14:15:22.432003
+- 1 day, 13:59:52.891144
+- 1 day, 14:25:57.611502
+- 1 day, 13:46:45.172808
+val_loss:
+- 0.12476463615894318
+- 0.12946417927742004
+- 0.12739905714988708
+- 0.12979497015476227
+- 0.12356005609035492
+val_scaled_mean_absolute_error:
+- 0.1991664171218872
+- 0.2071896493434906
+- 0.20360678434371948
+- 0.20720550417900085
+- 0.197036474943161
+val_scaled_root_mean_squared_error:
+- 0.48367053270339966
+- 0.5019604563713074
+- 0.485824316740036
+- 0.49431952834129333
+- 0.4695015847682953
diff --git a/training/results/MatProjectGapDataset/Megnet_make_crystal_model/Megnet_hyper.json b/training/results/MatProjectGapDataset/Megnet_make_crystal_model/Megnet_hyper.json
@@ -0,0 +1 @@
+{"model": {"module_name": "kgcnn.literature.Megnet", "class_name": "make_crystal_model", "config": {"name": "Megnet", "inputs": [{"shape": [null], "name": "node_number", "dtype": "int32", "ragged": true}, {"shape": [null, 3], "name": "node_coordinates", "dtype": "float32", "ragged": true}, {"shape": [null, 2], "name": "range_indices", "dtype": "int64", "ragged": true}, {"shape": [1], "name": "charge", "dtype": "float32", "ragged": false}, {"shape": [null, 3], "name": "range_image", "dtype": "int64", "ragged": true}, {"shape": [3, 3], "name": "graph_lattice", "dtype": "float32", "ragged": false}], "input_tensor_type": "ragged", "input_embedding": null, "input_node_embedding": {"input_dim": 95, "output_dim": 64}, "make_distance": true, "expand_distance": true, "gauss_args": {"bins": 25, "distance": 5, "offset": 0.0, "sigma": 0.4}, "meg_block_args": {"node_embed": [64, 32, 32], "edge_embed": [64, 32, 32], "env_embed": [64, 32, 32], "activation": "kgcnn>softplus2"}, "set2set_args": {"channels": 16, "T": 3, "pooling_method": "sum", "init_qstar": "0"}, "node_ff_args": {"units": [64, 32], "activation": "kgcnn>softplus2"}, "edge_ff_args": {"units": [64, 32], "activation": "kgcnn>softplus2"}, "state_ff_args": {"units": [64, 32], "activation": "kgcnn>softplus2"}, "nblocks": 3, "has_ff": true, "dropout": null, "use_set2set": true, "verbose": 10, "output_embedding": "graph", "output_mlp": {"use_bias": [true, true, true], "units": [32, 16, 1], "activation": ["kgcnn>softplus2", "kgcnn>softplus2", "linear"]}}}, "training": {"cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": true}}, "fit": {"batch_size": 32, "epochs": 1000, "validation_freq": 10, "verbose": 2, "callbacks": [{"class_name": "kgcnn>LinearLearningRateScheduler", "config": {"learning_rate_start": 0.0005, "learning_rate_stop": 5e-06, "epo_min": 100, "epo": 1000, "verbose": 0}}]}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": 0.0005}}, 
"loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}, "multi_target_indices": null}, "data": {"data_unit": "eV"}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "2.2.3"}, "dataset": {"class_name": "MatProjectGapDataset", "module_name": "kgcnn.data.datasets.MatProjectGapDataset", "config": {}, "methods": [{"map_list": {"method": "set_range_periodic", "max_distance": 5.0}}]}}