# MODNet with PBE band gaps as a feature

Band gap predictions using MODNet, experimental data as a training set and the PBE band gap values as a feature.

In [1]:
def setup_threading():
    import os
    os.environ['OPENBLAS_NUM_THREADS'] = '1'
    os.environ['MKL_NUM_THREADS'] = '1'
    os.environ["OMP_NUM_THREADS"] = "1"
    os.environ["TF_NUM_INTRAOP_THREADS"] = "1"
    os.environ["TF_NUM_INTEROP_THREADS"] = "1"
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
setup_threading()

In [3]:
from modnet.preprocessing import MODData
from modnet.models.vanilla import MODNetModel
from modnet.hyper_opt.fit_genetic import FitGenetic
import numpy as np
import os
import copy

from collections import defaultdict
import itertools
import os
import pandas as pd
import matplotlib.pyplot as plt 
from IPython.display import Markdown
from matminer.datasets import load_dataset, get_all_dataset_info
from pymatgen.core import Composition

from modnet.featurizers import MODFeaturizer
from modnet.featurizers.presets import DeBreuck2020Featurizer

os.environ["CUDA_VISIBLE_DEVICES"] = "1"


If you use the ChemEnv tool for your research, please consider citing the following reference(s) :
David Waroquiers, Xavier Gonze, Gian-Marco Rignanese, Cathrin Welker-Nieuwoudt, Frank Rosowski,
Michael Goebel, Stephan Schenk, Peter Degelmann, Rute Andre, Robert Glaum, and Geoffroy Hautier,
"Statistical analysis of coordination environments in oxides",
Chem. Mater., 2017, 29 (19), pp 8346-8360,
DOI: 10.1021/acs.chemmater.7b02766



In [4]:
from sklearn.model_selection import KFold
from modnet.preprocessing import MODData

def shuffle_MD(data,random_state=10):
    data = copy.deepcopy(data)
    ids = data.df_targets.sample(frac=1,random_state=random_state).index
    data.df_featurized = data.df_featurized.loc[ids]
    data.df_targets = data.df_targets.loc[ids]
    data.df_structure = data.df_structure.loc[ids]
    
    return data

def MDKsplit(data,n_splits=5,random_state=10):
    data = shuffle_MD(data,random_state=random_state)
    ids = np.array(data.structure_ids)
    kf = KFold(n_splits=n_splits,shuffle=True,random_state=random_state)
    folds = []
    for train_idx, val_idx in kf.split(ids):
        data_train = MODData(data.df_structure.iloc[train_idx]['structure'].values,data.df_targets.iloc[train_idx].values,target_names=data.df_targets.columns,structure_ids=ids[train_idx])
        data_train.df_featurized = data.df_featurized.iloc[train_idx]
        #data_train.optimal_features = data.optimal_features
        
        data_val = MODData(data.df_structure.iloc[val_idx]['structure'].values,data.df_targets.iloc[val_idx].values,target_names=data.df_targets.columns,structure_ids=ids[val_idx])
        data_val.df_featurized = data.df_featurized.iloc[val_idx]
        #data_val.optimal_features = data.optimal_features

        folds.append((data_train,data_val))
        
    return folds

In [5]:
md_joint = MODData.load('exp_pbe_joint')

2021-06-14 17:44:16,282 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f0050762dc0> object, created with modnet version <=0.1.7


In [6]:
md_joint.df_structure

Unnamed: 0_level_0,structure
id,Unnamed: 1_level_1
mp-12699,"[[ 0.13616363 -2.60631371 1.50340165] Ca, [-1..."
mp-559459,"[[2.14049282 6.43464803 1.76199846] Cs, [-0.70..."
mp-21162,"[[0. 0. 0.] Sr, [0. 2.5259875 2.5259875..."
mp-1306,"[[3.259129 1.88166243 1.15971025] Th, [ 3.25..."
mp-15252,"[[0.95531657 1.042239 0.86229681] Cu, [2.803..."
...,...
mp-556541,"[[0. 6.49363948 5.82657217] In, [ 0. ..."
mp-2602,"[[3.023318 3.023318 3.023318] Nd, [0. 0. 0.] As]"
mp-30366,"[[4.51774558 1.37529211 4.32659015] Ca, [1.109..."
mp-3718,"[[2.0697935 0. 1.28932533] Th, [0. ..."


In [7]:
md_joint.df_featurized['Band gap PBE'] = md_joint.df_targets['pbe_gap']

In [8]:
md_joint.df_featurized

Unnamed: 0,ElementProperty|MagpieData minimum Number,ElementProperty|MagpieData maximum Number,ElementProperty|MagpieData range Number,ElementProperty|MagpieData mean Number,ElementProperty|MagpieData avg_dev Number,ElementProperty|MagpieData mode Number,ElementProperty|MagpieData minimum MendeleevNumber,ElementProperty|MagpieData maximum MendeleevNumber,ElementProperty|MagpieData range MendeleevNumber,ElementProperty|MagpieData mean MendeleevNumber,...,ChemEnvSiteFingerprint|GaussianSymmFuncstd_dev G2_80.0,ChemEnvSiteFingerprint|GaussianSymmFuncmean G4_0.005_1.0_1.0,ChemEnvSiteFingerprint|GaussianSymmFuncstd_dev G4_0.005_1.0_1.0,ChemEnvSiteFingerprint|GaussianSymmFuncmean G4_0.005_1.0_-1.0,ChemEnvSiteFingerprint|GaussianSymmFuncmean G4_0.005_4.0_1.0,ChemEnvSiteFingerprint|GaussianSymmFuncstd_dev G4_0.005_4.0_1.0,ChemEnvSiteFingerprint|GaussianSymmFuncmean G4_0.005_4.0_-1.0,ChemEnvSiteFingerprint|GaussianSymmFuncstd_dev G4_0.005_4.0_-1.0,ChemEnvSiteFingerprint|GaussianSymmFuncstd_dev G4_0.005_1.0_-1.0,Band gap PBE
mp-12699,20.0,79.0,59.0,53.714286,28.897959,79.0,7.0,66.0,59.0,40.714286,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
mp-559459,16.0,55.0,39.0,34.000000,18.000000,16.0,5.0,88.0,83.0,64.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.1704
mp-21162,38.0,82.0,44.0,71.000000,16.500000,82.0,8.0,81.0,73.0,62.750000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
mp-1306,13.0,90.0,77.0,32.250000,28.875000,13.0,16.0,73.0,57.0,58.750000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
mp-15252,16.0,32.0,16.0,23.000000,7.000000,16.0,64.0,88.0,24.0,78.500000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mp-556541,16.0,51.0,35.0,29.000000,16.000000,16.0,75.0,94.0,19.0,86.375000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.5135
mp-2602,33.0,60.0,27.0,46.500000,13.500000,33.0,19.0,84.0,65.0,51.500000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
mp-30366,20.0,79.0,59.0,34.750000,22.125000,20.0,7.0,66.0,59.0,21.750000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
mp-3718,8.0,90.0,82.0,50.000000,28.000000,8.0,16.0,90.0,74.0,64.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2418


In [9]:
md_joint.df_targets = md_joint.df_targets.drop(columns=['pbe_gap', 'difference'])

In [10]:
import uncertainty_toolbox as uct

k = 5
random_state = 202010
folds = MDKsplit(md_joint,n_splits=k,random_state=random_state)
maes = np.ones(5)
uncertainties = np.ones(5)
metricss = []
for i,f in enumerate(folds):
    train = f[0]
    test = f[1]
    #train.feature_selection(n=-1, use_precomputed_cross_nmi=True)
    fpath = 'train_PBE-feature_{}_{}'.format(random_state,i+1)
    if os.path.exists(fpath):
        train = MODData.load(fpath)
    else:
        train.feature_selection(n=-1, use_precomputed_cross_nmi=True)
        train.save(fpath)
      
    # assure no overlap
    assert len(set(train.df_targets.index).intersection(set(test.df_targets.index))) == 0
    
    ga = FitGenetic(train)
    model = ga.run(size_pop=20, num_generations=10, n_jobs=10)
    
    pred, std = model.predict(test, return_unc=True)
    true = test.df_targets
    metrics = uct.metrics.get_all_metrics(pred['exp_gap'].values, std['exp_gap'].values, true['exp_gap'].values)
    error = pred['exp_gap']-true['exp_gap']
    error = error.drop(pred.index[((pred['exp_gap']).abs()>20)]) # drop unrealistic values: happens extremely rarely
    std = std['exp_gap'].drop(std.index[((std['exp_gap']).abs()>20)]) # drop unrealistic values: happens extremely rarely
    mae = np.abs(error.values).mean()
    uncertainty = np.abs(std).mean()
    print('MAE =')
    print(mae)
    print('uncertainty =')
    print(uncertainty)
    maes[i] = mae
    uncertainties[i] = uncertainty
    metricss.append(metrics)

2021-06-14 17:44:16,490 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,505 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,516 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,531 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,541 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,557 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,567 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,583 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,593 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:16,610 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:17,092 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7effdae562b0> object, created with modnet version 0.1.9
2021-06-14 17

100%|██████████| 100/100 [11:14<00:00,  6.74s/it]

2021-06-14 17:55:33,235 - modnet - INFO - Loss per individual: ind 0: 0.987 	ind 1: 0.911 	ind 2: 0.911 	ind 3: 0.955 	ind 4: 0.987 	ind 5: 0.957 	ind 6: 0.922 	ind 7: 1.154 	ind 8: 0.905 	ind 9: 0.943 	ind 10: 0.919 	ind 11: 0.928 	ind 12: 0.976 	ind 13: 1.043 	ind 14: 0.965 	ind 15: 0.950 	ind 16: 0.994 	ind 17: 0.881 	ind 18: 0.893 	ind 19: 1.109 	





2021-06-14 17:55:34,760 - modnet - INFO - Generation number 1
2021-06-14 17:55:37,000 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:34<00:00,  6.94s/it]

2021-06-14 18:07:11,096 - modnet - INFO - Loss per individual: ind 0: 0.882 	ind 1: 0.933 	ind 2: 0.873 	ind 3: 0.974 	ind 4: 0.913 	ind 5: 0.935 	ind 6: 0.895 	ind 7: 0.897 	ind 8: 0.867 	ind 9: 0.933 	ind 10: 1.993 	ind 11: 0.987 	ind 12: 0.953 	ind 13: 0.912 	ind 14: 0.912 	ind 15: 0.936 	ind 16: 0.965 	ind 17: 1.027 	ind 18: 0.921 	ind 19: 0.913 	





2021-06-14 18:07:12,721 - modnet - INFO - Generation number 2
2021-06-14 18:07:15,418 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:53<00:00,  7.14s/it]

2021-06-14 18:19:08,983 - modnet - INFO - Loss per individual: ind 0: 0.976 	ind 1: 0.897 	ind 2: 0.921 	ind 3: 0.897 	ind 4: 1.009 	ind 5: 0.907 	ind 6: 0.951 	ind 7: 0.925 	ind 8: 1.002 	ind 9: 0.926 	ind 10: 0.962 	ind 11: 0.964 	ind 12: 0.922 	ind 13: 0.944 	ind 14: 0.881 	ind 15: 0.937 	ind 16: 0.929 	ind 17: 0.922 	ind 18: 0.954 	ind 19: 0.919 	





2021-06-14 18:19:10,554 - modnet - INFO - Generation number 3
2021-06-14 18:19:13,322 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:09<00:00,  7.29s/it]

2021-06-14 18:31:22,524 - modnet - INFO - Loss per individual: ind 0: 0.932 	ind 1: 0.927 	ind 2: 0.908 	ind 3: 0.906 	ind 4: 0.920 	ind 5: 0.947 	ind 6: 0.966 	ind 7: 0.884 	ind 8: 0.942 	ind 9: 0.928 	ind 10: 0.999 	ind 11: 0.900 	ind 12: 0.949 	ind 13: 0.911 	ind 14: 0.903 	ind 15: 0.949 	ind 16: 0.973 	ind 17: 0.932 	ind 18: 0.934 	ind 19: 0.915 	





2021-06-14 18:31:24,341 - modnet - INFO - Generation number 4
2021-06-14 18:31:27,474 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:51<00:00,  6.51s/it]

2021-06-14 18:42:18,740 - modnet - INFO - Loss per individual: ind 0: 0.930 	ind 1: 0.917 	ind 2: 0.904 	ind 3: 0.929 	ind 4: 0.941 	ind 5: 0.876 	ind 6: 0.956 	ind 7: 0.883 	ind 8: 0.972 	ind 9: 0.906 	ind 10: 0.959 	ind 11: 0.930 	ind 12: 0.940 	ind 13: 0.967 	ind 14: 0.944 	ind 15: 0.937 	ind 16: 0.891 	ind 17: 0.932 	ind 18: 0.987 	ind 19: 0.956 	





2021-06-14 18:42:20,524 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  6.50it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.96it/s]
 10%|█         | 1/10 [00:00<00:01,  6.04it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.62it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.364
  RMSE          0.762
  MDAE          0.084
  MARPD         132.720
  R2            0.775
  Correlation   0.884
  Root-mean-squared Calibration Error   0.095
  Mean-absolute Calibration Error       0.084
  Miscalibration Area                   0.085
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.142
     Group Size: 0.56 -- Calibration Error: 0.098
     Group Size: 1.00 -- Calibration Error: 0.084
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.155
     Group Size: 0.56 -- Calibration Error: 0.112
     Group Size: 1.00 -- Calibration Error: 0.095
  Sharpness   0.331
  Negative-log-likelihood   5.202
  CRPS                      0.292
  Check Score               0.147
  Interval Score            1.949
MAE =
0.36448238013036294
uncertainty =
0

100%|██████████| 100/100 [15:31<00:00,  9.31s/it]

2021-06-14 18:58:06,268 - modnet - INFO - Loss per individual: ind 0: 0.948 	ind 1: 0.957 	ind 2: 1.405 	ind 3: 1.113 	ind 4: 1.065 	ind 5: 0.932 	ind 6: 0.939 	ind 7: 0.973 	ind 8: 1.023 	ind 9: 0.934 	ind 10: 0.951 	ind 11: 0.928 	ind 12: 0.885 	ind 13: 0.938 	ind 14: 1.224 	ind 15: 0.922 	ind 16: 1.189 	ind 17: 1.220 	ind 18: 0.948 	ind 19: 0.882 	





2021-06-14 18:58:08,090 - modnet - INFO - Generation number 1
2021-06-14 18:58:11,434 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [17:31<00:00, 10.51s/it]

2021-06-14 19:15:42,894 - modnet - INFO - Loss per individual: ind 0: 0.931 	ind 1: 0.899 	ind 2: 0.927 	ind 3: 0.943 	ind 4: 0.903 	ind 5: 1.007 	ind 6: 0.929 	ind 7: 0.889 	ind 8: 1.228 	ind 9: 0.896 	ind 10: 0.992 	ind 11: 0.907 	ind 12: 0.976 	ind 13: 0.927 	ind 14: 0.953 	ind 15: 0.919 	ind 16: 0.894 	ind 17: 0.942 	ind 18: 0.894 	ind 19: 0.940 	





2021-06-14 19:15:44,922 - modnet - INFO - Generation number 2
2021-06-14 19:15:48,285 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [14:17<00:00,  8.58s/it]

2021-06-14 19:30:06,225 - modnet - INFO - Loss per individual: ind 0: 0.938 	ind 1: 0.912 	ind 2: 0.924 	ind 3: 0.948 	ind 4: 0.964 	ind 5: 0.893 	ind 6: 0.995 	ind 7: 0.973 	ind 8: 0.945 	ind 9: 0.868 	ind 10: 0.944 	ind 11: 0.937 	ind 12: 0.899 	ind 13: 0.956 	ind 14: 0.888 	ind 15: 0.921 	ind 16: 0.959 	ind 17: 1.321 	ind 18: 0.991 	ind 19: 0.890 	





2021-06-14 19:30:07,755 - modnet - INFO - Generation number 3
2021-06-14 19:30:10,624 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:56<00:00,  7.76s/it]

2021-06-14 19:43:06,940 - modnet - INFO - Loss per individual: ind 0: 1.037 	ind 1: 0.916 	ind 2: 1.058 	ind 3: 0.921 	ind 4: 0.934 	ind 5: 0.926 	ind 6: 0.881 	ind 7: 0.900 	ind 8: 1.030 	ind 9: 0.967 	ind 10: 0.934 	ind 11: 0.970 	ind 12: 0.921 	ind 13: 1.007 	ind 14: 0.961 	ind 15: 0.898 	ind 16: 0.954 	ind 17: 0.941 	ind 18: 0.914 	ind 19: 0.899 	





2021-06-14 19:43:08,630 - modnet - INFO - Generation number 4
2021-06-14 19:43:11,755 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [16:08<00:00,  9.68s/it]

2021-06-14 19:59:20,015 - modnet - INFO - Loss per individual: ind 0: 0.923 	ind 1: 1.302 	ind 2: 0.974 	ind 3: 0.893 	ind 4: 0.910 	ind 5: 0.975 	ind 6: 1.009 	ind 7: 0.936 	ind 8: 0.942 	ind 9: 0.926 	ind 10: 0.889 	ind 11: 1.083 	ind 12: 0.907 	ind 13: 0.914 	ind 14: 0.969 	ind 15: 0.891 	ind 16: 0.975 	ind 17: 0.937 	ind 18: 0.893 	ind 19: 1.056 	





2021-06-14 19:59:21,665 - modnet - INFO - Generation number 5
2021-06-14 19:59:24,836 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [13:14<00:00,  7.94s/it]

2021-06-14 20:12:38,993 - modnet - INFO - Loss per individual: ind 0: 0.901 	ind 1: 0.905 	ind 2: 0.914 	ind 3: 0.891 	ind 4: 1.073 	ind 5: 0.987 	ind 6: 0.927 	ind 7: 0.987 	ind 8: 0.966 	ind 9: 0.917 	ind 10: 0.897 	ind 11: 0.906 	ind 12: 0.914 	ind 13: 0.941 	ind 14: 0.913 	ind 15: 1.098 	ind 16: 1.100 	ind 17: 0.890 	ind 18: 0.942 	ind 19: 0.970 	





2021-06-14 20:12:40,616 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  6.04it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.58it/s]
 10%|█         | 1/10 [00:00<00:01,  6.19it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.72it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.364
  RMSE          0.724
  MDAE          0.059
  MARPD         132.936
  R2            0.792
  Correlation   0.891
  Root-mean-squared Calibration Error   0.087
  Mean-absolute Calibration Error       0.073
  Miscalibration Area                   0.073
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.134
     Group Size: 0.56 -- Calibration Error: 0.085
     Group Size: 1.00 -- Calibration Error: 0.073
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.152
     Group Size: 0.56 -- Calibration Error: 0.103
     Group Size: 1.00 -- Calibration Error: 0.087
  Sharpness   0.299
  Negative-log-likelihood   1.594
  CRPS                      0.294
  Check Score               0.148
  Interval Score            1.993
MAE =
0.36444801193655435
uncertainty =
0

100%|██████████| 100/100 [12:13<00:00,  7.34s/it]

2021-06-14 20:25:10,105 - modnet - INFO - Loss per individual: ind 0: 0.979 	ind 1: 0.943 	ind 2: 1.051 	ind 3: 0.971 	ind 4: 1.040 	ind 5: 0.957 	ind 6: 0.956 	ind 7: 1.053 	ind 8: 0.979 	ind 9: 1.500 	ind 10: 1.009 	ind 11: 0.980 	ind 12: 0.983 	ind 13: 1.043 	ind 14: 1.051 	ind 15: 1.034 	ind 16: 1.038 	ind 17: 1.006 	ind 18: 1.224 	ind 19: 0.945 	





2021-06-14 20:25:11,856 - modnet - INFO - Generation number 1
2021-06-14 20:25:15,148 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:08<00:00,  7.28s/it]

2021-06-14 20:37:23,574 - modnet - INFO - Loss per individual: ind 0: 1.029 	ind 1: 0.997 	ind 2: 1.154 	ind 3: 1.111 	ind 4: 1.026 	ind 5: 0.971 	ind 6: 1.018 	ind 7: 0.965 	ind 8: 0.938 	ind 9: 0.983 	ind 10: 1.006 	ind 11: 1.046 	ind 12: 0.984 	ind 13: 1.054 	ind 14: 1.176 	ind 15: 1.028 	ind 16: 0.976 	ind 17: 0.956 	ind 18: 0.990 	ind 19: 1.048 	





2021-06-14 20:37:25,327 - modnet - INFO - Generation number 2
2021-06-14 20:37:28,554 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [13:38<00:00,  8.18s/it] 

2021-06-14 20:51:07,033 - modnet - INFO - Loss per individual: ind 0: 0.935 	ind 1: 1.063 	ind 2: 1.106 	ind 3: 0.967 	ind 4: 1.093 	ind 5: 0.952 	ind 6: 0.970 	ind 7: 1.026 	ind 8: 0.997 	ind 9: 1.027 	ind 10: 0.986 	ind 11: 0.949 	ind 12: 0.969 	ind 13: 0.982 	ind 14: 0.964 	ind 15: 0.963 	ind 16: 0.958 	ind 17: 1.002 	ind 18: 0.968 	ind 19: 1.025 	





2021-06-14 20:51:08,760 - modnet - INFO - Generation number 3
2021-06-14 20:51:12,146 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:38<00:00,  7.59s/it]

2021-06-14 21:03:50,988 - modnet - INFO - Loss per individual: ind 0: 0.975 	ind 1: 0.939 	ind 2: 0.974 	ind 3: 0.943 	ind 4: 0.961 	ind 5: 1.046 	ind 6: 1.000 	ind 7: 0.989 	ind 8: 1.031 	ind 9: 1.052 	ind 10: 0.925 	ind 11: 1.098 	ind 12: 1.003 	ind 13: 1.067 	ind 14: 0.969 	ind 15: 0.916 	ind 16: 0.959 	ind 17: 1.010 	ind 18: 0.920 	ind 19: 0.953 	





2021-06-14 21:03:52,501 - modnet - INFO - Generation number 4
2021-06-14 21:03:55,516 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:49<00:00,  7.69s/it]

2021-06-14 21:16:44,613 - modnet - INFO - Loss per individual: ind 0: 0.966 	ind 1: 0.951 	ind 2: 1.000 	ind 3: 0.977 	ind 4: 0.954 	ind 5: 0.943 	ind 6: 0.971 	ind 7: 0.940 	ind 8: 0.938 	ind 9: 0.942 	ind 10: 0.983 	ind 11: 0.980 	ind 12: 0.945 	ind 13: 0.979 	ind 14: 0.982 	ind 15: 1.041 	ind 16: 0.963 	ind 17: 0.957 	ind 18: 0.941 	ind 19: 0.982 	





2021-06-14 21:16:46,152 - modnet - INFO - Generation number 5
2021-06-14 21:16:49,169 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [15:10<00:00,  9.11s/it]

2021-06-14 21:31:59,893 - modnet - INFO - Loss per individual: ind 0: 1.047 	ind 1: 1.015 	ind 2: 0.949 	ind 3: 0.986 	ind 4: 1.121 	ind 5: 1.011 	ind 6: 1.071 	ind 7: 0.982 	ind 8: 0.993 	ind 9: 1.006 	ind 10: 0.984 	ind 11: 0.924 	ind 12: 0.953 	ind 13: 0.949 	ind 14: 0.944 	ind 15: 0.944 	ind 16: 0.975 	ind 17: 0.984 	ind 18: 0.970 	ind 19: 0.968 	





2021-06-14 21:32:01,495 - modnet - INFO - Generation number 6
2021-06-14 21:32:04,877 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [22:55<00:00, 13.75s/it]

2021-06-14 21:55:00,194 - modnet - INFO - Loss per individual: ind 0: 0.972 	ind 1: 0.948 	ind 2: 1.057 	ind 3: 0.939 	ind 4: 0.946 	ind 5: 0.932 	ind 6: 0.975 	ind 7: 1.019 	ind 8: 0.949 	ind 9: 0.962 	ind 10: 0.930 	ind 11: 0.980 	ind 12: 1.035 	ind 13: 0.936 	ind 14: 1.070 	ind 15: 0.964 	ind 16: 0.984 	ind 17: 0.927 	ind 18: 0.975 	ind 19: 0.964 	





2021-06-14 21:55:01,823 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  5.92it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.98it/s]
 10%|█         | 1/10 [00:00<00:01,  5.54it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  6.01it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           454.326
  RMSE          4929.412
  MDAE          0.016
  MARPD         138.098
  R2            -11177060.117
  Correlation   -0.020
  Root-mean-squared Calibration Error   0.089
  Mean-absolute Calibration Error       0.076
  Miscalibration Area                   0.077
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.118
     Group Size: 0.56 -- Calibration Error: 0.088
     Group Size: 1.00 -- Calibration Error: 0.076
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.153
     Group Size: 0.56 -- Calibration Error: 0.109
     Group Size: 1.00 -- Calibration Error: 0.089
  Sharpness   6070.165
  Negative-log-likelihood   3.633
  CRPS                      270.405
  Check Score               136.537
  Interval Score            1245.799
MAE =
0.303133871

100%|██████████| 100/100 [36:13<00:00, 21.73s/it]  

2021-06-14 22:31:31,822 - modnet - INFO - Loss per individual: ind 0: 1.078 	ind 1: 1.039 	ind 2: 0.925 	ind 3: 1.024 	ind 4: 1.320 	ind 5: 0.994 	ind 6: 0.916 	ind 7: 1.021 	ind 8: 1.039 	ind 9: 0.898 	ind 10: 1.225 	ind 11: 1.038 	ind 12: 0.997 	ind 13: 0.965 	ind 14: 0.967 	ind 15: 0.942 	ind 16: 0.896 	ind 17: 0.997 	ind 18: 0.941 	ind 19: 1.038 	





2021-06-14 22:31:33,560 - modnet - INFO - Generation number 1
2021-06-14 22:31:35,736 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [34:04<00:00, 20.44s/it]  

2021-06-14 23:05:39,950 - modnet - INFO - Loss per individual: ind 0: 0.983 	ind 1: 0.922 	ind 2: 0.986 	ind 3: 0.902 	ind 4: 0.992 	ind 5: 0.988 	ind 6: 0.958 	ind 7: 0.932 	ind 8: 0.917 	ind 9: 1.088 	ind 10: 0.974 	ind 11: 0.989 	ind 12: 1.025 	ind 13: 0.926 	ind 14: 0.969 	ind 15: 0.914 	ind 16: 1.020 	ind 17: 0.942 	ind 18: 0.941 	ind 19: 1.060 	





2021-06-14 23:05:41,543 - modnet - INFO - Generation number 2
2021-06-14 23:05:45,184 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [13:23<00:00,  8.04s/it]

2021-06-14 23:19:08,819 - modnet - INFO - Loss per individual: ind 0: 0.962 	ind 1: 0.927 	ind 2: 0.917 	ind 3: 0.938 	ind 4: 0.962 	ind 5: 0.996 	ind 6: 1.078 	ind 7: 0.918 	ind 8: 0.991 	ind 9: 1.247 	ind 10: 1.009 	ind 11: 1.064 	ind 12: 0.922 	ind 13: 1.009 	ind 14: 0.947 	ind 15: 0.968 	ind 16: 0.934 	ind 17: 1.004 	ind 18: 0.976 	ind 19: 0.923 	





2021-06-14 23:19:10,368 - modnet - INFO - Generation number 3
2021-06-14 23:19:14,139 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [13:27<00:00,  8.08s/it]

2021-06-14 23:32:41,887 - modnet - INFO - Loss per individual: ind 0: 0.934 	ind 1: 0.931 	ind 2: 1.042 	ind 3: 1.047 	ind 4: 0.946 	ind 5: 0.950 	ind 6: 0.952 	ind 7: 0.929 	ind 8: 1.068 	ind 9: 0.945 	ind 10: 0.995 	ind 11: 0.903 	ind 12: 1.032 	ind 13: 0.929 	ind 14: 1.034 	ind 15: 0.925 	ind 16: 0.937 	ind 17: 0.930 	ind 18: 1.015 	ind 19: 0.923 	





2021-06-14 23:32:43,129 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  8.04it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  9.47it/s]
 20%|██        | 2/10 [00:00<00:00, 10.16it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  9.95it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.341
  RMSE          0.758
  MDAE          0.033
  MARPD         137.805
  R2            0.800
  Correlation   0.898
  Root-mean-squared Calibration Error   0.071
  Mean-absolute Calibration Error       0.059
  Miscalibration Area                   0.059
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.133
     Group Size: 0.56 -- Calibration Error: 0.079
     Group Size: 1.00 -- Calibration Error: 0.059
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.140
     Group Size: 0.56 -- Calibration Error: 0.091
     Group Size: 1.00 -- Calibration Error: 0.071
  Sharpness   0.290
  Negative-log-likelihood   32.304
  CRPS                      0.274
  Check Score               0.138
  Interval Score            1.858
MAE =
0.34086998162569354
uncertainty =


100%|██████████| 100/100 [13:50<00:00,  8.31s/it]

2021-06-14 23:46:42,737 - modnet - INFO - Loss per individual: ind 0: 0.889 	ind 1: 0.969 	ind 2: 0.948 	ind 3: 0.883 	ind 4: 0.905 	ind 5: 0.888 	ind 6: 0.992 	ind 7: 0.986 	ind 8: 0.895 	ind 9: 0.906 	ind 10: 0.959 	ind 11: 5.278 	ind 12: 0.861 	ind 13: 2.108 	ind 14: 0.967 	ind 15: 0.949 	ind 16: 0.928 	ind 17: 0.949 	ind 18: 0.942 	ind 19: 0.968 	





2021-06-14 23:46:44,530 - modnet - INFO - Generation number 1
2021-06-14 23:46:48,459 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:39<00:00,  7.60s/it]

2021-06-14 23:59:28,193 - modnet - INFO - Loss per individual: ind 0: 0.877 	ind 1: 1.076 	ind 2: 0.972 	ind 3: 0.873 	ind 4: 0.913 	ind 5: 0.876 	ind 6: 0.927 	ind 7: 0.949 	ind 8: 0.946 	ind 9: 0.982 	ind 10: 1.081 	ind 11: 0.915 	ind 12: 0.901 	ind 13: 0.950 	ind 14: 0.957 	ind 15: 0.928 	ind 16: 0.887 	ind 17: 0.870 	ind 18: 0.943 	ind 19: 0.911 	





2021-06-14 23:59:30,023 - modnet - INFO - Generation number 2
2021-06-14 23:59:34,172 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:40<00:00,  7.61s/it]

2021-06-15 00:12:15,137 - modnet - INFO - Loss per individual: ind 0: 0.898 	ind 1: 0.974 	ind 2: 0.896 	ind 3: 0.901 	ind 4: 0.923 	ind 5: 0.940 	ind 6: 0.921 	ind 7: 0.867 	ind 8: 0.983 	ind 9: 0.943 	ind 10: 0.900 	ind 11: 1.010 	ind 12: 0.878 	ind 13: 0.940 	ind 14: 0.971 	ind 15: 1.012 	ind 16: 0.917 	ind 17: 0.983 	ind 18: 0.874 	ind 19: 1.044 	





2021-06-15 00:12:16,945 - modnet - INFO - Generation number 3
2021-06-15 00:12:21,643 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:01<00:00,  7.21s/it]

2021-06-15 00:24:23,123 - modnet - INFO - Loss per individual: ind 0: 0.972 	ind 1: 1.035 	ind 2: 0.936 	ind 3: 0.929 	ind 4: 0.940 	ind 5: 0.912 	ind 6: 0.952 	ind 7: 0.933 	ind 8: 0.986 	ind 9: 0.909 	ind 10: 0.891 	ind 11: 0.945 	ind 12: 0.944 	ind 13: 0.861 	ind 14: 0.875 	ind 15: 0.983 	ind 16: 0.879 	ind 17: 0.927 	ind 18: 0.900 	ind 19: 0.887 	





2021-06-15 00:24:24,828 - modnet - INFO - Generation number 4
2021-06-15 00:24:29,808 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:13<00:00,  7.34s/it]

2021-06-15 00:36:43,778 - modnet - INFO - Loss per individual: ind 0: 0.953 	ind 1: 1.040 	ind 2: 0.919 	ind 3: 0.938 	ind 4: 0.938 	ind 5: 0.889 	ind 6: 0.956 	ind 7: 0.921 	ind 8: 0.872 	ind 9: 0.882 	ind 10: 0.945 	ind 11: 0.952 	ind 12: 0.910 	ind 13: 0.928 	ind 14: 0.997 	ind 15: 1.074 	ind 16: 0.877 	ind 17: 0.865 	ind 18: 0.936 	ind 19: 0.933 	





2021-06-15 00:36:45,635 - modnet - INFO - Generation number 5
2021-06-15 00:36:50,999 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:26<00:00,  7.47s/it]

2021-06-15 00:49:17,775 - modnet - INFO - Loss per individual: ind 0: 1.008 	ind 1: 0.891 	ind 2: 0.944 	ind 3: 0.882 	ind 4: 0.916 	ind 5: 0.903 	ind 6: 1.022 	ind 7: 0.866 	ind 8: 0.920 	ind 9: 0.930 	ind 10: 0.885 	ind 11: 0.939 	ind 12: 0.889 	ind 13: 0.915 	ind 14: 0.900 	ind 15: 0.889 	ind 16: 1.037 	ind 17: 0.921 	ind 18: 0.925 	ind 19: 1.010 	





2021-06-15 00:49:19,462 - modnet - INFO - Generation number 6
2021-06-15 00:49:24,318 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [14:15<00:00,  8.55s/it]

2021-06-15 01:03:39,771 - modnet - INFO - Loss per individual: ind 0: 0.883 	ind 1: 0.922 	ind 2: 0.976 	ind 3: 0.962 	ind 4: 0.918 	ind 5: 0.932 	ind 6: 0.921 	ind 7: 0.969 	ind 8: 0.967 	ind 9: 0.932 	ind 10: 0.931 	ind 11: 0.910 	ind 12: 0.874 	ind 13: 0.904 	ind 14: 0.923 	ind 15: 0.906 	ind 16: 0.932 	ind 17: 0.885 	ind 18: 0.946 	ind 19: 0.923 	





2021-06-15 01:03:41,144 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:00,  9.04it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  8.39it/s]
 10%|█         | 1/10 [00:00<00:01,  7.41it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  7.94it/s]

 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.346
  RMSE          0.699
  MDAE          0.024
  MARPD         136.681
  R2            0.839
  Correlation   0.920
  Root-mean-squared Calibration Error   0.201
  Mean-absolute Calibration Error       0.174
  Miscalibration Area                   0.176
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.227
     Group Size: 0.56 -- Calibration Error: 0.187
     Group Size: 1.00 -- Calibration Error: 0.174
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.255
     Group Size: 0.56 -- Calibration Error: 0.220
     Group Size: 1.00 -- Calibration Error: 0.201
  Sharpness   0.347
  Negative-log-likelihood   27.419
  CRPS                      0.269
  Check Score               0.136
  Interval Score            1.717
MAE =
0.3464376142968117
uncertainty =
0




In [11]:
maes

array([0.36448238, 0.36444801, 0.30313387, 0.34086998, 0.34643761])

In [12]:
maes.mean()

0.3438743718191598

In [13]:
uncertainties

array([0.21845903, 0.19188686, 0.18078476, 0.17949469, 0.1999878 ])

In [14]:
uncertainties.mean()

0.19412262737751007

In [15]:
metricss

[{'accuracy': {'mae': 0.36448238013036294,
   'rmse': 0.761863198955022,
   'mdae': 0.08438744395971298,
   'marpd': 132.72021905337536,
   'r2': 0.7751361720408235,
   'corr': 0.8842640762160107},
  'avg_calibration': {'rms_cal': 0.09542256098834742,
   'ma_cal': 0.0844770283479961,
   'miscal_area': 0.08531969822292407},
  'adv_group_calibration': {'ma_adv_group_cal': {'group_sizes': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
           0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ]),
    'adv_group_cali_mean': array([0.45786869, 0.14153854, 0.11149934, 0.10703349, 0.10323864,
           0.0982405 , 0.09345851, 0.09279935, 0.08887299, 0.08447703]),
    'adv_group_cali_stderr': array([0.03525213, 0.01862428, 0.01110936, 0.00725247, 0.00419611,
           0.00441212, 0.00532296, 0.00237604, 0.00162063, 0.        ])},
   'rms_adv_group_cal': {'group_sizes': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
           0.55555556, 0.66

#### Conclusion

Relative percentage change of the error compared to MODNet_exp_GA reference: 1%