# MODNet on experimental dataset 

This notebook trains and cross-validates a MODNet model on the experimental dataset alone (target: `exp_gap`). The resulting model is used as the benchmark for the other methods in this Ensemble Methods repository.

In [1]:
def setup_threading():
    """Limit the numerical back-ends to one thread each via environment variables.

    Sets the OpenBLAS, MKL, OpenMP and TensorFlow intra-/inter-op thread-count
    variables to "1" and sets CUDA_VISIBLE_DEVICES to "-1" in the environment
    of the current process. Returns nothing.
    """
    from os import environ

    single_thread_vars = (
        'OPENBLAS_NUM_THREADS',
        'MKL_NUM_THREADS',
        "OMP_NUM_THREADS",
        "TF_NUM_INTRAOP_THREADS",
        "TF_NUM_INTEROP_THREADS",
    )
    for var_name in single_thread_vars:
        environ[var_name] = '1'

    # Set last, exactly as in the sequence of assignments this replaces.
    environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
# Apply the thread-count and CUDA_VISIBLE_DEVICES settings before the
# modnet / numpy imports in the next cell.
setup_threading()

In [3]:
from modnet.preprocessing import MODData
from modnet.models.vanilla import MODNetModel
from modnet.hyper_opt.fit_genetic import FitGenetic
import numpy as np
import os
import copy

from collections import defaultdict
import itertools
import os
import pandas as pd
import matplotlib.pyplot as plt 
from IPython.display import Markdown
from matminer.datasets import load_dataset, get_all_dataset_info
from pymatgen.core import Composition

from modnet.featurizers import MODFeaturizer
from modnet.featurizers.presets import DeBreuck2020Featurizer

# NOTE(review): this overwrites the "-1" that setup_threading() assigned to
# CUDA_VISIBLE_DEVICES earlier in the notebook — confirm which value is intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


If you use the ChemEnv tool for your research, please consider citing the following reference(s) :
David Waroquiers, Xavier Gonze, Gian-Marco Rignanese, Cathrin Welker-Nieuwoudt, Frank Rosowski,
Michael Goebel, Stephan Schenk, Peter Degelmann, Rute Andre, Robert Glaum, and Geoffroy Hautier,
"Statistical analysis of coordination environments in oxides",
Chem. Mater., 2017, 29 (19), pp 8346-8360,
DOI: 10.1021/acs.chemmater.7b02766



In [4]:
from sklearn.model_selection import KFold
from modnet.preprocessing import MODData

def shuffle_MD(data,random_state=10):
    """Return a shuffled deep copy of a MODData-like object.

    One random permutation of the ``df_targets`` index is drawn (seeded by
    ``random_state``) and applied to ``df_featurized``, ``df_targets`` and
    ``df_structure`` of the copy, so the three frames stay row-aligned.
    The object passed in is not modified.

    Parameters
    ----------
    data : object exposing ``df_featurized``, ``df_targets`` and ``df_structure``
    random_state : seed forwarded to ``DataFrame.sample``

    Returns
    -------
    The shuffled copy.
    """
    shuffled = copy.deepcopy(data)
    new_order = shuffled.df_targets.sample(frac=1, random_state=random_state).index

    # Reindex every frame with the same label order.
    for frame_name in ("df_featurized", "df_targets", "df_structure"):
        reordered = getattr(shuffled, frame_name).loc[new_order]
        setattr(shuffled, frame_name, reordered)

    return shuffled

def MDKsplit(data,n_splits=5,random_state=10):
    """Split a MODData object into K (train, validation) pairs of MODData objects.

    The data is first shuffled with ``shuffle_MD`` and then partitioned with
    ``KFold(shuffle=True)``; both receive the same ``random_state``.

    Parameters
    ----------
    data : MODData providing ``df_structure`` (with a 'structure' column),
        ``df_targets``, ``df_featurized`` and ``structure_ids``
    n_splits : number of folds
    random_state : seed used for the shuffle and for KFold

    Returns
    -------
    list of ``(data_train, data_val)`` tuples, one per fold. Each element is a
    newly constructed MODData with the matching ``df_featurized`` rows attached.
    ``optimal_features`` is not copied onto the new objects.
    """
    shuffled = shuffle_MD(data,random_state=random_state)
    all_ids = np.array(shuffled.structure_ids)

    def _subset(rows):
        # Build a new MODData from the given positional rows and attach the
        # corresponding rows of df_featurized.
        part = MODData(
            shuffled.df_structure.iloc[rows]['structure'].values,
            shuffled.df_targets.iloc[rows].values,
            target_names=shuffled.df_targets.columns,
            structure_ids=all_ids[rows],
        )
        part.df_featurized = shuffled.df_featurized.iloc[rows]
        return part

    splitter = KFold(n_splits=n_splits,shuffle=True,random_state=random_state)
    return [
        (_subset(train_rows), _subset(val_rows))
        for train_rows, val_rows in splitter.split(all_ids)
    ]

In [5]:
# Load two previously saved MODData objects and one pickled object from the
# working directory.
# NOTE(review): only `md_joint` is used by the cells visible in this notebook;
# `md` and `df` appear unused here — confirm before removing them.
md = MODData.load('exp_gap_all')
df = pd.read_pickle('df_exp_cleaned.pkl')
md_joint = MODData.load('exp_pbe_joint')

2021-06-14 17:43:55,625 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f63c4940bb0> object, created with modnet version <=0.1.7
2021-06-14 17:43:56,239 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f6355795700> object, created with modnet version <=0.1.7


In [6]:
# Inspect the target columns stored in the joint dataset.
md_joint.df_targets

Unnamed: 0,exp_gap,pbe_gap,difference
mp-12699,0.00,0.0000,0.0000
mp-559459,3.40,2.1704,1.2296
mp-21162,0.00,0.0000,0.0000
mp-1306,0.00,0.0000,0.0000
mp-15252,0.53,0.1049,0.4251
...,...,...,...
mp-556541,1.80,1.5135,0.2865
mp-2602,1.04,0.0000,1.0400
mp-30366,0.00,0.0000,0.0000
mp-3718,1.45,0.2418,1.2082


In [7]:
# Keep only the experimental target. errors='ignore' makes this cell safe to
# re-run: once the two columns are gone, a second execution is a no-op instead
# of raising KeyError.
md_joint.df_targets = md_joint.df_targets.drop(columns=['pbe_gap', 'difference'], errors='ignore')

In [8]:
# Check the targets again: only exp_gap should remain after the drop.
md_joint.df_targets

Unnamed: 0,exp_gap
mp-12699,0.00
mp-559459,3.40
mp-21162,0.00
mp-1306,0.00
mp-15252,0.53
...,...
mp-556541,1.80
mp-2602,1.04
mp-30366,0.00
mp-3718,1.45


In [9]:
# Inspect the structures stored in the joint dataset.
md_joint.df_structure

Unnamed: 0_level_0,structure
id,Unnamed: 1_level_1
mp-12699,"[[ 0.13616363 -2.60631371 1.50340165] Ca, [-1..."
mp-559459,"[[2.14049282 6.43464803 1.76199846] Cs, [-0.70..."
mp-21162,"[[0. 0. 0.] Sr, [0. 2.5259875 2.5259875..."
mp-1306,"[[3.259129 1.88166243 1.15971025] Th, [ 3.25..."
mp-15252,"[[0.95531657 1.042239 0.86229681] Cu, [2.803..."
...,...
mp-556541,"[[0. 6.49363948 5.82657217] In, [ 0. ..."
mp-2602,"[[3.023318 3.023318 3.023318] Nd, [0. 0. 0.] As]"
mp-30366,"[[4.51774558 1.37529211 4.32659015] Ca, [1.109..."
mp-3718,"[[2.0697935 0. 1.28932533] Th, [0. ..."


In [10]:
import uncertainty_toolbox as uct

# K-fold cross-validation on md_joint: for every fold a model is selected with
# FitGenetic on the training part and evaluated on the held-out part.
k = 5
random_state = 202010
# Magnitude above which a prediction or a predicted uncertainty is treated as
# unrealistic and left out of the per-fold averages (same units as exp_gap).
max_abs_value = 20

folds = MDKsplit(md_joint,n_splits=k,random_state=random_state)
# Sized from k rather than a literal 5, so changing k can neither overflow the
# arrays nor leave placeholder entries in the means; NaN marks folds that have
# not been evaluated yet.
maes = np.full(k, np.nan)
uncertainties = np.full(k, np.nan)
metricss = []
for i,f in enumerate(folds):
    train = f[0]
    test = f[1]

    # Reuse the feature-selected training set if an earlier run saved it;
    # otherwise run feature selection now and save the result. The file name is
    # built from the seed and the 1-based fold number.
    fpath = 'train_{}_{}'.format(random_state,i+1)
    if os.path.exists(fpath):
        train = MODData.load(fpath)
    else:
        train.feature_selection(n=-1, use_precomputed_cross_nmi=True)
        train.save(fpath)

    # assure no overlap (this also catches a cached file that belongs to a different split)
    assert len(set(train.df_targets.index).intersection(set(test.df_targets.index))) == 0

    ga = FitGenetic(train)
    model = ga.run(size_pop=20, num_generations=10, n_jobs=10)

    pred, std = model.predict(test, return_unc=True)
    true = test.df_targets
    # NOTE(review): these metrics use every test row, i.e. they are computed
    # before the unrealistic values are dropped below, so a single diverged
    # prediction can dominate them — confirm this is intended.
    metrics = uct.metrics.get_all_metrics(pred['exp_gap'].values, std['exp_gap'].values, true['exp_gap'].values)

    # Signed error, aligned on the sample index.
    error = pred['exp_gap']-true['exp_gap']
    error = error.drop(pred.index[((pred['exp_gap']).abs()>max_abs_value)]) # drop unrealistic values: happens extremely rarely
    std = std['exp_gap'].drop(std.index[((std['exp_gap']).abs()>max_abs_value)]) # drop unrealistic values: happens extremely rarely
    mae = np.abs(error.values).mean()
    uncertainty = np.abs(std).mean()
    print('MAE =')
    print(mae)
    print('uncertainty =')
    print(uncertainty)
    maes[i] = mae
    uncertainties[i] = uncertainty
    metricss.append(metrics)

2021-06-14 17:43:56,483 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,503 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,515 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,534 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,545 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,560 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,569 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,584 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,593 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:56,607 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:43:57,411 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f6342db1ca0> object, created with modnet version 0.1.9
2021-06-14 17

100%|██████████| 100/100 [12:09<00:00,  7.30s/it]

2021-06-14 17:56:08,841 - modnet - INFO - Loss per individual: ind 0: 0.598 	ind 1: 0.654 	ind 2: 0.726 	ind 3: 0.610 	ind 4: 0.947 	ind 5: 0.768 	ind 6: 0.739 	ind 7: 0.736 	ind 8: 0.626 	ind 9: 0.651 	ind 10: 0.684 	ind 11: 0.741 	ind 12: 0.603 	ind 13: 0.589 	ind 14: 0.663 	ind 15: 0.635 	ind 16: 0.633 	ind 17: 0.682 	ind 18: 0.640 	ind 19: 0.735 	





2021-06-14 17:56:10,588 - modnet - INFO - Generation number 1
2021-06-14 17:56:13,295 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:38<00:00,  7.59s/it]

2021-06-14 18:08:51,882 - modnet - INFO - Loss per individual: ind 0: 0.658 	ind 1: 0.726 	ind 2: 0.659 	ind 3: 0.599 	ind 4: 0.566 	ind 5: 0.664 	ind 6: 0.625 	ind 7: 0.655 	ind 8: 0.589 	ind 9: 1.023 	ind 10: 0.622 	ind 11: 0.633 	ind 12: 0.756 	ind 13: 0.578 	ind 14: 0.570 	ind 15: 0.616 	ind 16: 0.613 	ind 17: 0.762 	ind 18: 0.617 	ind 19: 0.594 	





2021-06-14 18:08:53,565 - modnet - INFO - Generation number 2
2021-06-14 18:08:56,410 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:29<00:00,  6.89s/it]

2021-06-14 18:20:25,486 - modnet - INFO - Loss per individual: ind 0: 0.653 	ind 1: 0.594 	ind 2: 0.603 	ind 3: 0.631 	ind 4: 0.622 	ind 5: 0.559 	ind 6: 0.742 	ind 7: 0.615 	ind 8: 0.608 	ind 9: 0.607 	ind 10: 0.594 	ind 11: 0.585 	ind 12: 0.580 	ind 13: 0.737 	ind 14: 0.607 	ind 15: 0.604 	ind 16: 0.625 	ind 17: 0.637 	ind 18: 0.711 	ind 19: 0.707 	





2021-06-14 18:20:27,413 - modnet - INFO - Generation number 3
2021-06-14 18:20:30,645 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:15<00:00,  6.15s/it]

2021-06-14 18:30:45,840 - modnet - INFO - Loss per individual: ind 0: 0.967 	ind 1: 0.599 	ind 2: 0.595 	ind 3: 0.600 	ind 4: 0.570 	ind 5: 0.644 	ind 6: 0.653 	ind 7: 0.742 	ind 8: 0.655 	ind 9: 0.628 	ind 10: 0.646 	ind 11: 0.599 	ind 12: 1.010 	ind 13: 0.645 	ind 14: 0.590 	ind 15: 0.720 	ind 16: 0.592 	ind 17: 0.603 	ind 18: 0.599 	ind 19: 0.587 	





2021-06-14 18:30:47,597 - modnet - INFO - Generation number 4
2021-06-14 18:30:50,706 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:33<00:00,  6.94s/it]

2021-06-14 18:42:24,298 - modnet - INFO - Loss per individual: ind 0: 0.630 	ind 1: 0.585 	ind 2: 0.653 	ind 3: 0.595 	ind 4: 0.646 	ind 5: 0.670 	ind 6: 0.587 	ind 7: 0.608 	ind 8: 0.633 	ind 9: 0.569 	ind 10: 0.758 	ind 11: 0.743 	ind 12: 0.672 	ind 13: 0.662 	ind 14: 0.751 	ind 15: 0.674 	ind 16: 0.600 	ind 17: 0.611 	ind 18: 0.585 	ind 19: 0.564 	





2021-06-14 18:42:26,047 - modnet - INFO - Generation number 5
2021-06-14 18:42:29,181 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:14<00:00,  6.74s/it]

2021-06-14 18:53:43,297 - modnet - INFO - Loss per individual: ind 0: 0.668 	ind 1: 0.594 	ind 2: 0.614 	ind 3: 0.600 	ind 4: 0.583 	ind 5: 0.582 	ind 6: 0.569 	ind 7: 0.584 	ind 8: 0.615 	ind 9: 0.649 	ind 10: 0.660 	ind 11: 0.631 	ind 12: 0.569 	ind 13: 0.578 	ind 14: 0.745 	ind 15: 0.671 	ind 16: 0.752 	ind 17: 0.606 	ind 18: 0.618 	ind 19: 0.627 	





2021-06-14 18:53:44,965 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  5.78it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.40it/s]
 10%|█         | 1/10 [00:00<00:01,  5.72it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.37it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.396
  RMSE          0.859
  MDAE          0.072
  MARPD         134.327
  R2            0.714
  Correlation   0.864
  Root-mean-squared Calibration Error   0.071
  Mean-absolute Calibration Error       0.056
  Miscalibration Area                   0.056
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.121
     Group Size: 0.56 -- Calibration Error: 0.071
     Group Size: 1.00 -- Calibration Error: 0.056
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.147
     Group Size: 0.56 -- Calibration Error: 0.086
     Group Size: 1.00 -- Calibration Error: 0.071
  Sharpness   0.349
  Negative-log-likelihood   2.198
  CRPS                      0.316
  Check Score               0.159
  Interval Score            2.126
MAE =
0.39583979969546573
uncertainty =
0

100%|██████████| 100/100 [13:44<00:00,  8.25s/it]

2021-06-14 19:07:50,533 - modnet - INFO - Loss per individual: ind 0: 0.686 	ind 1: 0.653 	ind 2: 0.679 	ind 3: 0.727 	ind 4: 0.652 	ind 5: 0.596 	ind 6: 0.660 	ind 7: 0.643 	ind 8: 0.659 	ind 9: 0.604 	ind 10: 0.617 	ind 11: 0.796 	ind 12: 0.677 	ind 13: 0.588 	ind 14: 0.606 	ind 15: 0.735 	ind 16: 0.625 	ind 17: 0.676 	ind 18: 0.662 	ind 19: 0.641 	





2021-06-14 19:07:52,276 - modnet - INFO - Generation number 1
2021-06-14 19:07:55,522 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [13:05<00:00,  7.85s/it]

2021-06-14 19:21:00,757 - modnet - INFO - Loss per individual: ind 0: 0.663 	ind 1: 0.656 	ind 2: 0.700 	ind 3: 0.610 	ind 4: 0.634 	ind 5: 0.612 	ind 6: 0.640 	ind 7: 0.734 	ind 8: 0.617 	ind 9: 0.618 	ind 10: 0.615 	ind 11: 0.679 	ind 12: 0.741 	ind 13: 0.619 	ind 14: 0.680 	ind 15: 0.671 	ind 16: 0.611 	ind 17: 0.606 	ind 18: 0.594 	ind 19: 0.639 	





2021-06-14 19:21:02,546 - modnet - INFO - Generation number 2
2021-06-14 19:21:05,749 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:33<00:00,  7.54s/it]

2021-06-14 19:33:39,584 - modnet - INFO - Loss per individual: ind 0: 0.626 	ind 1: 0.603 	ind 2: 0.624 	ind 3: 0.623 	ind 4: 0.646 	ind 5: 0.620 	ind 6: 0.640 	ind 7: 0.626 	ind 8: 0.624 	ind 9: 0.602 	ind 10: 0.787 	ind 11: 0.596 	ind 12: 0.597 	ind 13: 0.646 	ind 14: 0.607 	ind 15: 0.592 	ind 16: 0.686 	ind 17: 0.675 	ind 18: 0.674 	ind 19: 0.741 	





2021-06-14 19:33:41,102 - modnet - INFO - Generation number 3
2021-06-14 19:33:44,091 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:21<00:00,  6.82s/it]

2021-06-14 19:45:05,726 - modnet - INFO - Loss per individual: ind 0: 0.597 	ind 1: 0.676 	ind 2: 0.627 	ind 3: 0.635 	ind 4: 0.601 	ind 5: 0.648 	ind 6: 0.608 	ind 7: 0.624 	ind 8: 0.654 	ind 9: 0.617 	ind 10: 0.605 	ind 11: 0.621 	ind 12: 0.644 	ind 13: 0.635 	ind 14: 0.751 	ind 15: 0.628 	ind 16: 0.615 	ind 17: 0.668 	ind 18: 0.651 	ind 19: 0.593 	





2021-06-14 19:45:07,564 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  6.24it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.23it/s]
 10%|█         | 1/10 [00:00<00:01,  5.97it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.18it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.355
  RMSE          0.721
  MDAE          0.052
  MARPD         132.497
  R2            0.794
  Correlation   0.891
  Root-mean-squared Calibration Error   0.132
  Mean-absolute Calibration Error       0.123
  Miscalibration Area                   0.124
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.198
     Group Size: 0.56 -- Calibration Error: 0.148
     Group Size: 1.00 -- Calibration Error: 0.123
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.206
     Group Size: 0.56 -- Calibration Error: 0.152
     Group Size: 1.00 -- Calibration Error: 0.132
  Sharpness   0.312
  Negative-log-likelihood   2.535
  CRPS                      0.282
  Check Score               0.142
  Interval Score            1.869
MAE =
0.3545426163906421
uncertainty =
0.

100%|██████████| 100/100 [13:17<00:00,  7.97s/it]

2021-06-14 19:58:40,262 - modnet - INFO - Loss per individual: ind 0: 0.731 	ind 1: 0.742 	ind 2: 0.642 	ind 3: 0.782 	ind 4: 0.780 	ind 5: 0.757 	ind 6: 0.649 	ind 7: 0.645 	ind 8: 0.616 	ind 9: 0.629 	ind 10: 0.679 	ind 11: 0.772 	ind 12: 0.646 	ind 13: 0.689 	ind 14: 0.685 	ind 15: 0.639 	ind 16: 0.624 	ind 17: 0.708 	ind 18: 0.691 	ind 19: 0.643 	





2021-06-14 19:58:41,830 - modnet - INFO - Generation number 1
2021-06-14 19:58:44,724 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:41<00:00,  7.02s/it]

2021-06-14 20:10:26,782 - modnet - INFO - Loss per individual: ind 0: 0.709 	ind 1: 0.731 	ind 2: 0.641 	ind 3: 0.638 	ind 4: 0.674 	ind 5: 0.801 	ind 6: 0.782 	ind 7: 0.650 	ind 8: 0.636 	ind 9: 0.671 	ind 10: 0.658 	ind 11: 0.708 	ind 12: 0.693 	ind 13: 0.669 	ind 14: 0.614 	ind 15: 0.627 	ind 16: 0.659 	ind 17: 0.635 	ind 18: 0.669 	ind 19: 0.633 	





2021-06-14 20:10:28,564 - modnet - INFO - Generation number 2
2021-06-14 20:10:31,824 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [09:29<00:00,  5.70s/it]

2021-06-14 20:20:01,619 - modnet - INFO - Loss per individual: ind 0: 0.638 	ind 1: 0.689 	ind 2: 0.659 	ind 3: 0.674 	ind 4: 0.742 	ind 5: 0.668 	ind 6: 0.628 	ind 7: 0.812 	ind 8: 0.614 	ind 9: 0.654 	ind 10: 0.692 	ind 11: 0.675 	ind 12: 0.642 	ind 13: 0.694 	ind 14: 0.624 	ind 15: 0.639 	ind 16: 0.703 	ind 17: 0.678 	ind 18: 0.674 	ind 19: 0.661 	





2021-06-14 20:20:03,176 - modnet - INFO - Generation number 3
2021-06-14 20:20:06,212 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:21<00:00,  6.82s/it]

2021-06-14 20:31:28,161 - modnet - INFO - Loss per individual: ind 0: 0.640 	ind 1: 0.704 	ind 2: 0.691 	ind 3: 0.645 	ind 4: 0.655 	ind 5: 0.639 	ind 6: 0.681 	ind 7: 0.660 	ind 8: 0.744 	ind 9: 0.651 	ind 10: 0.706 	ind 11: 0.685 	ind 12: 0.687 	ind 13: 0.664 	ind 14: 0.739 	ind 15: 0.671 	ind 16: 0.625 	ind 17: 0.714 	ind 18: 0.632 	ind 19: 0.672 	





2021-06-14 20:31:29,617 - modnet - INFO - Generation number 4
2021-06-14 20:31:32,672 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:30<00:00,  6.90s/it]

2021-06-14 20:43:02,752 - modnet - INFO - Loss per individual: ind 0: 0.709 	ind 1: 0.631 	ind 2: 0.680 	ind 3: 0.630 	ind 4: 0.637 	ind 5: 0.680 	ind 6: 0.717 	ind 7: 0.705 	ind 8: 0.651 	ind 9: 0.638 	ind 10: 0.648 	ind 11: 0.792 	ind 12: 0.662 	ind 13: 0.625 	ind 14: 0.694 	ind 15: 0.657 	ind 16: 0.683 	ind 17: 0.621 	ind 18: 0.635 	ind 19: 0.631 	





2021-06-14 20:43:04,373 - modnet - INFO - Generation number 5
2021-06-14 20:43:07,758 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:48<00:00,  7.69s/it]

2021-06-14 20:55:56,390 - modnet - INFO - Loss per individual: ind 0: 0.609 	ind 1: 0.651 	ind 2: 0.619 	ind 3: 0.656 	ind 4: 0.724 	ind 5: 0.692 	ind 6: 0.641 	ind 7: 0.809 	ind 8: 0.698 	ind 9: 0.617 	ind 10: 0.755 	ind 11: 0.700 	ind 12: 0.616 	ind 13: 0.634 	ind 14: 0.688 	ind 15: 0.607 	ind 16: 0.778 	ind 17: 0.642 	ind 18: 0.656 	ind 19: 0.671 	





2021-06-14 20:55:58,102 - modnet - INFO - Generation number 6
2021-06-14 20:56:01,716 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [09:31<00:00,  5.72s/it]

2021-06-14 21:05:33,598 - modnet - INFO - Loss per individual: ind 0: 0.618 	ind 1: 0.658 	ind 2: 0.626 	ind 3: 0.688 	ind 4: 0.687 	ind 5: 0.699 	ind 6: 0.651 	ind 7: 0.640 	ind 8: 0.662 	ind 9: 0.683 	ind 10: 0.604 	ind 11: 0.711 	ind 12: 0.662 	ind 13: 0.615 	ind 14: 0.686 	ind 15: 0.599 	ind 16: 0.635 	ind 17: 0.649 	ind 18: 0.657 	ind 19: 0.697 	





2021-06-14 21:05:35,107 - modnet - INFO - Generation number 7
2021-06-14 21:05:38,391 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:17<00:00,  6.77s/it]

2021-06-14 21:16:55,784 - modnet - INFO - Loss per individual: ind 0: 0.636 	ind 1: 0.654 	ind 2: 0.659 	ind 3: 0.675 	ind 4: 0.620 	ind 5: 0.665 	ind 6: 0.800 	ind 7: 0.668 	ind 8: 0.683 	ind 9: 0.644 	ind 10: 0.642 	ind 11: 0.695 	ind 12: 0.662 	ind 13: 0.704 	ind 14: 0.616 	ind 15: 0.653 	ind 16: 0.657 	ind 17: 0.696 	ind 18: 0.634 	ind 19: 0.721 	





2021-06-14 21:16:57,216 - modnet - INFO - Generation number 8
2021-06-14 21:17:00,719 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:51<00:00,  6.52s/it]

2021-06-14 21:27:52,450 - modnet - INFO - Loss per individual: ind 0: 0.639 	ind 1: 0.638 	ind 2: 0.669 	ind 3: 0.633 	ind 4: 0.694 	ind 5: 0.725 	ind 6: 0.640 	ind 7: 0.679 	ind 8: 0.616 	ind 9: 0.616 	ind 10: 0.641 	ind 11: 0.670 	ind 12: 0.612 	ind 13: 0.647 	ind 14: 0.697 	ind 15: 0.652 	ind 16: 0.645 	ind 17: 0.623 	ind 18: 0.634 	ind 19: 0.651 	





2021-06-14 21:27:54,091 - modnet - INFO - Generation number 9
2021-06-14 21:27:58,493 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [09:49<00:00,  5.89s/it]

2021-06-14 21:37:47,542 - modnet - INFO - Loss per individual: ind 0: 0.601 	ind 1: 0.642 	ind 2: 0.646 	ind 3: 0.728 	ind 4: 0.736 	ind 5: 0.733 	ind 6: 0.640 	ind 7: 0.627 	ind 8: 0.634 	ind 9: 0.625 	ind 10: 0.670 	ind 11: 0.670 	ind 12: 0.649 	ind 13: 0.681 	ind 14: 0.648 	ind 15: 0.644 	ind 16: 0.780 	ind 17: 0.601 	ind 18: 0.656 	ind 19: 0.678 	





2021-06-14 21:37:49,168 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  5.58it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.49it/s]
 10%|█         | 1/10 [00:00<00:01,  6.10it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.75it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           945.136
  RMSE          10258.307
  MDAE          0.029
  MARPD         136.884
  R2            -48404841.095
  Correlation   -0.020
  Root-mean-squared Calibration Error   0.109
  Mean-absolute Calibration Error       0.104
  Miscalibration Area                   0.105
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.168
     Group Size: 0.56 -- Calibration Error: 0.124
     Group Size: 1.00 -- Calibration Error: 0.104
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.164
     Group Size: 0.56 -- Calibration Error: 0.133
     Group Size: 1.00 -- Calibration Error: 0.109
  Sharpness   4089.206
  Negative-log-likelihood   3.097
  CRPS                      734.043
  Check Score               370.794
  Interval Score            3977.597
MAE =
0.29128523

100%|██████████| 100/100 [17:35<00:00, 10.55s/it] 

2021-06-14 21:55:46,001 - modnet - INFO - Loss per individual: ind 0: 2.112 	ind 1: 1.087 	ind 2: 1.699 	ind 3: 2.547 	ind 4: 0.631 	ind 5: 3.865 	ind 6: 0.688 	ind 7: 0.669 	ind 8: 0.721 	ind 9: 0.621 	ind 10: 0.614 	ind 11: 0.597 	ind 12: 10.027 	ind 13: 0.900 	ind 14: 0.639 	ind 15: 0.672 	ind 16: 3.067 	ind 17: 0.742 	ind 18: 0.649 	ind 19: 3.290 	





2021-06-14 21:55:47,862 - modnet - INFO - Generation number 1
2021-06-14 21:55:51,822 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [22:29<00:00, 13.49s/it]

2021-06-14 22:18:21,249 - modnet - INFO - Loss per individual: ind 0: 0.633 	ind 1: 0.623 	ind 2: 0.634 	ind 3: 0.623 	ind 4: 0.609 	ind 5: 0.664 	ind 6: 0.614 	ind 7: 0.636 	ind 8: 0.627 	ind 9: 0.652 	ind 10: 0.614 	ind 11: 0.658 	ind 12: 0.607 	ind 13: 0.663 	ind 14: 1.892 	ind 15: 0.619 	ind 16: 0.606 	ind 17: 0.602 	ind 18: 4.332 	ind 19: 0.629 	





2021-06-14 22:18:22,968 - modnet - INFO - Generation number 2
2021-06-14 22:18:26,998 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [37:36<00:00, 22.57s/it]  

2021-06-14 22:56:04,123 - modnet - INFO - Loss per individual: ind 0: 0.648 	ind 1: 0.657 	ind 2: 0.626 	ind 3: 0.678 	ind 4: 0.615 	ind 5: 0.621 	ind 6: 0.601 	ind 7: 0.614 	ind 8: 0.654 	ind 9: 0.627 	ind 10: 0.600 	ind 11: 0.703 	ind 12: 0.628 	ind 13: 0.696 	ind 14: 0.604 	ind 15: 0.622 	ind 16: 0.618 	ind 17: 0.625 	ind 18: 0.663 	ind 19: 0.665 	





2021-06-14 22:56:05,718 - modnet - INFO - Generation number 3
2021-06-14 22:56:10,004 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:10<00:00,  7.30s/it]

2021-06-14 23:08:20,107 - modnet - INFO - Loss per individual: ind 0: 0.659 	ind 1: 0.620 	ind 2: 0.664 	ind 3: 0.625 	ind 4: 0.613 	ind 5: 0.621 	ind 6: 0.632 	ind 7: 0.657 	ind 8: 0.644 	ind 9: 0.669 	ind 10: 0.659 	ind 11: 0.619 	ind 12: 0.645 	ind 13: 0.630 	ind 14: 0.630 	ind 15: 0.622 	ind 16: 0.622 	ind 17: 0.611 	ind 18: 0.602 	ind 19: 0.629 	





2021-06-14 23:08:21,598 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  8.07it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  6.81it/s]
 10%|█         | 1/10 [00:00<00:01,  7.31it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  6.30it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.321
  RMSE          0.734
  MDAE          0.020
  MARPD         137.974
  R2            0.813
  Correlation   0.905
  Root-mean-squared Calibration Error   0.082
  Mean-absolute Calibration Error       0.070
  Miscalibration Area                   0.071
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.138
     Group Size: 0.56 -- Calibration Error: 0.085
     Group Size: 1.00 -- Calibration Error: 0.070
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.147
     Group Size: 0.56 -- Calibration Error: 0.100
     Group Size: 1.00 -- Calibration Error: 0.082
  Sharpness   0.329
  Negative-log-likelihood   3.126
  CRPS                      0.253
  Check Score               0.128
  Interval Score            1.689
MAE =
0.3208305416901097
uncertainty =
0.

100%|██████████| 100/100 [10:57<00:00,  6.58s/it]

2021-06-14 23:19:32,350 - modnet - INFO - Loss per individual: ind 0: 0.611 	ind 1: 0.770 	ind 2: 0.576 	ind 3: 0.573 	ind 4: 0.654 	ind 5: 0.637 	ind 6: 0.635 	ind 7: 1.106 	ind 8: 0.627 	ind 9: 0.648 	ind 10: 0.592 	ind 11: 0.819 	ind 12: 0.607 	ind 13: 0.607 	ind 14: 0.613 	ind 15: 0.696 	ind 16: 0.828 	ind 17: 0.648 	ind 18: 0.644 	ind 19: 0.615 	





2021-06-14 23:19:33,661 - modnet - INFO - Generation number 1
2021-06-14 23:19:36,861 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [09:35<00:00,  5.75s/it]

2021-06-14 23:29:12,090 - modnet - INFO - Loss per individual: ind 0: 0.603 	ind 1: 0.632 	ind 2: 1.195 	ind 3: 1.451 	ind 4: 0.591 	ind 5: 0.701 	ind 6: 0.666 	ind 7: 0.756 	ind 8: 0.614 	ind 9: 0.590 	ind 10: 0.643 	ind 11: 1.294 	ind 12: 0.617 	ind 13: 0.591 	ind 14: 0.594 	ind 15: 2.881 	ind 16: 0.570 	ind 17: 0.612 	ind 18: 0.648 	ind 19: 0.593 	





2021-06-14 23:29:13,857 - modnet - INFO - Generation number 2
2021-06-14 23:29:18,361 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [09:51<00:00,  5.92s/it]

2021-06-14 23:39:10,228 - modnet - INFO - Loss per individual: ind 0: 0.616 	ind 1: 0.645 	ind 2: 0.624 	ind 3: 0.583 	ind 4: 0.604 	ind 5: 0.598 	ind 6: 0.607 	ind 7: 0.625 	ind 8: 0.610 	ind 9: 0.621 	ind 10: 0.615 	ind 11: 0.550 	ind 12: 0.713 	ind 13: 0.613 	ind 14: 0.618 	ind 15: 0.620 	ind 16: 0.588 	ind 17: 0.603 	ind 18: 0.715 	ind 19: 0.611 	





2021-06-14 23:39:12,070 - modnet - INFO - Generation number 3
2021-06-14 23:39:16,951 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:21<00:00,  6.81s/it]

2021-06-14 23:50:38,373 - modnet - INFO - Loss per individual: ind 0: 0.667 	ind 1: 0.590 	ind 2: 0.671 	ind 3: 0.670 	ind 4: 0.643 	ind 5: 0.701 	ind 6: 0.724 	ind 7: 0.592 	ind 8: 0.634 	ind 9: 0.604 	ind 10: 0.585 	ind 11: 0.659 	ind 12: 0.589 	ind 13: 0.607 	ind 14: 0.632 	ind 15: 0.591 	ind 16: 0.592 	ind 17: 0.599 	ind 18: 0.662 	ind 19: 0.635 	





2021-06-14 23:50:40,368 - modnet - INFO - Generation number 4
2021-06-14 23:50:45,730 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [14:15<00:00,  8.55s/it]

2021-06-15 00:05:00,898 - modnet - INFO - Loss per individual: ind 0: 0.643 	ind 1: 0.630 	ind 2: 0.592 	ind 3: 0.586 	ind 4: 0.629 	ind 5: 0.566 	ind 6: 0.596 	ind 7: 0.676 	ind 8: 0.578 	ind 9: 0.617 	ind 10: 0.659 	ind 11: 0.725 	ind 12: 0.578 	ind 13: 0.594 	ind 14: 0.619 	ind 15: 0.636 	ind 16: 1.972 	ind 17: 0.600 	ind 18: 0.588 	ind 19: 0.612 	





2021-06-15 00:05:02,715 - modnet - INFO - Generation number 5
2021-06-15 00:05:08,250 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:44<00:00,  7.05s/it]

2021-06-15 00:16:52,905 - modnet - INFO - Loss per individual: ind 0: 0.573 	ind 1: 0.619 	ind 2: 0.568 	ind 3: 0.583 	ind 4: 0.574 	ind 5: 0.593 	ind 6: 0.713 	ind 7: 0.589 	ind 8: 0.700 	ind 9: 0.580 	ind 10: 0.617 	ind 11: 0.596 	ind 12: 0.852 	ind 13: 0.588 	ind 14: 0.598 	ind 15: 0.703 	ind 16: 0.558 	ind 17: 0.601 	ind 18: 0.598 	ind 19: 0.598 	





2021-06-15 00:16:54,786 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  5.88it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.20it/s]
 10%|█         | 1/10 [00:00<00:01,  5.77it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.28it/s]

 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.368
  RMSE          0.744
  MDAE          0.038
  MARPD         137.341
  R2            0.817
  Correlation   0.905
  Root-mean-squared Calibration Error   0.075
  Mean-absolute Calibration Error       0.053
  Miscalibration Area                   0.054
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.129
     Group Size: 0.56 -- Calibration Error: 0.077
     Group Size: 1.00 -- Calibration Error: 0.053
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.143
     Group Size: 0.56 -- Calibration Error: 0.100
     Group Size: 1.00 -- Calibration Error: 0.075
  Sharpness   0.281
  Negative-log-likelihood   8.248
  CRPS                      0.295
  Check Score               0.149
  Interval Score            2.003
MAE =
0.36778862158025194
uncertainty =
0




In [11]:
# Per-fold MAE, computed after dropping predictions with absolute value above 20.
maes

array([0.3958398 , 0.35454262, 0.29128523, 0.32083054, 0.36778862])

In [12]:
# Mean MAE over the folds.
maes.mean()

0.34605736223726014

In [13]:
# Per-fold mean predicted uncertainty, computed after dropping values above 20.
uncertainties

array([0.2193156 , 0.19080055, 0.17977706, 0.18877757, 0.17967668])

In [14]:
# Mean predicted uncertainty over the folds.
uncertainties.mean()

0.19166949093341829

In [15]:
# Full uncertainty_toolbox metric dictionaries, one per fold.
metricss

[{'accuracy': {'mae': 0.39583979969546573,
   'rmse': 0.8585004758684266,
   'mdae': 0.07177907228469849,
   'marpd': 134.32681348338014,
   'r2': 0.7144733248240083,
   'corr': 0.8640669855670066},
  'avg_calibration': {'rms_cal': 0.07052779714067306,
   'ma_cal': 0.05581622678396872,
   'miscal_area': 0.056357731128329994},
  'adv_group_calibration': {'ma_adv_group_cal': {'group_sizes': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
           0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ]),
    'adv_group_cali_mean': array([0.42358081, 0.12136151, 0.09064195, 0.07555024, 0.07019225,
           0.07075613, 0.06661291, 0.06351702, 0.06030773, 0.05581623]),
    'adv_group_cali_stderr': array([4.05586300e-02, 3.27459087e-02, 1.67073727e-02, 7.47382366e-03,
           5.53433886e-03, 6.08288658e-03, 3.61350826e-03, 1.79924678e-03,
           1.86031052e-03, 7.31423639e-18])},
   'rms_adv_group_cal': {'group_sizes': array([0.        , 0.11111111, 0.2222222

#### Conclusion

The mean MAE over the 5 cross-validation folds (≈ 0.346, the value of `maes.mean()` above) will be our benchmark value for comparing the different methods in this repository.