# MODNet on experimental dataset 

This notebook trains a MODNet model whose hyperparameters are optimized with the genetic algorithm (GA), using joint learning on three targets: the experimental band gap, the PBE band gap, and their difference.

In [1]:
def setup_threading():
    """Configure thread-count and GPU-visibility environment variables.

    Sets the OpenBLAS, MKL, OpenMP and TensorFlow intra-/inter-op thread
    variables to ``'1'`` and sets ``CUDA_VISIBLE_DEVICES`` to ``"-1"``
    (conventionally used to hide all GPUs from the process).

    Returns:
        None. The function only mutates ``os.environ``.
    """
    import os

    single_thread_vars = (
        'OPENBLAS_NUM_THREADS',
        'MKL_NUM_THREADS',
        "OMP_NUM_THREADS",
        "TF_NUM_INTRAOP_THREADS",
        "TF_NUM_INTEROP_THREADS",
    )
    for var_name in single_thread_vars:
        os.environ[var_name] = '1'

    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
# Apply the thread / GPU environment settings in their own cell, before the
# numerical libraries are imported in the next cell.
setup_threading()

In [3]:
from modnet.preprocessing import MODData
from modnet.models.vanilla import MODNetModel
from modnet.hyper_opt.fit_genetic import FitGenetic
import numpy as np
import os
import copy

from collections import defaultdict
import itertools
import os
import pandas as pd
import matplotlib.pyplot as plt 
from IPython.display import Markdown
from matminer.datasets import load_dataset, get_all_dataset_info
from pymatgen.core import Composition

from modnet.featurizers import MODFeaturizer
from modnet.featurizers.presets import DeBreuck2020Featurizer

# NOTE(review): setup_threading() already set CUDA_VISIBLE_DEVICES to "-1";
# this assignment overrides it with "1". Confirm which value is intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


If you use the ChemEnv tool for your research, please consider citing the following reference(s) :
David Waroquiers, Xavier Gonze, Gian-Marco Rignanese, Cathrin Welker-Nieuwoudt, Frank Rosowski,
Michael Goebel, Stephan Schenk, Peter Degelmann, Rute Andre, Robert Glaum, and Geoffroy Hautier,
"Statistical analysis of coordination environments in oxides",
Chem. Mater., 2017, 29 (19), pp 8346-8360,
DOI: 10.1021/acs.chemmater.7b02766



In [4]:
from sklearn.model_selection import KFold
from modnet.preprocessing import MODData

def shuffle_MD(data,random_state=10):
    """Return a deep copy of ``data`` with its rows in a shuffled order.

    The new row order is drawn once from ``data.df_targets`` and then applied
    to ``df_featurized``, ``df_targets`` and ``df_structure`` so the three
    frames stay aligned. The input object is not modified.

    Args:
        data: Object exposing ``df_featurized``, ``df_targets`` and
            ``df_structure`` frames that share the same index labels.
        random_state: Seed passed to ``DataFrame.sample``.

    Returns:
        The shuffled deep copy.
    """
    shuffled = copy.deepcopy(data)
    new_order = shuffled.df_targets.sample(frac=1,random_state=random_state).index

    # Re-index every frame with the same label order.
    for frame_name in ("df_featurized", "df_targets", "df_structure"):
        setattr(shuffled, frame_name, getattr(shuffled, frame_name).loc[new_order])

    return shuffled

def MDKsplit(data,n_splits=5,random_state=10):
    """Split a MODData object into K (train, validation) MODData pairs.

    The data is first shuffled with ``shuffle_MD`` and then partitioned with a
    shuffling ``KFold``; both use the same ``random_state``. Each subset is a
    new ``MODData`` built from the ``structure`` column, the target values,
    the target column names and the structure ids, with the matching rows of
    ``df_featurized`` attached. ``optimal_features`` is not carried over.

    Args:
        data: Featurized MODData to split.
        n_splits: Number of folds.
        random_state: Seed used for both the shuffle and the KFold split.

    Returns:
        A list of ``n_splits`` tuples ``(data_train, data_val)``.
    """
    shuffled = shuffle_MD(data,random_state=random_state)
    ids = np.array(shuffled.structure_ids)
    target_names = shuffled.df_targets.columns

    def _subset(positions):
        # Build a MODData restricted to the given positional row indices.
        part = MODData(
            shuffled.df_structure.iloc[positions]['structure'].values,
            shuffled.df_targets.iloc[positions].values,
            target_names=target_names,
            structure_ids=ids[positions],
        )
        part.df_featurized = shuffled.df_featurized.iloc[positions]
        return part

    splitter = KFold(n_splits=n_splits,shuffle=True,random_state=random_state)
    return [
        (_subset(train_idx), _subset(val_idx))
        for train_idx, val_idx in splitter.split(ids)
    ]

In [5]:
# Load the pre-built inputs from disk (paths are relative to the working directory).
# NOTE(review): `md` and `df` are not referenced again in the visible part of
# this notebook; only `md_joint` is passed to the cross-validation split.
# NOTE(review): read_pickle should only be used on trusted files, since
# unpickling can execute arbitrary code.
md = MODData.load('exp_gap_all')
df = pd.read_pickle('df_exp_cleaned.pkl')
md_joint = MODData.load('exp_pbe_joint')

2021-06-14 17:44:07,236 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f3e1ffb6d30> object, created with modnet version <=0.1.7
2021-06-14 17:44:07,865 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f3e94e40430> object, created with modnet version <=0.1.7


In [6]:
# Inspect the target columns of the joint dataset.
md_joint.df_targets

Unnamed: 0,exp_gap,pbe_gap,difference
mp-12699,0.00,0.0000,0.0000
mp-559459,3.40,2.1704,1.2296
mp-21162,0.00,0.0000,0.0000
mp-1306,0.00,0.0000,0.0000
mp-15252,0.53,0.1049,0.4251
...,...,...,...
mp-556541,1.80,1.5135,0.2865
mp-2602,1.04,0.0000,1.0400
mp-30366,0.00,0.0000,0.0000
mp-3718,1.45,0.2418,1.2082


In [7]:
# Inspect the structure table of the joint dataset.
md_joint.df_structure

Unnamed: 0_level_0,structure
id,Unnamed: 1_level_1
mp-12699,"[[ 0.13616363 -2.60631371 1.50340165] Ca, [-1..."
mp-559459,"[[2.14049282 6.43464803 1.76199846] Cs, [-0.70..."
mp-21162,"[[0. 0. 0.] Sr, [0. 2.5259875 2.5259875..."
mp-1306,"[[3.259129 1.88166243 1.15971025] Th, [ 3.25..."
mp-15252,"[[0.95531657 1.042239 0.86229681] Cu, [2.803..."
...,...
mp-556541,"[[0. 6.49363948 5.82657217] In, [ 0. ..."
mp-2602,"[[3.023318 3.023318 3.023318] Nd, [0. 0. 0.] As]"
mp-30366,"[[4.51774558 1.37529211 4.32659015] Ca, [1.109..."
mp-3718,"[[2.0697935 0. 1.28932533] Th, [0. ..."


In [8]:
import uncertainty_toolbox as uct

# K-fold cross-validation of the GA-optimized MODNet model on the joint dataset.
k = 5
random_state = 202010
folds = MDKsplit(md_joint,n_splits=k,random_state=random_state)

# Per-fold results. The arrays are sized from k (they were hard-coded to 5), so
# changing the number of folds can neither raise an IndexError nor leave
# placeholder entries that would bias maes.mean() / uncertainties.mean().
maes = np.ones(k)
uncertainties = np.ones(k)
metricss = []
for i,f in enumerate(folds):
    train, test = f

    # Feature selection is cached on disk per (random_state, fold): reuse the
    # saved training MODData when it exists, otherwise compute and save it.
    fpath = 'train_{}_{}'.format(random_state,i+1)
    if os.path.exists(fpath):
        train = MODData.load(fpath)
    else:
        train.feature_selection(n=-1, use_precomputed_cross_nmi=True)
        train.save(fpath)

    # assure no overlap
    assert len(set(train.df_targets.index).intersection(set(test.df_targets.index))) == 0

    # Hyperparameter search with the genetic algorithm on the training fold.
    ga = FitGenetic(train)
    model = ga.run(size_pop=20, num_generations=10, n_jobs=10)

    pred, std = model.predict(test, return_unc=True)
    true = test.df_targets
    # NOTE: the uncertainty_toolbox metrics are computed on ALL test predictions,
    # i.e. before the outlier filter below, so metrics['accuracy']['mae'] can
    # differ strongly from `mae` when a fold contains unrealistic predictions.
    metrics = uct.metrics.get_all_metrics(pred['exp_gap'].values, std['exp_gap'].values, true['exp_gap'].values)
    error = pred['exp_gap']-true['exp_gap']
    # NOTE: the two filters are independent: errors are dropped where
    # |prediction| > 20, uncertainties where |std| > 20.
    error = error.drop(pred.index[((pred['exp_gap']).abs()>20)]) # drop unrealistic values: happens extremely rarely
    std = std['exp_gap'].drop(std.index[((std['exp_gap']).abs()>20)]) # drop unrealistic values: happens extremely rarely
    mae = np.abs(error.values).mean()
    uncertainty = np.abs(std).mean()
    print('MAE =')
    print(mae)
    print('uncertainty =')
    print(uncertainty)
    maes[i] = mae
    uncertainties[i] = uncertainty
    metricss.append(metrics)

2021-06-14 17:44:08,036 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,059 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,069 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,085 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,095 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,111 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,121 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,147 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,160 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:08,179 - modnet - INFO - Loaded DeBreuck2020Featurizer featurizer.
2021-06-14 17:44:09,053 - modnet - INFO - Loaded <modnet.preprocessing.MODData object at 0x7f3e0db3edc0> object, created with modnet version 0.1.9
2021-06-14 17

100%|██████████| 100/100 [14:08<00:00,  8.49s/it]

2021-06-14 17:58:19,170 - modnet - INFO - Loss per individual: ind 0: 0.600 	ind 1: 0.634 	ind 2: 0.619 	ind 3: 0.687 	ind 4: 0.655 	ind 5: 0.721 	ind 6: 0.597 	ind 7: 0.598 	ind 8: 0.668 	ind 9: 0.647 	ind 10: 0.651 	ind 11: 0.592 	ind 12: 0.635 	ind 13: 0.632 	ind 14: 0.758 	ind 15: 0.668 	ind 16: 0.623 	ind 17: 0.670 	ind 18: 0.639 	ind 19: 0.658 	





2021-06-14 17:58:20,876 - modnet - INFO - Generation number 1
2021-06-14 17:58:23,789 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:48<00:00,  7.68s/it]

2021-06-14 18:11:11,981 - modnet - INFO - Loss per individual: ind 0: 0.655 	ind 1: 0.590 	ind 2: 0.681 	ind 3: 0.612 	ind 4: 0.637 	ind 5: 0.690 	ind 6: 0.704 	ind 7: 0.614 	ind 8: 0.676 	ind 9: 0.601 	ind 10: 0.600 	ind 11: 0.609 	ind 12: 0.613 	ind 13: 0.650 	ind 14: 0.631 	ind 15: 0.578 	ind 16: 0.643 	ind 17: 0.584 	ind 18: 0.708 	ind 19: 0.628 	





2021-06-14 18:11:13,804 - modnet - INFO - Generation number 2
2021-06-14 18:11:16,857 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [13:21<00:00,  8.01s/it]

2021-06-14 18:24:38,117 - modnet - INFO - Loss per individual: ind 0: 0.711 	ind 1: 0.580 	ind 2: 0.619 	ind 3: 0.615 	ind 4: 0.613 	ind 5: 0.665 	ind 6: 0.653 	ind 7: 0.668 	ind 8: 0.617 	ind 9: 0.683 	ind 10: 0.626 	ind 11: 0.598 	ind 12: 0.596 	ind 13: 0.628 	ind 14: 0.638 	ind 15: 0.595 	ind 16: 0.631 	ind 17: 0.657 	ind 18: 0.615 	ind 19: 0.628 	





2021-06-14 18:24:39,826 - modnet - INFO - Generation number 3
2021-06-14 18:24:42,895 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [15:21<00:00,  9.22s/it]

2021-06-14 18:40:04,597 - modnet - INFO - Loss per individual: ind 0: 0.604 	ind 1: 0.641 	ind 2: 0.615 	ind 3: 0.575 	ind 4: 0.627 	ind 5: 0.673 	ind 6: 0.621 	ind 7: 0.645 	ind 8: 0.635 	ind 9: 0.619 	ind 10: 0.651 	ind 11: 0.626 	ind 12: 0.630 	ind 13: 0.608 	ind 14: 0.600 	ind 15: 0.615 	ind 16: 0.632 	ind 17: 0.615 	ind 18: 0.609 	ind 19: 0.617 	





2021-06-14 18:40:05,980 - modnet - INFO - Generation number 4
2021-06-14 18:40:08,577 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:52<00:00,  7.12s/it]

2021-06-14 18:52:00,884 - modnet - INFO - Loss per individual: ind 0: 0.605 	ind 1: 0.622 	ind 2: 0.669 	ind 3: 0.618 	ind 4: 0.655 	ind 5: 0.632 	ind 6: 0.625 	ind 7: 0.629 	ind 8: 0.648 	ind 9: 0.629 	ind 10: 0.610 	ind 11: 0.613 	ind 12: 0.660 	ind 13: 0.587 	ind 14: 0.665 	ind 15: 0.591 	ind 16: 0.697 	ind 17: 0.610 	ind 18: 0.703 	ind 19: 0.606 	





2021-06-14 18:52:02,767 - modnet - INFO - Generation number 5
2021-06-14 18:52:06,260 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [18:28<00:00, 11.08s/it]

2021-06-14 19:10:34,591 - modnet - INFO - Loss per individual: ind 0: 0.689 	ind 1: 0.575 	ind 2: 0.608 	ind 3: 0.632 	ind 4: 0.625 	ind 5: 0.738 	ind 6: 0.609 	ind 7: 0.647 	ind 8: 0.595 	ind 9: 0.633 	ind 10: 0.613 	ind 11: 0.606 	ind 12: 0.670 	ind 13: 0.796 	ind 14: 0.605 	ind 15: 0.609 	ind 16: 0.604 	ind 17: 0.683 	ind 18: 0.625 	ind 19: 0.577 	





2021-06-14 19:10:36,249 - modnet - INFO - Generation number 6
2021-06-14 19:10:39,425 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [17:31<00:00, 10.52s/it]

2021-06-14 19:28:11,036 - modnet - INFO - Loss per individual: ind 0: 0.632 	ind 1: 0.627 	ind 2: 0.640 	ind 3: 0.588 	ind 4: 0.594 	ind 5: 0.675 	ind 6: 0.599 	ind 7: 0.631 	ind 8: 0.625 	ind 9: 0.624 	ind 10: 0.647 	ind 11: 0.660 	ind 12: 0.621 	ind 13: 0.694 	ind 14: 0.636 	ind 15: 0.605 	ind 16: 0.667 	ind 17: 0.649 	ind 18: 0.765 	ind 19: 0.649 	





2021-06-14 19:28:12,763 - modnet - INFO - Generation number 7
2021-06-14 19:28:16,212 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [13:34<00:00,  8.14s/it]

2021-06-14 19:41:50,528 - modnet - INFO - Loss per individual: ind 0: 0.604 	ind 1: 0.619 	ind 2: 0.605 	ind 3: 0.691 	ind 4: 0.640 	ind 5: 0.656 	ind 6: 0.601 	ind 7: 0.616 	ind 8: 0.618 	ind 9: 0.651 	ind 10: 0.637 	ind 11: 0.647 	ind 12: 0.649 	ind 13: 0.618 	ind 14: 0.645 	ind 15: 0.635 	ind 16: 0.607 	ind 17: 0.688 	ind 18: 0.625 	ind 19: 0.605 	





2021-06-14 19:41:52,111 - modnet - INFO - Generation number 8
2021-06-14 19:41:55,341 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:26<00:00,  7.46s/it]

2021-06-14 19:54:21,419 - modnet - INFO - Loss per individual: ind 0: 0.621 	ind 1: 0.638 	ind 2: 0.595 	ind 3: 0.675 	ind 4: 0.693 	ind 5: 0.739 	ind 6: 0.630 	ind 7: 0.687 	ind 8: 0.755 	ind 9: 0.715 	ind 10: 0.580 	ind 11: 0.610 	ind 12: 0.661 	ind 13: 0.599 	ind 14: 0.635 	ind 15: 0.630 	ind 16: 0.573 	ind 17: 0.595 	ind 18: 0.659 	ind 19: 0.574 	





2021-06-14 19:54:23,069 - modnet - INFO - Generation number 9
2021-06-14 19:54:26,702 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [14:07<00:00,  8.48s/it]

2021-06-14 20:08:34,276 - modnet - INFO - Loss per individual: ind 0: 0.638 	ind 1: 0.628 	ind 2: 0.629 	ind 3: 0.639 	ind 4: 0.646 	ind 5: 0.582 	ind 6: 0.652 	ind 7: 0.622 	ind 8: 0.613 	ind 9: 0.614 	ind 10: 0.642 	ind 11: 0.651 	ind 12: 0.634 	ind 13: 0.674 	ind 14: 0.645 	ind 15: 0.632 	ind 16: 0.719 	ind 17: 0.698 	ind 18: 0.643 	ind 19: 0.632 	





 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  6.23it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.54it/s]
 10%|█         | 1/10 [00:00<00:01,  6.02it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.39it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.367
  RMSE          0.770
  MDAE          0.065
  MARPD         133.590
  R2            0.770
  Correlation   0.884
  Root-mean-squared Calibration Error   0.121
  Mean-absolute Calibration Error       0.106
  Miscalibration Area                   0.107
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.169
     Group Size: 0.56 -- Calibration Error: 0.125
     Group Size: 1.00 -- Calibration Error: 0.106
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.195
     Group Size: 0.56 -- Calibration Error: 0.143
     Group Size: 1.00 -- Calibration Error: 0.121
  Sharpness   0.396
  Negative-log-likelihood   1.320
  CRPS                      0.290
  Check Score               0.146
  Interval Score            1.900
MAE =
0.3667530573177938
uncertainty =
0.

100%|██████████| 100/100 [11:18<00:00,  6.78s/it]

2021-06-14 20:20:15,077 - modnet - INFO - Loss per individual: ind 0: 0.758 	ind 1: 0.598 	ind 2: 0.621 	ind 3: 0.768 	ind 4: 0.723 	ind 5: 0.643 	ind 6: 0.644 	ind 7: 0.606 	ind 8: 0.597 	ind 9: 0.620 	ind 10: 0.619 	ind 11: 0.608 	ind 12: 0.656 	ind 13: 0.678 	ind 14: 0.640 	ind 15: 0.705 	ind 16: 0.620 	ind 17: 0.617 	ind 18: 0.644 	ind 19: 0.749 	





2021-06-14 20:20:16,652 - modnet - INFO - Generation number 1
2021-06-14 20:20:19,988 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:02<00:00,  6.62s/it]

2021-06-14 20:31:22,364 - modnet - INFO - Loss per individual: ind 0: 0.615 	ind 1: 0.650 	ind 2: 0.618 	ind 3: 0.606 	ind 4: 0.600 	ind 5: 0.651 	ind 6: 0.664 	ind 7: 0.604 	ind 8: 0.591 	ind 9: 0.648 	ind 10: 0.647 	ind 11: 0.663 	ind 12: 0.750 	ind 13: 0.666 	ind 14: 0.611 	ind 15: 0.609 	ind 16: 0.769 	ind 17: 0.670 	ind 18: 0.618 	ind 19: 0.597 	





2021-06-14 20:31:23,854 - modnet - INFO - Generation number 2
2021-06-14 20:31:27,126 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:20<00:00,  6.20s/it]

2021-06-14 20:41:47,644 - modnet - INFO - Loss per individual: ind 0: 0.612 	ind 1: 0.602 	ind 2: 0.598 	ind 3: 0.586 	ind 4: 0.674 	ind 5: 0.617 	ind 6: 0.641 	ind 7: 0.697 	ind 8: 0.597 	ind 9: 0.608 	ind 10: 0.610 	ind 11: 0.649 	ind 12: 0.623 	ind 13: 0.815 	ind 14: 0.616 	ind 15: 0.612 	ind 16: 0.808 	ind 17: 0.624 	ind 18: 0.619 	ind 19: 0.618 	





2021-06-14 20:41:48,984 - modnet - INFO - Generation number 3
2021-06-14 20:41:52,013 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:47<00:00,  7.08s/it]

2021-06-14 20:53:39,815 - modnet - INFO - Loss per individual: ind 0: 0.717 	ind 1: 0.587 	ind 2: 0.621 	ind 3: 0.579 	ind 4: 0.654 	ind 5: 0.622 	ind 6: 0.609 	ind 7: 0.602 	ind 8: 0.641 	ind 9: 0.628 	ind 10: 0.614 	ind 11: 0.620 	ind 12: 0.736 	ind 13: 0.625 	ind 14: 0.599 	ind 15: 0.594 	ind 16: 0.623 	ind 17: 0.622 	ind 18: 0.639 	ind 19: 0.611 	





2021-06-14 20:53:41,331 - modnet - INFO - Generation number 4
2021-06-14 20:53:44,861 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:58<00:00,  6.59s/it]

2021-06-14 21:04:43,590 - modnet - INFO - Loss per individual: ind 0: 0.690 	ind 1: 0.602 	ind 2: 0.655 	ind 3: 0.617 	ind 4: 0.642 	ind 5: 0.618 	ind 6: 0.682 	ind 7: 0.603 	ind 8: 0.594 	ind 9: 0.635 	ind 10: 0.626 	ind 11: 0.601 	ind 12: 0.600 	ind 13: 0.612 	ind 14: 0.602 	ind 15: 0.587 	ind 16: 0.644 	ind 17: 0.609 	ind 18: 0.647 	ind 19: 0.643 	





2021-06-14 21:04:44,981 - modnet - INFO - Generation number 5
2021-06-14 21:04:48,112 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:27<00:00,  6.28s/it]

2021-06-14 21:15:16,102 - modnet - INFO - Loss per individual: ind 0: 0.607 	ind 1: 0.631 	ind 2: 0.657 	ind 3: 0.603 	ind 4: 0.613 	ind 5: 0.638 	ind 6: 0.617 	ind 7: 0.608 	ind 8: 0.660 	ind 9: 0.608 	ind 10: 0.593 	ind 11: 0.622 	ind 12: 0.613 	ind 13: 0.644 	ind 14: 0.605 	ind 15: 0.642 	ind 16: 0.637 	ind 17: 0.615 	ind 18: 0.625 	ind 19: 0.615 	





2021-06-14 21:15:17,564 - modnet - INFO - Generation number 6
2021-06-14 21:15:21,135 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:37<00:00,  6.98s/it]

2021-06-14 21:26:58,920 - modnet - INFO - Loss per individual: ind 0: 0.584 	ind 1: 0.592 	ind 2: 0.635 	ind 3: 0.633 	ind 4: 0.638 	ind 5: 0.637 	ind 6: 0.643 	ind 7: 0.725 	ind 8: 0.619 	ind 9: 0.653 	ind 10: 0.639 	ind 11: 0.640 	ind 12: 0.593 	ind 13: 0.586 	ind 14: 0.585 	ind 15: 0.597 	ind 16: 0.619 	ind 17: 0.747 	ind 18: 0.651 	ind 19: 0.620 	





2021-06-14 21:27:00,433 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  6.23it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  6.68it/s]
 10%|█         | 1/10 [00:00<00:01,  6.84it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  6.62it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.366
  RMSE          0.741
  MDAE          0.055
  MARPD         134.150
  R2            0.782
  Correlation   0.885
  Root-mean-squared Calibration Error   0.064
  Mean-absolute Calibration Error       0.055
  Miscalibration Area                   0.056
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.100
     Group Size: 0.56 -- Calibration Error: 0.067
     Group Size: 1.00 -- Calibration Error: 0.055
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.142
     Group Size: 0.56 -- Calibration Error: 0.077
     Group Size: 1.00 -- Calibration Error: 0.064
  Sharpness   0.384
  Negative-log-likelihood   9.129
  CRPS                      0.282
  Check Score               0.142
  Interval Score            1.757
MAE =
0.3663988329019684
uncertainty =
0.

100%|██████████| 100/100 [11:53<00:00,  7.13s/it]

2021-06-14 21:39:08,382 - modnet - INFO - Loss per individual: ind 0: 0.788 	ind 1: 0.637 	ind 2: 0.655 	ind 3: 0.684 	ind 4: 0.642 	ind 5: 0.638 	ind 6: 0.660 	ind 7: 0.608 	ind 8: 0.735 	ind 9: 0.682 	ind 10: 0.705 	ind 11: 0.688 	ind 12: 0.668 	ind 13: 0.706 	ind 14: 0.738 	ind 15: 0.688 	ind 16: 0.763 	ind 17: 0.671 	ind 18: 0.687 	ind 19: 0.619 	





2021-06-14 21:39:10,205 - modnet - INFO - Generation number 1
2021-06-14 21:39:14,077 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [17:45<00:00, 10.66s/it] 

2021-06-14 21:56:59,837 - modnet - INFO - Loss per individual: ind 0: 0.802 	ind 1: 0.630 	ind 2: 0.723 	ind 3: 0.705 	ind 4: 0.685 	ind 5: 0.723 	ind 6: 0.666 	ind 7: 0.639 	ind 8: 0.687 	ind 9: 0.643 	ind 10: 0.637 	ind 11: 0.628 	ind 12: 0.643 	ind 13: 0.611 	ind 14: 0.676 	ind 15: 0.667 	ind 16: 0.721 	ind 17: 0.792 	ind 18: 0.703 	ind 19: 0.864 	





2021-06-14 21:57:01,602 - modnet - INFO - Generation number 2
2021-06-14 21:57:05,482 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [24:07<00:00, 14.47s/it]  

2021-06-14 22:21:12,927 - modnet - INFO - Loss per individual: ind 0: 0.657 	ind 1: 0.728 	ind 2: 0.640 	ind 3: 0.683 	ind 4: 0.722 	ind 5: 0.599 	ind 6: 0.703 	ind 7: 0.667 	ind 8: 0.644 	ind 9: 0.640 	ind 10: 0.674 	ind 11: 0.672 	ind 12: 0.676 	ind 13: 0.731 	ind 14: 0.650 	ind 15: 0.652 	ind 16: 0.648 	ind 17: 0.692 	ind 18: 0.641 	ind 19: 0.737 	





2021-06-14 22:21:14,539 - modnet - INFO - Generation number 3
2021-06-14 22:21:18,417 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [39:05<00:00, 23.45s/it]  

2021-06-14 23:00:23,939 - modnet - INFO - Loss per individual: ind 0: 0.626 	ind 1: 0.603 	ind 2: 0.643 	ind 3: 0.713 	ind 4: 0.696 	ind 5: 0.663 	ind 6: 0.652 	ind 7: 0.669 	ind 8: 0.720 	ind 9: 0.745 	ind 10: 0.774 	ind 11: 0.711 	ind 12: 0.700 	ind 13: 0.796 	ind 14: 0.617 	ind 15: 0.628 	ind 16: 0.699 	ind 17: 0.681 	ind 18: 0.666 	ind 19: 0.655 	





2021-06-14 23:00:25,596 - modnet - INFO - Generation number 4
2021-06-14 23:00:29,951 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:08<00:00,  6.69s/it]

2021-06-14 23:11:38,765 - modnet - INFO - Loss per individual: ind 0: 0.702 	ind 1: 0.758 	ind 2: 0.645 	ind 3: 0.675 	ind 4: 0.773 	ind 5: 0.633 	ind 6: 0.631 	ind 7: 0.652 	ind 8: 0.618 	ind 9: 0.626 	ind 10: 0.638 	ind 11: 0.644 	ind 12: 0.696 	ind 13: 0.679 	ind 14: 0.716 	ind 15: 0.620 	ind 16: 0.721 	ind 17: 0.702 	ind 18: 0.723 	ind 19: 0.701 	





2021-06-14 23:11:40,417 - modnet - INFO - Generation number 5
2021-06-14 23:11:45,030 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:24<00:00,  6.84s/it]

2021-06-14 23:23:09,348 - modnet - INFO - Loss per individual: ind 0: 0.637 	ind 1: 0.619 	ind 2: 0.662 	ind 3: 0.639 	ind 4: 0.648 	ind 5: 0.654 	ind 6: 0.715 	ind 7: 0.696 	ind 8: 0.640 	ind 9: 0.710 	ind 10: 0.659 	ind 11: 0.642 	ind 12: 0.660 	ind 13: 0.673 	ind 14: 0.685 	ind 15: 0.623 	ind 16: 0.669 	ind 17: 0.719 	ind 18: 0.665 	ind 19: 0.694 	





2021-06-14 23:23:10,980 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  5.48it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.38it/s]
 10%|█         | 1/10 [00:00<00:01,  5.69it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  5.46it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           522.307
  RMSE          5667.701
  MDAE          0.026
  MARPD         136.838
  R2            -14775807.189
  Correlation   -0.020
  Root-mean-squared Calibration Error   0.048
  Mean-absolute Calibration Error       0.040
  Miscalibration Area                   0.041
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.107
     Group Size: 0.56 -- Calibration Error: 0.052
     Group Size: 1.00 -- Calibration Error: 0.040
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.105
     Group Size: 0.56 -- Calibration Error: 0.065
     Group Size: 1.00 -- Calibration Error: 0.048
  Sharpness   3148.170
  Negative-log-likelihood   2.308
  CRPS                      366.922
  Check Score               185.313
  Interval Score            1620.195
MAE =
0.282848312

100%|██████████| 100/100 [11:43<00:00,  7.04s/it]

2021-06-14 23:35:12,036 - modnet - INFO - Loss per individual: ind 0: 0.600 	ind 1: 0.615 	ind 2: 0.676 	ind 3: 0.643 	ind 4: 11.750 	ind 5: 4.952 	ind 6: 2.828 	ind 7: 0.714 	ind 8: 0.608 	ind 9: 1.389 	ind 10: 23.301 	ind 11: 0.656 	ind 12: 3.014 	ind 13: 0.650 	ind 14: 0.658 	ind 15: 1.122 	ind 16: 1.499 	ind 17: 2.946 	ind 18: 0.643 	ind 19: 2.947 	





2021-06-14 23:35:13,742 - modnet - INFO - Generation number 1
2021-06-14 23:35:17,698 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:19<00:00,  6.20s/it]

2021-06-14 23:45:37,688 - modnet - INFO - Loss per individual: ind 0: 0.648 	ind 1: 0.607 	ind 2: 0.652 	ind 3: 0.650 	ind 4: 0.630 	ind 5: 0.600 	ind 6: 0.601 	ind 7: 0.607 	ind 8: 0.651 	ind 9: 0.613 	ind 10: 0.633 	ind 11: 0.623 	ind 12: 0.612 	ind 13: 0.650 	ind 14: 0.632 	ind 15: 0.600 	ind 16: 0.619 	ind 17: 0.642 	ind 18: 0.623 	ind 19: 0.638 	





2021-06-14 23:45:39,339 - modnet - INFO - Generation number 2
2021-06-14 23:45:43,323 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:21<00:00,  6.21s/it]

2021-06-14 23:56:04,709 - modnet - INFO - Loss per individual: ind 0: 0.616 	ind 1: 0.719 	ind 2: 0.625 	ind 3: 0.603 	ind 4: 0.590 	ind 5: 0.670 	ind 6: 0.608 	ind 7: 0.633 	ind 8: 2.230 	ind 9: 0.664 	ind 10: 0.604 	ind 11: 0.623 	ind 12: 0.621 	ind 13: 0.579 	ind 14: 0.600 	ind 15: 0.621 	ind 16: 0.663 	ind 17: 0.634 	ind 18: 0.608 	ind 19: 0.626 	





2021-06-14 23:56:06,449 - modnet - INFO - Generation number 3
2021-06-14 23:56:10,980 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [09:43<00:00,  5.83s/it]

2021-06-15 00:05:54,527 - modnet - INFO - Loss per individual: ind 0: 0.665 	ind 1: 0.661 	ind 2: 0.607 	ind 3: 0.616 	ind 4: 0.665 	ind 5: 0.735 	ind 6: 0.597 	ind 7: 0.609 	ind 8: 0.604 	ind 9: 0.651 	ind 10: 0.619 	ind 11: 0.627 	ind 12: 0.628 	ind 13: 0.659 	ind 14: 0.608 	ind 15: 0.633 	ind 16: 0.659 	ind 17: 0.656 	ind 18: 0.636 	ind 19: 0.659 	





2021-06-15 00:05:56,101 - modnet - INFO - Generation number 4
2021-06-15 00:06:00,625 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [11:13<00:00,  6.74s/it]

2021-06-15 00:17:14,284 - modnet - INFO - Loss per individual: ind 0: 0.629 	ind 1: 0.607 	ind 2: 0.609 	ind 3: 0.596 	ind 4: 0.640 	ind 5: 5.276 	ind 6: 0.633 	ind 7: 0.675 	ind 8: 0.628 	ind 9: 0.636 	ind 10: 0.645 	ind 11: 0.609 	ind 12: 0.653 	ind 13: 0.608 	ind 14: 0.666 	ind 15: 0.626 	ind 16: 0.601 	ind 17: 0.619 	ind 18: 0.630 	ind 19: 0.599 	





2021-06-15 00:17:16,106 - modnet - INFO - Generation number 5
2021-06-15 00:17:21,566 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:31<00:00,  6.31s/it]

2021-06-15 00:27:53,094 - modnet - INFO - Loss per individual: ind 0: 0.629 	ind 1: 0.638 	ind 2: 0.621 	ind 3: 0.609 	ind 4: 0.621 	ind 5: 0.593 	ind 6: 0.608 	ind 7: 0.642 	ind 8: 0.612 	ind 9: 0.617 	ind 10: 0.615 	ind 11: 0.620 	ind 12: 0.624 	ind 13: 0.661 	ind 14: 0.702 	ind 15: 0.595 	ind 16: 0.614 	ind 17: 0.639 	ind 18: 0.627 	ind 19: 0.627 	





2021-06-15 00:27:54,672 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


 10%|█         | 1/10 [00:00<00:01,  7.80it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  6.87it/s]
 10%|█         | 1/10 [00:00<00:01,  8.54it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  6.60it/s]


 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.347
  RMSE          0.781
  MDAE          0.027
  MARPD         137.189
  R2            0.788
  Correlation   0.890
  Root-mean-squared Calibration Error   0.056
  Mean-absolute Calibration Error       0.042
  Miscalibration Area                   0.042
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.116
     Group Size: 0.56 -- Calibration Error: 0.051
     Group Size: 1.00 -- Calibration Error: 0.042
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.131
     Group Size: 0.56 -- Calibration Error: 0.073
     Group Size: 1.00 -- Calibration Error: 0.056
  Sharpness   0.336
  Negative-log-likelihood   40.104
  CRPS                      0.276
  Check Score               0.139
  Interval Score            1.824
MAE =
0.3467900265083331
uncertainty =
0

100%|██████████| 100/100 [11:36<00:00,  6.97s/it]

2021-06-15 00:39:47,145 - modnet - INFO - Loss per individual: ind 0: 1.878 	ind 1: 3.661 	ind 2: 1.410 	ind 3: 0.698 	ind 4: 0.625 	ind 5: 0.639 	ind 6: 0.631 	ind 7: 0.636 	ind 8: 0.631 	ind 9: 0.730 	ind 10: 0.649 	ind 11: 7.360 	ind 12: 0.662 	ind 13: 0.604 	ind 14: 0.668 	ind 15: 0.610 	ind 16: 0.654 	ind 17: 0.728 	ind 18: 0.708 	ind 19: 1.666 	





2021-06-15 00:39:48,795 - modnet - INFO - Generation number 1
2021-06-15 00:39:52,871 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [12:04<00:00,  7.25s/it]

2021-06-15 00:51:57,678 - modnet - INFO - Loss per individual: ind 0: 0.616 	ind 1: 0.614 	ind 2: 0.658 	ind 3: 0.652 	ind 4: 0.697 	ind 5: 0.634 	ind 6: 0.667 	ind 7: 0.649 	ind 8: 0.692 	ind 9: 0.626 	ind 10: 0.745 	ind 11: 0.636 	ind 12: 0.661 	ind 13: 0.611 	ind 14: 0.633 	ind 15: 0.622 	ind 16: 0.758 	ind 17: 0.630 	ind 18: 0.592 	ind 19: 0.614 	





2021-06-15 00:51:59,398 - modnet - INFO - Generation number 2
2021-06-15 00:52:03,521 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [10:59<00:00,  6.60s/it]

2021-06-15 01:03:03,622 - modnet - INFO - Loss per individual: ind 0: 0.682 	ind 1: 0.650 	ind 2: 0.760 	ind 3: 0.616 	ind 4: 0.633 	ind 5: 0.608 	ind 6: 0.661 	ind 7: 0.744 	ind 8: 0.647 	ind 9: 0.612 	ind 10: 0.618 	ind 11: 0.646 	ind 12: 0.675 	ind 13: 0.671 	ind 14: 0.666 	ind 15: 0.715 	ind 16: 0.643 	ind 17: 0.651 	ind 18: 0.623 	ind 19: 0.635 	





2021-06-15 01:03:04,778 - modnet - INFO - Generation number 3
2021-06-15 01:03:07,795 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [08:21<00:00,  5.01s/it]

2021-06-15 01:11:29,290 - modnet - INFO - Loss per individual: ind 0: 0.613 	ind 1: 0.624 	ind 2: 0.671 	ind 3: 0.627 	ind 4: 0.639 	ind 5: 0.658 	ind 6: 0.631 	ind 7: 0.682 	ind 8: 0.617 	ind 9: 0.674 	ind 10: 0.629 	ind 11: 0.624 	ind 12: 0.631 	ind 13: 0.653 	ind 14: 0.607 	ind 15: 0.605 	ind 16: 0.634 	ind 17: 0.639 	ind 18: 0.636 	ind 19: 0.647 	





2021-06-15 01:11:30,462 - modnet - INFO - Generation number 4
2021-06-15 01:11:33,972 - modnet - INFO - Multiprocessing on 10 cores. Total of 128 cores available.


100%|██████████| 100/100 [07:40<00:00,  4.61s/it]

2021-06-15 01:19:14,940 - modnet - INFO - Loss per individual: ind 0: 0.621 	ind 1: 0.598 	ind 2: 0.617 	ind 3: 0.638 	ind 4: 0.646 	ind 5: 0.608 	ind 6: 0.618 	ind 7: 0.614 	ind 8: 0.643 	ind 9: 0.637 	ind 10: 0.614 	ind 11: 0.607 	ind 12: 0.636 	ind 13: 0.613 	ind 14: 0.601 	ind 15: 0.694 	ind 16: 0.613 	ind 17: 0.642 	ind 18: 0.650 	ind 19: 0.620 	





2021-06-15 01:19:15,917 - modnet - INFO - Early stopping: same best model for 4 consecutive generations
 (1/n) Calculating accuracy metrics
 (2/n) Calculating average calibration metrics


  0%|          | 0/10 [00:00<?, ?it/s]

 (3/n) Calculating adversarial group calibration metrics
  [1/2] for mean absolute calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:01<00:00,  9.88it/s]
 20%|██        | 2/10 [00:00<00:00, 10.71it/s]

  [2/2] for root mean squared calibration error
Measuring adversarial group calibration by spanning group size between 0.0 and 1.0, in 10 intervals


100%|██████████| 10/10 [00:00<00:00, 10.21it/s]

 (4/n) Calculating sharpness metrics
 (n/n) Calculating proper scoring rule metrics
**Finished Calculating All Metrics**


  MAE           0.395
  RMSE          0.764
  MDAE          0.034
  MARPD         137.516
  R2            0.807
  Correlation   0.901
  Root-mean-squared Calibration Error   0.158
  Mean-absolute Calibration Error       0.144
  Miscalibration Area                   0.145
  Mean-absolute Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.197
     Group Size: 0.56 -- Calibration Error: 0.163
     Group Size: 1.00 -- Calibration Error: 0.144
  Root-mean-squared Adversarial Group Calibration Error
     Group Size: 0.11 -- Calibration Error: 0.225
     Group Size: 0.56 -- Calibration Error: 0.181
     Group Size: 1.00 -- Calibration Error: 0.158
  Sharpness   0.333
  Negative-log-likelihood   18.395
  CRPS                      0.310
  Check Score               0.156
  Interval Score            2.055
MAE =
0.3950795460709225
uncertainty =
0




In [9]:
# Per-fold MAE on exp_gap, computed after dropping rows with |prediction| > 20.
maes

array([0.36675306, 0.36639883, 0.28284831, 0.34679003, 0.39507955])

In [10]:
# Cross-validated MAE: mean of the per-fold values.
maes.mean()

0.3515739550393092

In [11]:
# Per-fold mean absolute `std` returned by model.predict for exp_gap,
# computed after dropping rows with |std| > 20.
uncertainties

array([0.24604288, 0.23338228, 0.18500507, 0.19868283, 0.19745734])

In [12]:
# Mean of the per-fold uncertainties.
uncertainties.mean()

0.21211408078670502

In [13]:
# Full uncertainty_toolbox metric dictionaries, one per fold (computed before
# the outlier filtering that is applied to `maes` and `uncertainties`).
metricss

[{'accuracy': {'mae': 0.3667530573177938,
   'rmse': 0.7697104567030121,
   'mdae': 0.06484133243560786,
   'marpd': 133.59006994269183,
   'r2': 0.770480081543925,
   'corr': 0.8843674043278756},
  'avg_calibration': {'rms_cal': 0.12101189329577201,
   'ma_cal': 0.10626588465298142,
   'miscal_area': 0.10732491537610164},
  'adv_group_calibration': {'ma_adv_group_cal': {'group_sizes': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
           0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ]),
    'adv_group_cali_mean': array([0.41220707, 0.16939394, 0.14652472, 0.13480728, 0.12572608,
           0.12490139, 0.11849757, 0.11385443, 0.1120381 , 0.10626588]),
    'adv_group_cali_stderr': array([8.07998419e-02, 1.75891600e-02, 1.52666803e-02, 1.23398990e-02,
           1.06878253e-02, 8.77957956e-03, 5.01146067e-03, 4.42735620e-03,
           2.09615779e-03, 1.46284728e-17])},
   'rms_adv_group_cal': {'group_sizes': array([0.        , 0.11111111, 0.22222222, 

#### Conclusion

The mean 5-fold MAE on `exp_gap` is 0.352. Relative percentage change of the error compared to the MODNet_exp_GA reference: -2%.