# The importance of good co-ordinates when using MACEst

In [None]:
%load_ext autoreload
%autoreload 2

In [12]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd

# import seaborn as sns 

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import scale

from sklearn.calibration import CalibratedClassifierCV
from sklearn.decomposition import PCA
from sklearn.metrics import log_loss, brier_score_loss
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NeighborhoodComponentsAnalysis

from macest.classification import models as clmod
from macest.classification import plots as clplot

RuntimeError: module compiled against API version 0xe but this version of numpy is 0xd

ImportError: numpy.core.multiarray failed to import

In [None]:
# Plot styling for the notebook. The top-of-notebook `import seaborn as sns`
# is commented out, so bring `sns` into scope here before using it.
import seaborn as sns

# Alternative contexts with larger fonts:
# sns.set_context("talk")
# sns.set_context("poster")
sns.set_context("notebook")
sns.set_style('darkgrid')

In [None]:
# Column names used when reading the letter-recognition data file:
# the target column ('lettr') followed by the 16 attribute columns.
cols = [
    'lettr',
    'x-box', 'y-box', 'width', 'high',
    'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br',
    'x-ege', 'xegvy', 'y-ege', 'yegvx',
]

# The German Credit XGBoost model

### The Data

The German Credit data set is a publicly available data set downloaded from the UCI Machine Learning Repository. It contains 20 variables, plus a classification of whether the applicant is considered a Good or a Bad credit risk, for 1000 loan applicants.

#### [Data Source](https://archive.ics.uci.edu/ml/datasets/statlog+(german+credit+data))
- Professor Dr. Hans Hofmann  
- Institut für Statistik und Ökonometrie  
- Universität Hamburg  
- FB Wirtschaftswissenschaften  
- Von-Melle-Park 5    
- 2000 Hamburg 13

#### Benchmark
![Credit Risk Classification: Faster Machine Learning with Intel Optimized Packages](https://i.imgur.com/nL1l7WI.png)

According to [1], the best model is Random Forest with balanced feature-selection data. It has an Accuracy of 82%, Precision of 84%, Recall of 82% and F1-Score of 81%. 

<br>


The goal of this kernel is to beat the benchmark by:
- Converting the dataset into a machine-learning-friendly format (Feature Engineering)
- Developing an XGBoost model to predict whether a loan is a good or bad risk
- Finding the best parameters for the XGBoost model (Hyperparameter Tuning)
- Beating the benchmark

### Import Library

In [None]:
#Importing necessary packages in Python 
%matplotlib inline 
import matplotlib.pyplot as plt 

import numpy as np ; np.random.seed(sum(map(ord, "aesthetics")))
import pandas as pd

from sklearn.datasets import make_classification 
from sklearn.model_selection import learning_curve
#from sklearn.cross_validation import train_test_split 
#from sklearn.grid_search import GridSearchCV
#from sklearn.cross_validation import ShuffleSplit
from sklearn.metrics import classification_report,confusion_matrix, roc_curve, roc_auc_score, auc, accuracy_score
from sklearn.model_selection import ShuffleSplit,train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, label_binarize, StandardScaler, MinMaxScaler

import seaborn 
seaborn.set_context('notebook') 
seaborn.set_style(style='darkgrid')

from pprint import pprint 
 


### Evaluation Function


In [None]:
# Function for evaluation reports
def get_eval1(clf, X, y):
    """Print 2-fold cross-validated accuracy, precision, recall and ROC AUC.

    Each metric is computed with ``cross_val_score(clf, X, y, cv=2)`` and
    reported as the mean and standard deviation over the folds.

    Args:
        clf: Estimator passed to ``cross_val_score``.
        X: Feature data to cross-validate on.
        y: Target values matching ``X``.

    Returns:
        None. The results are printed.
    """
    # (scoring name, report line) pairs, in the order they are printed.
    report_lines = (
        ('accuracy', "Cross Validation Accuracy: %0.2f (+/- %0.2f)"),
        ('precision', "Cross Validation Precision: %0.2f (+/- %0.2f)"),
        ('recall', "Cross Validation Recall: %0.2f (+/- %0.2f)"),
        ('roc_auc', "Cross Validation roc_auc: %0.2f (+/- %0.2f)"),
    )

    # Run every cross-validation first so nothing is printed if one of them fails.
    fold_scores = [
        cross_val_score(clf, X, y, cv=2, scoring=scoring)
        for scoring, _ in report_lines
    ]

    # The mean score and standard deviation of the score estimate.
    for (_, template), scores in zip(report_lines, fold_scores):
        print(template % (scores.mean(), scores.std()))

def get_eval2(clf, X_train, y_train, X_test, y_test):
    """Print 2-fold cross-validated metrics computed on the test split only.

    Args:
        clf: Estimator passed to ``cross_val_score``.
        X_train: Accepted for signature compatibility; not used.
        y_train: Accepted for signature compatibility; not used.
        X_test: Feature data the cross-validation is run on.
        y_test: Target values matching ``X_test``.

    Returns:
        None. The results are printed.
    """
    # The report is exactly the one get_eval1 produces, applied to the test
    # split, so reuse it rather than repeating the four scoring calls.
    get_eval1(clf, X_test, y_test)
  
# Function to get roc curve
def get_roc(y_test, y_pred):
    """Plot the ROC curve of ``y_pred`` against ``y_test`` and show the figure.

    Args:
        y_test: True labels, passed to ``roc_curve``.
        y_pred: Predicted scores or probabilities, passed to ``roc_curve``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_test, y_pred)
    area_under_curve = auc(false_pos_rate, true_pos_rate)

    plt.figure()
    plt.plot(false_pos_rate, true_pos_rate, color='darkorange',
             label='ROC curve (area = %0.2f)' % area_under_curve)
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="upper left")
    plt.show()


### XGBoost Model

In [None]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

import xgboost as xgb
from xgboost import XGBClassifier
#print('XGBoost v',xgb.__version__)

# Fit an XGBoost classifier with early stopping, then evaluate it on the test data
def xgbclf(params, X_train, y_train,X_test, y_test):
    """Fit an XGBClassifier with early stopping, refit it, and report test metrics.

    Steps, in order:
      1. Fit ``XGBClassifier(**params)`` on the training data, monitoring AUC
         on both the training and the test split with 100 early-stopping rounds.
      2. Set ``n_estimators`` to ``model.best_ntree_limit`` and refit on the
         training data.
      3. Print the confusion matrix, classification report and accuracy on the
         test split, and plot the ROC curve via ``get_roc``.

    Args:
        params: Keyword arguments forwarded to ``XGBClassifier``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Test features; also part of the early-stopping ``eval_set``.
        y_test: Test labels; also part of the early-stopping ``eval_set``.

    Returns:
        The refit ``XGBClassifier``.

    NOTE(review): the test split is included in ``eval_set`` while early
    stopping is active, so the number of trees is presumably chosen using test
    data and the metrics printed below may be optimistic -- consider a
    separate validation split.
    NOTE(review): passing ``eval_metric``/``early_stopping_rounds`` to ``fit``,
    the ``ntree_limit`` argument and ``best_ntree_limit`` look tied to older
    xgboost releases -- confirm against the installed version.
    """

    # Evaluation sets monitored during the first fit: training data, then test data.
    eval_set=[(X_train, y_train), (X_test, y_test)]

    # First fit: AUC is evaluated on eval_set, with early stopping after 100
    # rounds and progress printed every 100 rounds.
    model = XGBClassifier(**params).\
      fit(X_train, y_train, eval_set=eval_set, \
                  eval_metric='auc', early_stopping_rounds = 100, verbose=100)

    #print(model.best_ntree_limit)

    # Second fit: cap the number of trees at the value found above and refit
    # on the training data only (no eval_set, no early stopping).
    model.set_params(**{'n_estimators': model.best_ntree_limit})
    model.fit(X_train, y_train)
    #print(model,'\n')

    # Predict target variables y for test data
    y_pred = model.predict(X_test, ntree_limit=model.best_ntree_limit) #model.best_iteration
    #print(y_pred)

    # Get Cross Validation and Confusion matrix
    #get_eval(model, X_train, y_train)
    #get_eval2(model, X_train, y_train,X_test, y_test)

    # Create and print confusion matrix
    abclf_cm = confusion_matrix(y_test,y_pred)
    print(abclf_cm)

    #y_pred = model.predict(X_test)
    print (classification_report(y_test,y_pred) )
    print ('\n')
    print ("Model Final Generalization Accuracy: %.6f" %accuracy_score(y_test,y_pred) )

    # Predict probabilities target variables y for test data
    # Column 1 of predict_proba is used as the score for the ROC curve.
    y_pred_proba = model.predict_proba(X_test, ntree_limit=model.best_ntree_limit)[:,1] #model.best_iteration
    get_roc (y_test,y_pred_proba)
    return model

def plot_featureImportance(model, keys):
    """Plot a horizontal bar chart of the ten largest feature importances.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        keys: Feature names, in the same order as ``feature_importances_``.

    Returns:
        None. The chart is drawn through the pandas plotting API.
    """
    ranked = pd.DataFrame(
        {'Importance': list(model.feature_importances_), 'Feature': list(keys)}
    ).sort_values(by='Importance')
    # Ascending sort, so the last ten rows are the most important features.
    ranked.tail(10).plot(kind='barh', x='Feature', figsize=(8, 8), color='orange')

### Preprocess
- Importing Dataset
- StandardScaler
- Encoding Categorical Feature
- Concatenate Transformed Dataset
- Split Training Dataset
- XGBoost  1a: Unbalanced Dataset (Base Model: ROC_AUC:0.74)
- XGBoost  1b: Unbalanced Dataset (ROC_AUC:0.79)

### Import Dataset

OK let's get started. We'll download the data from the UCI website.

In [None]:
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"

# Column names for the 20 attributes followed by the target column.
names = [
    'existingchecking', 'duration', 'credithistory', 'purpose',
    'creditamount', 'savings', 'employmentsince', 'installmentrate',
    'statussex', 'otherdebtors', 'residencesince', 'property',
    'age', 'otherinstallmentplans', 'housing', 'existingcredits',
    'job', 'peopleliable', 'telephone', 'foreignworker',
    'classification',
]

# The file is read as space-separated values with the column names supplied above.
data = pd.read_csv(url, names=names, sep=' ')
print(data.shape)
print(data.columns)
data.head(10)

In [None]:
# Binarize the y output for easier use of e.g. ROC curves -> 0 = 'bad' credit; 1 = 'good' credit.
# The result is assigned back to the column rather than calling
# `data.classification.replace(..., inplace=True)`: that chained in-place call
# works on an intermediate Series and is not guaranteed to update `data`
# (it stops updating the parent frame under pandas Copy-on-Write).
data['classification'] = data['classification'].replace({1: 1, 2: 0})
# Print number of 'good' credits (should be 700) and 'bad' credits (should be 300)
data.classification.value_counts()

### StandardScaler

In [None]:
# Names of the numerical columns; the target is listed last.
numvars = [
    'creditamount', 'duration', 'installmentrate', 'residencesince',
    'age', 'existingcredits', 'peopleliable', 'classification',
]

# Standardise every numerical column except the target.
# NOTE(review): `numdata_std` does not appear to be used by the later cells
# shown here, which build `data_clean` from the unscaled columns -- confirm.
numeric_features = data[numvars].drop(columns=['classification'])
numdata_std = pd.DataFrame(StandardScaler().fit_transform(numeric_features))

### Encoding Categorical Feature

Label encoding transforms categorical values into numerical ones and enables better visualization than one-hot encoding.

In [None]:
from collections import defaultdict

# Names of the categorical columns.
catvars = [
    'existingchecking', 'credithistory', 'purpose', 'savings',
    'employmentsince', 'statussex', 'otherdebtors', 'property',
    'otherinstallmentplans', 'housing', 'job', 'telephone',
    'foreignworker',
]

# One LabelEncoder per column, created on first access and kept in `d`
# under the column name.
d = defaultdict(LabelEncoder)

# Label-encode each categorical column with its own encoder.
lecatdata = data[catvars].apply(lambda column: d[column.name].fit_transform(column))

# Show the original categories next to their encoded values.
for catvar in catvars:
    print(catvar, ": ", data[catvar].unique())
    print(catvar, ": ", lecatdata[catvar].unique())

# One-hot encoding: a dummy column for every category of every categorical variable.
dummyvars = pd.get_dummies(data[catvars])

### Concatenate Transformed Dataset
Append the dummy variables to the initial numerical variables `numvars`.

In [8]:
# Place the numerical columns (target included) and the one-hot encoded
# categorical columns side by side.
data_clean = pd.concat([data[numvars], dummyvars], axis='columns')

print(data_clean.shape)

NameError: name 'data' is not defined

### Split Training Dataset

In [9]:
# Unscaled, unnormalized data
X_clean = data_clean.drop(columns='classification')
y_clean = data_clean['classification']

# First split: 40% trains the classifier, the other 60% is divided further below.
X_train, X_conf_train, y_train, y_conf_train = train_test_split(
    X_clean, y_clean, test_size=0.6, random_state=10)

# Second split: half of the remainder becomes the confidence-model training data.
X_conf_train, X_cal, y_conf_train, y_cal = train_test_split(
    X_conf_train, y_conf_train, test_size=0.5, random_state=0)

# Third split: what is left is halved into calibration and test data.
X_cal, X_test, y_cal, y_test = train_test_split(
    X_cal, y_cal, test_size=0.5, random_state=0)

NameError: name 'data_clean' is not defined

In [10]:
print('X_train, y_train:', np.shape(X_train), np.shape(y_train))
print('X_test, y_test:', np.shape(X_test), np.shape(y_test))

print('X_conf_train, y_conf_train:', np.shape(X_conf_train), np.shape(y_conf_train))
print('X_cal, y_cal:', np.shape(X_cal), np.shape(y_cal))

NameError: name 'X_train' is not defined

### XGBoost  Training (ROC_AUC:0.79)

In [None]:
# Hyperparameters forwarded to XGBClassifier by xgbclf.
# Options left out on purpose: gamma=0.01, seed=1024.
params2 = dict(
    n_estimators=3000,
    objective='binary:logistic',
    learning_rate=0.005,
    subsample=0.555,
    colsample_bytree=0.7,
    min_child_weight=3,
    max_depth=8,
    n_jobs=-1,
)

# Train on the classifier split and report metrics on the test split.
model = xgbclf(params2, X_train, y_train, X_test, y_test)

In [None]:
model.fit(X_train, y_train,)

### XGBoost Predictions

In [None]:
model.score(X_test, y_test)

In [None]:
print(model.score(X_conf_train, y_conf_train))
print(model.score(X_cal, y_cal))

# MACEst Confidence Model For The German Credit XGBoost model

Training MACEst model

In [None]:
from macest.classification import models as clmod
from macest.classification import plots as clplot

param_bounds = clmod.SearchBounds(alpha_bounds = (0, 500), k_bounds = (5,15))
neighbour_search_params = clmod.HnswGraphArgs(query_args = dict(ef = 1100))

macest_model = clmod.ModelWithConfidence(model,
                                       X_conf_train,
                                       y_conf_train)

macest_model.fit(X_cal, np.array(y_cal))

We can change the MACEst defaults if we want, as shown below, by editing the NamedTuple for each parameter.

In [None]:
clmod.SearchBounds()

In [None]:
clmod.HnswGraphArgs()

Optimiser args are the arguments passed to scipy differential evolution and must be passed as a dictionary

In [None]:
optimiser_args = dict(popsize = 15)

In [None]:
# param_bounds = clmod.SearchBounds(k_bounds = (3,50))
# neighbour_search_params = clmod.HnswGraphArgs(query_args = dict(ef = 1100), 
#                                               init_args = dict(method = 'hnsw', space = 'cosinesimil'))

In [None]:
param_bounds = clmod.SearchBounds(k_bounds = (5,50))
neighbour_search_params = clmod.HnswGraphArgs(query_args = dict(ef = 1000))
optimiser_args = dict(popsize = 25, disp = False)

In [None]:
macest_model = clmod.ModelWithConfidence(model,
                                      X_conf_train,
                                      y_conf_train, 
                                      search_method_args =neighbour_search_params)

macest_model.fit(X_cal,
               np.array(y_cal),
               param_range = param_bounds,
               optimiser_args = optimiser_args)

In [None]:
macest_model.macest_model_params

In [None]:
macest_model.__dir__()

In [None]:
xgboost_preds = model.predict(X_test)
macest_conf_preds = macest_model.predict_proba(X_test)
xgboost_conf_preds = model.predict_proba(X_test)

In [None]:
xgboost_preds

In [None]:
xgboost_conf_preds

In [None]:
macest_conf_preds

In [None]:
macest_point_prediction_conf = macest_model.predict_confidence_of_point_prediction(X_test) 
                              
xgboost_point_prediction_conf = np.amax(xgboost_conf_preds, axis=1)

In [None]:
macest_point_prediction_conf

In [None]:
xgboost_point_prediction_conf

In [None]:
clplot.plot_calibration_curve([xgboost_point_prediction_conf,
                               macest_point_prediction_conf], 
                              ['XGBoost', 'MACE'],
                              xgboost_preds,
                              y_test)

### Let's compare calibration and forecast metrics

In [None]:
clplot.plot_calibration_metrics([xgboost_point_prediction_conf, 
                                macest_point_prediction_conf], 
                              ['XGBoost','MACE'], xgboost_preds, y_test)

In [None]:
clplot.plot_forecast_metrics([xgboost_point_prediction_conf, 
                                macest_point_prediction_conf], 
                              ['XGBoost','MACE'], xgboost_preds, y_test)

### FPR_FNR

### We will try to add confidence to a classic ML challenge, classifying images of letters based upon some statistical attributes (https://archive.ics.uci.edu/ml/datasets/letter+recognition)

Before running this stage, please download the `letter-recognition.data` file from the above URL and place it within the `data` folder in the root directory of this project.

In [None]:
letters_df = pd.read_csv("../../data/letter-recognition.data", header=None, names=cols).sample(frac=1)
letters_df.reset_index(drop=True, inplace=True)

In [None]:
y = letters_df['lettr']
X = letters_df.drop('lettr', axis =1)

### In the original feature space we have lots of correlated variables, and the feature importance is unlikely to be even amongst all of them

In [None]:
plt.figure(figsize = (12,10))
sns.heatmap(X.corr(),
            cmap = 'coolwarm',
            annot = True,
            vmin = -1.1, vmax =1.1)

In [None]:
X = X/16

In [None]:
enc = LabelEncoder()
y_clean = enc.fit_transform(y_clean)

In [None]:
X_pp_train, X_conf_train, y_pp_train, y_conf_train  = train_test_split(X_clean, y_clean, test_size=0.66, random_state=0)

X_conf_train, X_cal, y_conf_train, y_cal = train_test_split(X_conf_train, y_conf_train,
                                                            test_size=0.4, random_state=0)
X_cal, X_test, y_cal,  y_test, = train_test_split(X_cal, y_cal, test_size=0.4, random_state=0)

In [None]:
print(X_pp_train.shape[0])
print(X_conf_train.shape[0])
print(X_cal.shape[0])
print(X_test.shape[0])

In [None]:
model = RandomForestClassifier(random_state =0,
                               n_estimators =800,
                               n_jobs =-1)
model.fit(X_pp_train, y_pp_train)

In [None]:
print(model.score(X_pp_train, y_pp_train))
print(model.score(X_conf_train, y_conf_train))
print(model.score(X_cal, y_cal))
print(model.score(X_test, y_test))

### We will use the L2 metric; this is implicitly saying that our measure of similarity between data points is the Euclidean distance in feature space

In [None]:
param_bounds = clmod.SearchBounds(k_bounds = (3,50))
neighbour_search_params = clmod.HnswGraphArgs(query_args = dict(ef = 1000))
optimiser_args = dict(popsize = 25)

In [None]:
macest_model = clmod.ModelWithConfidence(model,
                                      X_conf_train,
                                      y_conf_train, 
                                      search_method_args = neighbour_search_params)

macest_model.fit(X_cal, y_cal, param_range = param_bounds, optimiser_args= optimiser_args)

## Unseen data

In [None]:
preds = model.predict(X_test)
conf_preds = macest_model.predict_proba(X_test)
rf_conf = model.predict_proba(X_test)
rf_point_prediction_conf = np.amax(rf_conf, axis=1)
macest_point_prediction_conf = macest_model.predict_confidence_of_point_prediction(X_test)

In [None]:
clplot.plot_calibration_curve([rf_point_prediction_conf,
                              macest_point_prediction_conf], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

In [None]:
clplot.plot_quantile_spaced_calibration_curve([rf_point_prediction_conf,
                             macest_point_prediction_conf,
                                ], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

In [None]:
clplot.plot_calibration_metrics([rf_point_prediction_conf,
                             macest_point_prediction_conf,
                                ], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

In [None]:
clplot.plot_forecast_metrics([rf_point_prediction_conf,
                             macest_point_prediction_conf], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

We see that MACEst performs better than the raw estimates from the random forest; however, it's still not great. Let's try inducing a better co-ordinate system.

### PCA

In [None]:
# Hold out 66% of the letters data for the confidence model, calibration and testing.
X_pp_train, X_conf_train, y_pp_train, y_conf_train = train_test_split(
    X, y, test_size=0.66, random_state=0)

# Fit the whitened PCA (n_components=0.95) on the classifier's training split
# only, then project both partitions with that same fitted transform.
pca = PCA(n_components=0.95, whiten=True).fit(X_pp_train)
X_pp_train, X_conf_train = (
    pca.transform(part) for part in (X_pp_train, X_conf_train)
)

# Divide the projected hold-out data into confidence-model training,
# calibration and test data.
X_conf_train, X_cal, y_conf_train, y_cal = train_test_split(
    X_conf_train, y_conf_train, test_size=0.4, random_state=0)
X_cal, X_test, y_cal, y_test = train_test_split(
    X_cal, y_cal, test_size=0.4, random_state=0)

In [None]:
model = RandomForestClassifier(random_state =0,
                               n_estimators =800,
                               n_jobs =-1)
model.fit(X_pp_train, y_pp_train)

In [None]:
X_cal.shape

In [None]:
print(model.score(X_pp_train, y_pp_train))
print(model.score(X_conf_train, y_conf_train))
print(model.score(X_cal, y_cal))
print(model.score(X_test, y_test))

### We are now saying points are similar if the Euclidean distance between their projections in whitened PCA space (similar to the Mahalanobis distance) is small

In [None]:
neighbour_search_params = clmod.HnswGraphArgs(init_args = dict(method = 'hnsw',
                                                               space = 'l2'))

In [None]:
macest_model = clmod.ModelWithConfidence(model,
                                      X_conf_train,
                                      y_conf_train, 
                                      search_method_args = neighbour_search_params)

macest_model.fit(X_cal, y_cal, param_range = param_bounds, optimiser_args= optimiser_args)

In [None]:
preds = model.predict(X_test)
conf_preds = macest_model.predict_proba(X_test)
rf_conf = model.predict_proba(X_test)
rf_point_prediction_conf = np.amax(rf_conf, axis=1)
macest_point_prediction_conf = macest_model.predict_confidence_of_point_prediction(X_test)

In [None]:
clplot.plot_calibration_curve([rf_point_prediction_conf,macest_point_prediction_conf], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

In [None]:
clplot.plot_quantile_spaced_calibration_curve([rf_point_prediction_conf, macest_point_prediction_conf], 
                                              ['Uncalibrated RF','MACEst' ],
                                              preds, y_test)

In [None]:
clplot.plot_calibration_metrics([rf_point_prediction_conf, macest_point_prediction_conf], 
                                 ['Uncalibrated RF','MACEst' ],
                                 preds, y_test)

In [None]:
clplot.plot_forecast_metrics([rf_point_prediction_conf,
                             macest_point_prediction_conf,], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

## That is better but still not great, can we do better ?

MACEst works by finding a set of nearest neighbours and then uses the distance to these k neighbours as a proxy for the epistemic uncertainty. Because of this paradigm, the natural metric to use is one which induces a good nearest-neighbour distance. Such a method exists and is known as neighbourhood components analysis (https://www.cs.toronto.edu/~hinton/absps/nca.pdf)
#### Let's compare the results if we use this metric

In [None]:
# Hold out 66% of the letters data for the confidence model, calibration and testing.
X_pp_train, X_conf_train, y_pp_train, y_conf_train = train_test_split(
    X, y, test_size=0.66, random_state=0)

# Learn the NCA transform on the classifier's training split only, keeping
# the same number of dimensions as the input, then project both partitions.
nca = NeighborhoodComponentsAnalysis(
    n_components=X_pp_train.shape[1],
    max_iter=30,
    verbose=1,
).fit(X_pp_train, y_pp_train)
X_pp_train, X_conf_train = (
    nca.transform(part) for part in (X_pp_train, X_conf_train)
)

# Divide the projected hold-out data into confidence-model training,
# calibration and test data.
# NOTE(review): the final split uses test_size=0.3 here, whereas the raw and
# PCA experiments use 0.4 -- confirm this difference is intentional.
X_conf_train, X_cal, y_conf_train, y_cal = train_test_split(
    X_conf_train, y_conf_train, test_size=0.4, random_state=0)
X_cal, X_test, y_cal, y_test = train_test_split(
    X_cal, y_cal, test_size=0.3, random_state=0)

In [None]:
model = RandomForestClassifier(random_state =0,
                               n_estimators =800,
                               n_jobs =-1)
model.fit(X_pp_train, y_pp_train)

In [None]:
macest_model = clmod.ModelWithConfidence(model,
                                      X_conf_train,
                                      y_conf_train, 
                                      search_method_args = neighbour_search_params)

macest_model.fit(X_cal, y_cal, param_range = param_bounds, optimiser_args= optimiser_args)

In [None]:
print(model.score(X_pp_train, y_pp_train))
print(model.score(X_conf_train, y_conf_train))
print(model.score(X_cal, y_cal))
print(model.score(X_test, y_test))

In [None]:
preds = model.predict(X_test)
conf_preds = macest_model.predict_proba(X_test)
rf_conf = model.predict_proba(X_test)
rf_point_prediction_conf = np.amax(rf_conf, axis=1)
macest_point_prediction_conf = macest_model.predict_confidence_of_point_prediction(X_test)

In [None]:
clplot.plot_calibration_curve([rf_point_prediction_conf,
                              macest_point_prediction_conf,], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

In [None]:
clplot.plot_quantile_spaced_calibration_curve([rf_point_prediction_conf, macest_point_prediction_conf], 
                                              ['Uncalibrated RF', 'MACEst'],
                                              preds, y_test)

In [None]:
clplot.plot_calibration_metrics([rf_point_prediction_conf,
                                 macest_point_prediction_conf], 
                                 ['Uncalibrated RF','MACEst' ],
                                 preds, y_test)

In [None]:
clplot.plot_forecast_metrics([rf_point_prediction_conf,
                              macest_point_prediction_conf,], 
                              ['Uncalibrated RF','MACEst' ],
                              preds, y_test)

In this metric we see that MACEst works very well