# pIC50 Prediction Task

During the course of a drug discovery program, a critical task is the ability to “screen” a
library of compounds in order to find molecules that can bind to and potentially inhibit
the activity of a target protein (we call this readout “potency”). Due to the prohibitive
cost of large-scale experimental screening, virtual (in silico) screening serves as an initial
step. This approach significantly reduces costs while facilitating the evaluation and
prioritization of an extensive range of small molecules.

A variety of methods is available for virtual screening, including ligand-based machine
learning models that rely on the molecular structure as input to predict their activities.
This notebook explores a dataset of 4.6k compounds that have undergone
experimental testing against the Epidermal Growth Factor Receptor (EGFR) kinase, a target
associated with various cancers, and then uses an XGBoost classifier to predict the potency
value (pIC50) of novel compounds targeting EGFR.


## XGBoost Model

In [None]:
# Load the training and test tables from CSV (paths are relative to the working directory)
train = pd.read_csv("pIC50_prediction/data/train.csv")
test = pd.read_csv("pIC50_prediction/data/test.csv")

In [22]:
# Preview the first rows of the training table
train.head()

Unnamed: 0,molecule_chembl_id,units,smiles,molecular_weight,n_hba,n_hbd,logp,ro5_fulfilled,label
3135,CHEMBL485070,nM,CCOc1cc(/C=C(\C#N)C(N)=O)cc(CSc2ccccc2)c1O,354.103813,5,2,3.47548,True,0
4597,CHEMBL4202780,nM,COc1cc(N2CCN(C)CC2)ccc1Nc1ncc(Cl)c(Oc2ccccc2NC...,551.241166,9,2,4.4826,True,0
2137,CHEMBL3676348,nM,C=CC(=O)Nc1cc2c(Nc3ccc(Cl)nc3)ncnc2cc1OCCNC(C)=O,426.120716,7,3,3.0612,True,0
3760,CHEMBL425402,nM,Fc1ccc(Nc2ncnc3c2NCc2ccccc2O3)cc1Cl,342.068367,5,2,4.7305,True,0
3495,CHEMBL116547,nM,COc1cc(/C=C(\C#N)C(=O)c2ccc(O)c(O)c2)cc(CSCc2c...,447.114044,7,3,5.03528,True,0


In [23]:
# Restrict the table to the identifier, SMILES, descriptor and label columns
columns_to_keep = [
    "molecule_chembl_id",
    "smiles",
    "molecular_weight",
    "n_hba",
    "n_hbd",
    "logp",
    "label",
]
train = train[columns_to_keep]
train.head()

Unnamed: 0,molecule_chembl_id,smiles,molecular_weight,n_hba,n_hbd,logp,label
3135,CHEMBL485070,CCOc1cc(/C=C(\C#N)C(N)=O)cc(CSc2ccccc2)c1O,354.103813,5,2,3.47548,0
4597,CHEMBL4202780,COc1cc(N2CCN(C)CC2)ccc1Nc1ncc(Cl)c(Oc2ccccc2NC...,551.241166,9,2,4.4826,0
2137,CHEMBL3676348,C=CC(=O)Nc1cc2c(Nc3ccc(Cl)nc3)ncnc2cc1OCCNC(C)=O,426.120716,7,3,3.0612,0
3760,CHEMBL425402,Fc1ccc(Nc2ncnc3c2NCc2ccccc2O3)cc1Cl,342.068367,5,2,4.7305,0
3495,CHEMBL116547,COc1cc(/C=C(\C#N)C(=O)c2ccc(O)c(O)c2)cc(CSCc2c...,447.114044,7,3,5.03528,0


In [24]:
def smiles_to_fp(smiles, method="maccs", n_bits=2048):
    """
    Encode a molecule from a SMILES string into a fingerprint.

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.

    method : str
        The type of fingerprint to use: "maccs" (default), "morgan2" or
        "morgan3" (Morgan fingerprint with radius 2 or 3). Any other value
        prints a warning and falls back to MACCS keys.

    n_bits : int
        The length of the Morgan fingerprint. Not used for MACCS keys.

    Returns
    -------
    array
        The fingerprint array.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed into a molecule.

    """

    # convert smiles to RDKit mol object
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail with a
        # clear message instead of an obscure error inside the fingerprinter.
        raise ValueError(f"Could not parse SMILES string: {smiles!r}")

    # Both Morgan variants differ only in their radius
    morgan_radius = {"morgan2": 2, "morgan3": 3}
    if method in morgan_radius:
        fpg = rdFingerprintGenerator.GetMorganGenerator(radius=morgan_radius[method], fpSize=n_bits)
        return np.array(fpg.GetFingerprint(mol))

    if method != "maccs":
        # NBVAL_CHECK_OUTPUT
        print(f"Warning: Wrong method specified: {method}. Default will be used instead.")
    return np.array(MACCSkeys.GenMACCSKeys(mol))

In [25]:
# Work on a copy so that columns added below do not modify `train` itself
compound_df = train.copy()

In [26]:
# Add column for fingerprint (smiles_to_fp is called with its defaults, i.e. MACCS keys)
compound_df["fp"] = compound_df["smiles"].apply(smiles_to_fp)
compound_df.head(3)

Unnamed: 0,molecule_chembl_id,smiles,molecular_weight,n_hba,n_hbd,logp,label,fp
3135,CHEMBL485070,CCOc1cc(/C=C(\C#N)C(N)=O)cc(CSc2ccccc2)c1O,354.103813,5,2,3.47548,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4597,CHEMBL4202780,COc1cc(N2CCN(C)CC2)ccc1Nc1ncc(Cl)c(Oc2ccccc2NC...,551.241166,9,2,4.4826,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2137,CHEMBL3676348,C=CC(=O)Nc1cc2c(Nc3ccc(Cl)nc3)ncnc2cc1OCCNC(C)=O,426.120716,7,3,3.0612,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [27]:
def plot_roc_curves_for_models(models, test_x, test_y, save_png=False):
    """
    Helper function to plot customized roc curve.

    Parameters
    ----------
    models: list of dict
        One entry per fitted model; each entry provides the model under the
        key "model" and its legend text under the key "label".
    test_x: list
        Molecular fingerprints for test set.
    test_y: list
        Associated activity labels for test set.
    save_png: bool
        Save image to disk (default = False)

    Returns
    -------
    fig:
        Figure.
    """

    fig, ax = plt.subplots()

    # One ROC curve per model
    for model in models:
        # Select the model
        ml_model = model["model"]
        # Prediction probability on test set (column 1 of predict_proba)
        test_prob = ml_model.predict_proba(test_x)[:, 1]
        # Compute False positive rate and True positive rate;
        # the thresholds are not needed for the plot
        fpr, tpr, _ = metrics.roc_curve(test_y, test_prob)
        # Calculate Area under the curve to display on the plot
        auc = roc_auc_score(test_y, test_prob)
        # Plot the computed values
        ax.plot(fpr, tpr, label=f"{model['label']} AUC area = {auc:.2f}")

    # Custom settings for the plot; the dashed diagonal marks a random classifier
    ax.plot([0, 1], [0, 1], "r--")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("Receiver Operating Characteristic")
    ax.legend(loc="lower right")
    # Save plot
    if save_png:
        fig.savefig("pIC50_prediction/results/roc_auc", dpi=300, bbox_inches="tight", transparent=True)
    return fig

In [28]:
def model_performance(ml_model, test_x, test_y, verbose=True):
    """
    Helper function to calculate model performance

    Parameters
    ----------
    ml_model: sklearn model object
        The fitted machine learning model to evaluate; it must provide
        `predict` and `predict_proba`.
    test_x: list
        Molecular fingerprints for test set.
    test_y: list
        Associated activity labels for test set.
    verbose: bool
        Print performance measure (default = True)

    Returns
    -------
    tuple:
        Accuracy, sensitivity, specificity, auc on test set.
    """

    # Prediction probability on test set (column 1 of predict_proba)
    test_prob = ml_model.predict_proba(test_x)[:, 1]

    # Prediction class on test set
    test_pred = ml_model.predict(test_x)

    # Performance of model on test set
    accuracy = accuracy_score(test_y, test_pred)
    # Sensitivity is the recall of label 1, specificity the recall of label 0
    sens = recall_score(test_y, test_pred)
    spec = recall_score(test_y, test_pred, pos_label=0)
    auc = roc_auc_score(test_y, test_prob)

    if verbose:
        # Print performance results
        # NBVAL_CHECK_OUTPUT
        print(f"Accuracy: {accuracy:.2f}")
        print(f"Sensitivity: {sens:.2f}")
        print(f"Specificity: {spec:.2f}")
        print(f"AUC: {auc:.2f}")

    return accuracy, sens, spec, auc

In [29]:
def model_training_and_validation(ml_model, name, splits, verbose=True):
    """
    Train a model on the training part of a split and score it on the
    test part.

    Parameters
    ----------
    ml_model: sklearn model object
        The machine learning model to train (fitted in place).
    name: str
        Name of machine learning algorithm: RF, SVM, ANN.
        Not used inside this function.
    splits: list
        List of descriptor and label data: train_x, test_x, train_y, test_y.
    verbose: bool
        Print performance info (default = True)

    Returns
    -------
    tuple:
        Accuracy, sensitivity, specificity, auc on test set.

    """
    x_fit, x_eval, y_fit, y_eval = splits

    # Train on the training portion only
    ml_model.fit(x_fit, y_fit)

    # Score the fitted model on the held-out portion
    scores = model_performance(ml_model, x_eval, y_eval, verbose)
    acc, sensitivity, specificity, roc_auc = scores

    return acc, sensitivity, specificity, roc_auc

In [43]:
# Features (fingerprints) and targets (labels) as plain Python lists
fingerprint_to_model = compound_df["fp"].tolist()
label_to_model = compound_df["label"].tolist()

# Random 80/20 split of the data; the *_x names hold the fingerprint
# splits and the *_y names hold the matching label splits
splits = list(
    train_test_split(fingerprint_to_model, label_to_model, test_size=0.2, random_state=SEED)
)
static_train_x, static_test_x, static_train_y, static_test_y = splits
# NBVAL_CHECK_OUTPUT
print("Training data size:", len(static_train_x))
print("Test data size:", len(static_test_x))

Training data size: 2966
Test data size: 742


In [31]:
# Preview the first three rows, including the fingerprint column
compound_df.head(3)

Unnamed: 0,molecule_chembl_id,smiles,molecular_weight,n_hba,n_hbd,logp,label,fp
3135,CHEMBL485070,CCOc1cc(/C=C(\C#N)C(N)=O)cc(CSc2ccccc2)c1O,354.103813,5,2,3.47548,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4597,CHEMBL4202780,COc1cc(N2CCN(C)CC2)ccc1Nc1ncc(Cl)c(Oc2ccccc2NC...,551.241166,9,2,4.4826,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2137,CHEMBL3676348,C=CC(=O)Nc1cc2c(Nc3ccc(Cl)nc3)ncnc2cc1OCCNC(C)=O,426.120716,7,3,3.0612,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [32]:
# Descriptor + fingerprint columns and the label column as separate objects.
# NOTE(review): the modelling cells visible in this notebook use the
# static_train_x / static_test_x splits instead of X and Y — confirm these are still needed.
X = compound_df[["molecular_weight","n_hba","n_hbd","logp","fp"]]
Y = compound_df['label']

In [33]:
# Inspect the column types of the feature table
X.dtypes

molecular_weight    float64
n_hba                 int64
n_hbd                 int64
logp                float64
fp                   object
dtype: object

In [34]:
# Inspect the type of the label column
Y.dtype

dtype('int64')

In [63]:
%%time
def crossvalidation(train):
    """
    Fit a benchmark XGBoost classifier with its default parameters, run it
    through a 10-fold cross-validation and collect its performance.

    The data are read from the module-level splits static_train_x,
    static_train_y, static_test_x and static_test_y.

    Parameters
    ----------
    train: object
        Not used by the function body; kept so the signature stays unchanged.

    Returns
    -------
    tuple:
        (results_dict, bp): the results dictionary holding the benchmark
        entry under the key 'xgbc0', and the benchmark's best parameters.
    """
    #dictionary for collecting results
    results_dict = {}

    #obtaining default parameters by calling .fit() to XGBoost model instance
    # NOTE(review): use_label_encoder is reported as deprecated by newer
    # XGBoost releases — confirm whether it is still needed.
    xgbc0 = xgb.XGBClassifier(objective='binary:logistic',
                              booster='gbtree',
                              eval_metric='auc',
                              tree_method='hist',
                              grow_policy='lossguide',
                              use_label_encoder=True)

    xgbc0.fit(static_train_x, static_train_y)

    #extracting default parameters from benchmark model.
    #default parameters have to be wrapped in lists - even single values - so GridSearchCV can take them as inputs
    default_params = {key: [value] for key, value in xgbc0.get_params().items()}

    #benchmark model. Grid search is not performed, since only single values are provided as parameter grid.
    #However, cross-validation is still executed
    clf0 = GridSearchCV(estimator=xgbc0, scoring='accuracy', param_grid=default_params, return_train_score=True, verbose=1, cv=10)
    clf0.fit(static_train_x, static_train_y)

    #results dataframe
    df = pd.DataFrame(clf0.cv_results_)

    #predictions - inputs to confusion matrix
    train_predictions = clf0.predict(static_train_x)
    test_predictions = clf0.predict(static_test_x)

    #confusion matrices
    cfm_train = confusion_matrix(static_train_y, train_predictions)
    cfm_test = confusion_matrix(static_test_y, test_predictions)

    #accuracy scores
    accs_train = accuracy_score(static_train_y, train_predictions)
    accs_test = accuracy_score(static_test_y, test_predictions)

    #sensitivity scores: recall of label 1 (same definition as in model_performance)
    sens_train = recall_score(static_train_y, train_predictions)
    sens_test = recall_score(static_test_y, test_predictions)

    #specificity scores: recall of label 0 (same definition as in model_performance)
    spec_train = recall_score(static_train_y, train_predictions, pos_label=0)
    spec_test = recall_score(static_test_y, test_predictions, pos_label=0)

    #F1 scores for each train/test label
    f1s_train_p1 = f1_score(static_train_y, train_predictions, pos_label=1)
    f1s_train_p0 = f1_score(static_train_y, train_predictions, pos_label=0)
    f1s_test_p1 = f1_score(static_test_y, test_predictions, pos_label=1)
    f1s_test_p0 = f1_score(static_test_y, test_predictions, pos_label=0)

    #Area Under the Receiver Operating Characteristic Curve
    test_ras = roc_auc_score(static_test_y, clf0.predict_proba(static_test_x)[:,1])

    #best parameters
    bp = clf0.best_params_

    #storing computed values in results dictionary
    results_dict['xgbc0'] = {'iterable_parameter': np.nan,
                             'classifier': deepcopy(clf0),
                             'cv_results': df.copy(),
                             'cfm_train': cfm_train,
                             'cfm_test': cfm_test,
                             'train_accuracy': accs_train,
                             'test_accuracy': accs_test,
                             'train_sensitivity': sens_train,
                             'test_sensitivity': sens_test,
                             'train_specificity': spec_train,
                             'test_specificity': spec_test,
                             'train F1-score label 1': f1s_train_p1,
                             'train F1-score label 0': f1s_train_p0,
                             'test F1-score label 1': f1s_test_p1,
                             'test F1-score label 0': f1s_test_p0,
                             'test roc auc score': test_ras,
                             'best_params': bp}

    return results_dict, bp


#run the benchmark and expose the objects that the following cells read as
#module-level names (results_dict, clf0, default_params)
results_dict, bp = crossvalidation(train)
clf0 = results_dict['xgbc0']['classifier']
#the single-value grid handed to GridSearchCV is kept on its param_grid attribute
default_params = clf0.param_grid

Fitting 10 folds for each of 1 candidates, totalling 10 fits
CPU times: user 9.87 s, sys: 99.2 ms, total: 9.97 s
Wall time: 2.87 s


In [64]:
#creating deepcopy of default parameters before manipulations
# Independent copy, so later edits to `params` leave default_params untouched
params = deepcopy(default_params)

# Candidate values for each hyper-parameter that is tuned in turn
param_grid = dict(
    gamma=[0, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 200],
    learning_rate=[0.01, 0.03, 0.06, 0.1, 0.15, 0.2, 0.25, 0.300000012, 0.4, 0.5, 0.6, 0.7],
    max_depth=[5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    n_estimators=[50, 65, 80, 100, 115, 130, 150],
    reg_alpha=[0, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 200],
    reg_lambda=[0, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 200],
)

In [65]:

#start time
t0 = time.time()
#total number of candidate values over all tuned parameters
gcvj = sum(len(values) for values in param_grid.values())

#best parameters known so far: the benchmark model's before the first iteration.
#Tracking them explicitly avoids picking up a stale `clf` when the cell is re-run.
latest_best_params = clf0.best_params_

#default_params holds every value wrapped in a single-element list (for GridSearchCV);
#the estimator itself is constructed from the plain values
estimator_defaults = {key: values[0] for key, values in default_params.items()}

#iteration loop. Each selected parameter iterated separately
for i, grid_key in enumerate(param_grid):

    #variable for measuring iteration time
    loop_start = time.time()

    #creating param_grid argument for GridSearchCV:
    #listing grid values of current iterable parameter and wrapping non-iterable parameter single values in list
    for param_key in params:
        if param_key == grid_key:
            params[param_key] = param_grid[grid_key]
        else:
            #use best parameters of last iteration (benchmark model for the first one)
            params[param_key] = [latest_best_params[param_key]]

    #classifier instance of current iteration
    xgbc = xgb.XGBClassifier(**estimator_defaults)

    #GridSearch instance of current iteration
    clf = GridSearchCV(estimator=xgbc, param_grid=params, scoring='accuracy', return_train_score=True, verbose=1, cv=10)
    clf.fit(static_train_x, static_train_y)

    #results dataframe
    df = pd.DataFrame(clf.cv_results_)

    #predictions of the current search (not the benchmark) - inputs to confusion matrix
    train_predictions = clf.predict(static_train_x)
    test_predictions = clf.predict(static_test_x)

    #confusion matrices
    cfm_train = confusion_matrix(static_train_y, train_predictions)
    cfm_test = confusion_matrix(static_test_y, test_predictions)

    #accuracy scores
    accs_train = accuracy_score(static_train_y, train_predictions)
    accs_test = accuracy_score(static_test_y, test_predictions)

    #sensitivity scores: recall of label 1 (same definition as in model_performance)
    sens_train = recall_score(static_train_y, train_predictions)
    sens_test = recall_score(static_test_y, test_predictions)

    #specificity scores: recall of label 0 (same definition as in model_performance)
    spec_train = recall_score(static_train_y, train_predictions, pos_label=0)
    spec_test = recall_score(static_test_y, test_predictions, pos_label=0)

    #F1 scores for each train/test label
    f1s_train_p1 = f1_score(static_train_y, train_predictions, pos_label=1)
    f1s_train_p0 = f1_score(static_train_y, train_predictions, pos_label=0)
    f1s_test_p1 = f1_score(static_test_y, test_predictions, pos_label=1)
    f1s_test_p0 = f1_score(static_test_y, test_predictions, pos_label=0)

    #Area Under the Receiver Operating Characteristic Curve
    test_ras = roc_auc_score(static_test_y, clf.predict_proba(static_test_x)[:,1])

    #best parameters; they seed the fixed values of the next iteration
    bp = clf.best_params_
    latest_best_params = bp

    #storing computed values in results dictionary
    results_dict[f'xgbc{i+1}']  = {'iterable_parameter': grid_key,
                                    'classifier': deepcopy(clf),
                                    'cv_results': df.copy(),
                                    'cfm_train': cfm_train,
                                    'cfm_test': cfm_test,
                                    'train_accuracy': accs_train,
                                    'test_accuracy': accs_test,
                                    'train_sensitivity': sens_train,
                                    'test_sensitivity': sens_test,
                                    'train_specificity': spec_train,
                                    'test_specificity': spec_test,
                                    'train F1-score label 1': f1s_train_p1,
                                    'train F1-score label 0': f1s_train_p0,
                                    'test F1-score label 1': f1s_test_p1,
                                    'test F1-score label 0': f1s_test_p0,
                                    'test roc auc score': test_ras,
                                    'best_params': bp}

    #variable for measuring iteration time
    elapsed_time = time.time() - loop_start
    print(f'iteration #{i+1} finished in: {elapsed_time} seconds')

#stop time
t1 = time.time()

#elapsed time
gcvt = t1 - t0

Fitting 10 folds for each of 13 candidates, totalling 130 fits
iteration #1 finished in: 19.73732590675354 seconds
Fitting 10 folds for each of 12 candidates, totalling 120 fits
iteration #2 finished in: 59.204182624816895 seconds
Fitting 10 folds for each of 10 candidates, totalling 100 fits
iteration #3 finished in: 46.67806529998779 seconds
Fitting 10 folds for each of 7 candidates, totalling 70 fits
iteration #4 finished in: 29.284557104110718 seconds
Fitting 10 folds for each of 13 candidates, totalling 130 fits
iteration #5 finished in: 43.69199800491333 seconds
Fitting 10 folds for each of 13 candidates, totalling 130 fits
iteration #6 finished in: 59.22780919075012 seconds


In [61]:
# Display everything collected for the benchmark and tuning runs
results_dict

{'xgbc0': {'iterable_parameter': nan,
  'classifier': GridSearchCV(cv=5,
               estimator=XGBClassifier(base_score=None, booster='gbtree',
                                       callbacks=None, colsample_bylevel=None,
                                       colsample_bynode=None,
                                       colsample_bytree=None, device=None,
                                       early_stopping_rounds=None,
                                       enable_categorical=False,
                                       eval_metric='auc', feature_types=None,
                                       gamma=None, grow_policy='lossguide',
                                       importance_type=None,
                                       interaction_constraints=None,
                                       learnin...
                           'max_cat_threshold': [None],
                           'max_cat_to_onehot': [None], 'max_delta_step': [None],
                           'max_d

In [None]:
# Number of folds handed to crossvalidation(...) via n_folds in the cells below
N_FOLDS = 3

In [None]:
# Cross-validate every model in `models` and print a header per model.
# NOTE(review): `models` is not defined in the visible part of this notebook, and
# `crossvalidation` is declared above with a single `train` parameter, so this
# three-argument call does not match it — confirm which definition is intended.
for model in models:
    print("\n======= ")
    print(f"{model['label']}")
    crossvalidation(model["model"], compound_df, n_folds=N_FOLDS)


Model_RF
Mean accuracy: 0.85 	and std : 0.01 
Mean sensitivity: 0.47 	and std : 0.01 
Mean specificity: 0.94 	and std : 0.00 
Mean AUC: 0.87 	and std : 0.00 
Time taken : 1.58s


Model_SVM
Mean accuracy: 0.84 	and std : 0.01 
Mean sensitivity: 0.31 	and std : 0.02 
Mean specificity: 0.97 	and std : 0.00 
Mean AUC: 0.84 	and std : 0.01 
Time taken : 14.57s


Model_ANN
Mean accuracy: 0.82 	and std : 0.01 
Mean sensitivity: 0.35 	and std : 0.04 
Mean specificity: 0.93 	and std : 0.01 
Mean AUC: 0.81 	and std : 0.00 
Time taken : 4.38s


Model_ANN
Mean accuracy: 0.82 	and std : 0.01 
Mean sensitivity: 0.35 	and std : 0.04 
Mean specificity: 0.93 	and std : 0.01 
Mean AUC: 0.81 	and std : 0.00 
Time taken : 4.41s



In [None]:
# Reset data frame
# NOTE(review): `chembl_df` is not defined in the visible part of this notebook
# (compound_df was created from `train` earlier) — confirm the intended source.
compound_df = chembl_df.copy()

In [None]:
# Use Morgan fingerprint with radius 3 (n_bits keeps its default of 2048)
compound_df["fp"] = compound_df["smiles"].apply(smiles_to_fp, args=("morgan3",))
compound_df.head(3)
# NBVAL_CHECK_OUTPUT

Unnamed: 0,molecule_chembl_id,smiles,pIC50,active,fp
0,CHEMBL63786,Brc1cccc(Nc2ncnc3cc4ccccc4cc23)c1,11.522879,1.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,CHEMBL35820,CCOc1cc2ncnc(Nc3cccc(Br)c3)c2cc1OCC,11.221849,1.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,CHEMBL53711,CN(C)c1cc2c(Nc3cccc(Br)c3)ncnc2cn1,11.221849,1.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [None]:
# NOTE(review): `model` is only bound as the loop variable of the earlier
# `for model in models` cell, so this evaluates just the last model of that loop;
# the call also passes three arguments while `crossvalidation` is declared above
# with a single `train` parameter — confirm what is intended here.
crossvalidation(model["model"], compound_df, n_folds=N_FOLDS)


Model_RF
Mean accuracy: 0.87 	and std : 0.00 
Mean sensitivity: 0.51 	and std : 0.02 
Mean specificity: 0.96 	and std : 0.00 
Mean AUC: 0.90 	and std : 0.00 
Time taken : 4.27s


Model_ANN
Mean accuracy: 0.85 	and std : 0.00 
Mean sensitivity: 0.57 	and std : 0.04 
Mean specificity: 0.91 	and std : 0.01 
Mean AUC: 0.86 	and std : 0.01 
Time taken : 18.14s


Model_ANN
Mean accuracy: 0.85 	and std : 0.00 
Mean sensitivity: 0.57 	and std : 0.04 
Mean specificity: 0.91 	and std : 0.01 
Mean AUC: 0.86 	and std : 0.01 
Time taken : 17.98s



In [None]:
def plot():
    """
    Plot one row of figures per entry of the module-level `results_dict`.

    The left panel shows the mean cross-validation train and test scores over
    the candidate values of the tuned parameter (left blank for the benchmark
    entry 'xgbc0'); the right panel shows the test-set confusion matrix as an
    annotated heatmap. The figure is displayed with plt.show().
    """

    #number of rows depend on number of iterations
    nrows = len(results_dict)

    #standard group names for confusion matrices
    group_names = ['True Neg','False Pos','False Neg','True Pos']

    #creating figure; squeeze=False keeps `axes` two-dimensional even for a single row
    fig, axes = plt.subplots(nrows, 2, figsize=(18, 8*nrows), squeeze=False)

    #iteratively plotting train/test accuracy scores and test confusion matrix
    for i, (ckey, ax) in enumerate(zip(results_dict, axes)):

        #results stored for the current key
        entry = results_dict[ckey]
        cfm_test = entry['cfm_test']

        #plotting scores for models other than the benchmark model
        if ckey != 'xgbc0':
            cv_results = entry['cv_results']
            tuned_parameter = entry['iterable_parameter']

            #candidate values in the order they were evaluated, so every tick
            #label stays aligned with the score plotted at that position
            candidate_values = [candidate[tuned_parameter] for candidate in cv_results['params']]

            ax[0].plot(cv_results.loc[:,'mean_train_score'], label='train scores', color='blue')
            ax[0].plot(cv_results.loc[:,'mean_test_score'], label='test scores', color='red')
            #the benchmark occupies position 0, so position i is iteration #i
            ax[0].set_title(f'Iteration #{i} results')

            ax[0].set_xticks(list(range(len(candidate_values))))
            ax[0].set_xticklabels(candidate_values)

            ax[0].grid(True, which='major')
            ax[0].legend()
            ax[0].set_xlabel(tuned_parameter)
            ax[0].set_ylabel('mean score')

        #leaving scores plot blank for benchmark model
        else:
            ax[0].axis('off')
            ax[0].text(x=0.5, y=0.5, s='No iteration has been performed', fontsize=16, va='center', ha='center')

        #computing variables for specific confusion matrix
        group_counts = ["{0:0.0f}".format(value) for value in cfm_test.flatten()]
        group_percentages = ["{0:.2%}".format(value) for value in cfm_test.flatten()/np.sum(cfm_test)]
        labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in zip(group_names,group_counts,group_percentages)]
        labels = np.asarray(labels).reshape(2,2)

        #plotting confusion matrix
        sns.heatmap(cfm_test, annot=labels, fmt='', cmap='Blues', ax=ax[1])

    plt.show()