The Impact of Grad-CAM Parameters on Results

In [None]:
"""
Using gradient information to generate heat maps showing the regions of interest of the network, 
namely, the corresponding markers. 
Image annotations can be obtained from the extraction of weighting parameters. 
The {gradsize} of the attention score is extracted and converted into lesion annotation. 
"""

Model

In [1]:
import sklearn
from sklearn import feature_selection as fs
 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn import tree
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB, CategoricalNB, ComplementNB
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.neighbors import NearestNeighbors,KDTree,BallTree
from sklearn.ensemble import GradientBoostingClassifier

from catboost import CatBoostClassifier
import xgboost as xgb

In [3]:
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc, r2_score
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.metrics import precision_recall_curve, precision_score, average_precision_score, recall_score, f1_score

In [4]:
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import ClusterCentroids, EditedNearestNeighbours
from imblearn.under_sampling import RandomUnderSampler



gradsize = 0.5

In [5]:
######### TCGA discovery features #############
data_all = pd.read_csv('../Data/radiomic_feature/radiomics_0.5_discovery.csv',index_col=0)

label_caf = np.array(data_all['CAF'])
label_tnbc = np.array(data_all['tnbc'])

data_feature = data_all.drop(['CAF', 'tnbc'], axis=1)
#data_feature = data_feature.values
feature_name = data_feature.columns

print(data_feature.shape)
print(label_caf.shape)
print(label_tnbc.shape)

(461, 36)
(461,)
(461,)


In [6]:
# discovery cohort 
# radiogenomics discovery cohort 

X_train, X_test, y_train, y_test = train_test_split(
        data_feature, label_tnbc, test_size=0.2, stratify=label_tnbc, random_state=42
    )

print(Counter(y_train),Counter(y_test))
data_feature.shape, X_train.shape, X_test.shape


Counter({0: 319, 1: 49}) Counter({0: 81, 1: 12})


((461, 36), (368, 36), (93, 36))

In [7]:
# train model
############### Discovery #######################
discovery_params = {
    'n_estimators': 25, 
    'learning_rate': 1.51, 
    'max_depth': 2, 
    'min_child_weight': 1, 
    'gamma': 0, 
    'subsample': 0.5, 
    'colsample_bytree': 0.6, 
    'reg_alpha': 2.04, 
    'reg_lambda': 1.14, 
    'scale_pos_weight': 6.5, 
    'objective': 'binary:logistic', 
    'eval_metric': 'aucpr', 
    'random_state': 42,
    'max_delta_step': 0,
    
}

xgb_model = xgb.XGBClassifier(**discovery_params)
xgb_model.fit(X_train, y_train)

In [9]:
y_pred_train = xgb_model.predict(X_train)
acc_train = accuracy_score(y_pred=y_pred_train, y_true=y_train)

pred_prob = xgb_model.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_score=pred_prob[:, 1], y_true=y_test)
roc_auc = auc(fpr, tpr)
y_pred = (pred_prob[:, 1] >= 0.5).astype(int)
acc_con1 = confusion_matrix(y_pred=y_pred, y_true=y_test)
acc_pred1 = accuracy_score(y_pred=y_pred, y_true=y_test)

tn, fp, fn, tp = acc_con1.ravel()
# Specificity
spec_score = tn / (tn + fp) if (tn + fp) > 0 else 0 

precision, recall, thresholds = precision_recall_curve(y_pred, y_test)
pr_auc = auc(recall, precision)

preci_scores = precision_score(y_pred=y_pred, y_true=y_test)
recall_scores = recall_score(y_pred=y_pred, y_true=y_test)


#print("Accuracy Train: ", acc_train)
print("Accuracy Test: ", acc_pred1)
print("Confusion Matrix: ")
print(acc_con1) 
print("ROC AUC: ", roc_auc)
print("PR AUC: ", pr_auc)

print('Recall / Sensitivity: ', recall_scores)
print('Specificity: ', spec_score)


Accuracy Test:  0.8172043010752689
Confusion Matrix: 
[[65 16]
 [ 1 11]]
ROC AUC:  0.824074074074074
PR AUC:  0.748058542413381
Recall / Sensitivity:  0.9166666666666666
Specificity:  0.8024691358024691


gradsize = 0.6

In [10]:
######### TCGA discovery features #############
data_all = pd.read_csv('../Data/radiomic_feature/radiomics_0.6_discovery.csv',index_col=0)

label_caf = np.array(data_all['CAF'])
label_tnbc = np.array(data_all['tnbc'])

data_feature = data_all.drop(['CAF', 'tnbc'], axis=1)
#data_feature = data_feature.values
feature_name = data_feature.columns

print(data_feature.shape)
print(label_caf.shape)
print(label_tnbc.shape)

(461, 36)
(461,)
(461,)


In [11]:
# discovery cohort 
X_train, X_test, y_train, y_test = train_test_split(
        data_feature, label_tnbc, test_size=0.2, stratify=label_tnbc, random_state=42
    )

print(Counter(y_train),Counter(y_test))
data_feature.shape, X_train.shape, X_test.shape

Counter({0: 319, 1: 49}) Counter({0: 81, 1: 12})


((461, 36), (368, 36), (93, 36))

In [12]:
# train model
############### Discovery #######################
discovery_params = {
    'n_estimators': 58, 
    'learning_rate': 1.87, 
    'max_depth': 4, 
    'min_child_weight': 7, 
    'gamma': 0.8, 
    'subsample': 0.6, 
    'colsample_bytree': 1.0, 
    'reg_alpha': 0.08, 
    'reg_lambda': 1.78, 
    'max_delta_step': 5, 
    'scale_pos_weight': 7.7, 
    'objective': 'binary:logistic', 
    'eval_metric': 'aucpr', 
    'random_state': 42,
    
}

xgb_model = xgb.XGBClassifier(**discovery_params)
xgb_model.fit(X_train, y_train)

In [13]:
y_pred_train = xgb_model.predict(X_train)
acc_train = accuracy_score(y_pred=y_pred_train, y_true=y_train)

pred_prob = xgb_model.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_score=pred_prob[:, 1], y_true=y_test)
roc_auc = auc(fpr, tpr)
y_pred = (pred_prob[:, 1] >= 0.65).astype(int)
acc_con1 = confusion_matrix(y_pred=y_pred, y_true=y_test)
acc_pred1 = accuracy_score(y_pred=y_pred, y_true=y_test)

tn, fp, fn, tp = acc_con1.ravel()
# Specificity
spec_score = tn / (tn + fp) if (tn + fp) > 0 else 0 

precision, recall, thresholds = precision_recall_curve(y_pred, y_test)
pr_auc = auc(recall, precision)

preci_scores = precision_score(y_pred=y_pred, y_true=y_test)
recall_scores = recall_score(y_pred=y_pred, y_true=y_test)


#print("Accuracy Train: ", acc_train)
print("Accuracy Test: ", acc_pred1)
print("Confusion Matrix: ")
print(acc_con1) 
print("ROC AUC: ", roc_auc)
print("PR AUC: ", pr_auc)

print('Recall / Sensitivity: ', recall_scores)
print('Specificity: ', spec_score)


Accuracy Test:  0.7741935483870968
Confusion Matrix: 
[[60 21]
 [ 0 12]]
ROC AUC:  0.8508230452674898
PR AUC:  0.7947214076246335
Recall / Sensitivity:  1.0
Specificity:  0.7407407407407407


gradsize = 0.7

In [14]:
######### TCGA discovery features #############
data_all = pd.read_csv('../Data/radiomic_feature/radiomics_0.7_discovery.csv',index_col=0)

label_caf = np.array(data_all['CAF'])
label_tnbc = np.array(data_all['tnbc'])

data_feature = data_all.drop(['CAF', 'tnbc'], axis=1)
#data_feature = data_feature.values
feature_name = data_feature.columns

print(data_feature.shape)
print(label_caf.shape)
print(label_tnbc.shape)

(461, 36)
(461,)
(461,)


In [15]:
# discovery cohort 
X_train, X_test, y_train, y_test = train_test_split(
        data_feature, label_tnbc, test_size=0.2, stratify=label_tnbc, random_state=42
    )

print(Counter(y_train),Counter(y_test))
data_feature.shape, X_train.shape, X_test.shape

Counter({0: 319, 1: 49}) Counter({0: 81, 1: 12})


((461, 36), (368, 36), (93, 36))

In [16]:
# train model
############### Discovery #######################
discovery_params = {
    'n_estimators': 3, 
    'learning_rate': 0.1, 
    'max_depth': 7, 
    'min_child_weight': 1, 
    'gamma': 0, 
    'subsample': 1.0, 
    'colsample_bytree': 0.7, 
    'reg_alpha': 1, 
    'reg_lambda': 1.5, 
    'scale_pos_weight': 7.7, 
    'objective': 'binary:logistic', 
    'eval_metric': 'aucpr', 
    'random_state': 42,
    'max_delta_step': 0,
    
}

xgb_model = xgb.XGBClassifier(**discovery_params)
xgb_model.fit(X_train, y_train)

In [17]:
y_pred_train = xgb_model.predict(X_train)
acc_train = accuracy_score(y_pred=y_pred_train, y_true=y_train)

pred_prob = xgb_model.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_score=pred_prob[:, 1], y_true=y_test)
roc_auc = auc(fpr, tpr)
y_pred = (pred_prob[:, 1] >= 0.5).astype(int)
acc_con1 = confusion_matrix(y_pred=y_pred, y_true=y_test)
acc_pred1 = accuracy_score(y_pred=y_pred, y_true=y_test)

tn, fp, fn, tp = acc_con1.ravel()
# Specificity
spec_score = tn / (tn + fp) if (tn + fp) > 0 else 0 

precision, recall, thresholds = precision_recall_curve(y_pred, y_test)
pr_auc = auc(recall, precision)

preci_scores = precision_score(y_pred=y_pred, y_true=y_test)
recall_scores = recall_score(y_pred=y_pred, y_true=y_test)


#print("Accuracy Train: ", acc_train)
print("Accuracy Test: ", acc_pred1)
print("Confusion Matrix: ")
print(acc_con1) 
print("ROC AUC: ", roc_auc)
print("PR AUC: ", pr_auc)

print('Recall / Sensitivity: ', recall_scores)
print('Specificity: ', spec_score)


Accuracy Test:  0.7741935483870968
Confusion Matrix: 
[[61 20]
 [ 1 11]]
ROC AUC:  0.8281893004115226
PR AUC:  0.743279569892473
Recall / Sensitivity:  0.9166666666666666
Specificity:  0.7530864197530864
