The Impact of Grad-CAM Parameters on Results

In [None]:
"""
Using gradient information to generate heat maps showing the regions of interest of the network, 
namely, the corresponding markers. 
Image annotations can be obtained from the extraction of weighting parameters. 
The {gradsize} of the attention score is extracted and converted into lesion annotation. 
"""

Model

In [4]:
import sklearn
from sklearn import feature_selection as fs
 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn import tree
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB, CategoricalNB, ComplementNB
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.neighbors import NearestNeighbors,KDTree,BallTree
from sklearn.ensemble import GradientBoostingClassifier

from catboost import CatBoostClassifier
import xgboost as xgb

In [25]:
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc, r2_score
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.metrics import precision_recall_curve, precision_score, average_precision_score, recall_score, f1_score

In [3]:
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
from imblearn.over_sampling import SMOTE, ADASYN
from imblearn.under_sampling import ClusterCentroids, EditedNearestNeighbours
from imblearn.under_sampling import RandomUnderSampler



gradsize = 0.5

In [40]:
######### TCGA discovery features #############
data_all = pd.read_csv('./Data/radiomic_feature/radiomics_0.5_discovery.csv',index_col=0)

label_caf = np.array(data_all['CAF'])
label_tnbc = np.array(data_all['tnbc'])

data_feature = data_all.drop(['CAF', 'tnbc'], axis=1)
#data_feature = data_feature.values
feature_name = data_feature.columns

print(data_feature.shape)
print(label_caf.shape)
print(label_tnbc.shape)

(461, 36)
(461,)
(461,)


In [41]:
# discovery cohort 
X_train,X_test,y_train,y_test = train_test_split(data_feature, label_tnbc, test_size=0.2,random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(368, 36) (93, 36) (368,) (93,)


In [42]:
# oversample
ros = RandomOverSampler(sampling_strategy=1,random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
print(X_resampled.shape, y_resampled.shape)
print(Counter(y_resampled))

(648, 36) (648,)
Counter({1: 324, 0: 324})


In [52]:
# model
clf = xgb.XGBClassifier()

clf.fit(X_resampled, y_resampled)

In [53]:
y_pred = clf.predict(X_test)
y_pred_train = clf.predict(X_train)
acc_train = accuracy_score(y_pred=y_pred_train, y_true=y_train)

pred_prob = clf.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_score=pred_prob[:, 1], y_true=y_test)
roc_auc = auc(fpr, tpr)
acc_con1 = confusion_matrix(y_pred=y_pred, y_true=y_test)
acc_pred1 = accuracy_score(y_pred=y_pred, y_true=y_test)

precision, recall, thresholds = precision_recall_curve(y_pred, y_test)
pr_auc = auc(recall, precision)

ap_score = average_precision_score(y_pred, y_test)

preci_scores = precision_score(y_pred=y_pred, y_true=y_test)
recall_scores = recall_score(y_pred=y_pred, y_true=y_test)
f1_scores = f1_score(y_pred=y_pred, y_true=y_test)


print("Accuracy Train: ", acc_train)
print("Accuracy Test: ", acc_pred1)
print("Confusion Matrix: ")
print(acc_con1) 
print("ROC AUC: ", roc_auc)
print("PR AUC: ", pr_auc)
print('F1 score: ', f1_scores)
print('Precision: ', preci_scores)
print('Recall: ', recall_scores)
print("AP scores: ", ap_score)

Accuracy Train:  1.0
Accuracy Test:  0.8064516129032258
Confusion Matrix: 
[[ 1 16]
 [ 2 74]]
ROC AUC:  0.6958204334365325
PR AUC:  0.9839747217506131
F1 score:  0.891566265060241
Precision:  0.8222222222222222
Recall:  0.9736842105263158
AP scores:  0.9726278060743256


gradsize = 0.6

In [35]:
######### TCGA discovery features #############
data_all = pd.read_csv('./Data/radiomic_feature/radiomics_0.6_discovery.csv',index_col=0)

label_caf = np.array(data_all['CAF'])
label_tnbc = np.array(data_all['tnbc'])

data_feature = data_all.drop(['CAF', 'tnbc'], axis=1)
#data_feature = data_feature.values
feature_name = data_feature.columns

print(data_feature.shape)
print(label_caf.shape)
print(label_tnbc.shape)

(461, 36)
(461,)
(461,)


In [36]:
# discovery cohort 
X_train,X_test,y_train,y_test = train_test_split(data_feature, label_tnbc, test_size=0.2,random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(368, 36) (93, 36) (368,) (93,)


In [37]:
# oversample
ros = RandomOverSampler(sampling_strategy=1,random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
print(X_resampled.shape, y_resampled.shape)
print(Counter(y_resampled))

(648, 36) (648,)
Counter({1: 324, 0: 324})


In [38]:
# model
clf = xgb.XGBClassifier()

clf.fit(X_resampled, y_resampled)

In [39]:
y_pred = clf.predict(X_test)
y_pred_train = clf.predict(X_train)
acc_train = accuracy_score(y_pred=y_pred_train, y_true=y_train)

pred_prob = clf.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_score=pred_prob[:, 1], y_true=y_test)
roc_auc = auc(fpr, tpr)
acc_con1 = confusion_matrix(y_pred=y_pred, y_true=y_test)
acc_pred1 = accuracy_score(y_pred=y_pred, y_true=y_test)

precision, recall, thresholds = precision_recall_curve(y_pred, y_test)
pr_auc = auc(recall, precision)

ap_score = average_precision_score(y_pred, y_test)

preci_scores = precision_score(y_pred=y_pred, y_true=y_test)
recall_scores = recall_score(y_pred=y_pred, y_true=y_test)
f1_scores = f1_score(y_pred=y_pred, y_true=y_test)


print("Accuracy Train: ", acc_train)
print("Accuracy Test: ", acc_pred1)
print("Confusion Matrix: ")
print(acc_con1) 
print("ROC AUC: ", roc_auc)
print("PR AUC: ", pr_auc)
print('F1 score: ', f1_scores)
print('Precision: ', preci_scores)
print('Recall: ', recall_scores)
print("AP scores: ", ap_score)

Accuracy Train:  1.0
Accuracy Test:  0.8279569892473119
Confusion Matrix: 
[[ 2 15]
 [ 1 75]]
ROC AUC:  0.75
PR AUC:  0.9907328805885682
F1 score:  0.9036144578313253
Precision:  0.8333333333333334
Recall:  0.9868421052631579
AP scores:  0.9836587436332767


gradsize = 0.7

In [45]:
######### TCGA discovery features #############
data_all = pd.read_csv('./Data/radiomic_feature/radiomics_0.7_discovery.csv',index_col=0)

label_caf = np.array(data_all['CAF'])
label_tnbc = np.array(data_all['tnbc'])

data_feature = data_all.drop(['CAF', 'tnbc'], axis=1)
#data_feature = data_feature.values
feature_name = data_feature.columns

print(data_feature.shape)
print(label_caf.shape)
print(label_tnbc.shape)

(461, 36)
(461,)
(461,)


In [46]:
# discovery cohort 
X_train,X_test,y_train,y_test = train_test_split(data_feature, label_tnbc, test_size=0.2,random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(368, 36) (93, 36) (368,) (93,)


In [47]:
# oversample
ros = RandomOverSampler(sampling_strategy=1,random_state=0)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
print(X_resampled.shape, y_resampled.shape)
print(Counter(y_resampled))

(648, 36) (648,)
Counter({1: 324, 0: 324})


In [48]:
# model
clf = xgb.XGBClassifier()

clf.fit(X_resampled, y_resampled)

In [49]:
y_pred = clf.predict(X_test)
y_pred_train = clf.predict(X_train)
acc_train = accuracy_score(y_pred=y_pred_train, y_true=y_train)

pred_prob = clf.predict_proba(X_test)
fpr, tpr, thresholds = roc_curve(y_score=pred_prob[:, 1], y_true=y_test)
roc_auc = auc(fpr, tpr)
acc_con1 = confusion_matrix(y_pred=y_pred, y_true=y_test)
acc_pred1 = accuracy_score(y_pred=y_pred, y_true=y_test)

precision, recall, thresholds = precision_recall_curve(y_pred, y_test)
pr_auc = auc(recall, precision)

ap_score = average_precision_score(y_pred, y_test)

preci_scores = precision_score(y_pred=y_pred, y_true=y_test)
recall_scores = recall_score(y_pred=y_pred, y_true=y_test)
f1_scores = f1_score(y_pred=y_pred, y_true=y_test)


print("Accuracy Train: ", acc_train)
print("Accuracy Test: ", acc_pred1)
print("Confusion Matrix: ")
print(acc_con1) 
print("ROC AUC: ", roc_auc)
print("PR AUC: ", pr_auc)
print('F1 score: ', f1_scores)
print('Precision: ', preci_scores)
print('Recall: ', recall_scores)
print("AP scores: ", ap_score)

Accuracy Train:  1.0
Accuracy Test:  0.8064516129032258
Confusion Matrix: 
[[ 1 16]
 [ 2 74]]
ROC AUC:  0.6958204334365325
PR AUC:  0.9839747217506131
F1 score:  0.891566265060241
Precision:  0.8222222222222222
Recall:  0.9736842105263158
AP scores:  0.9726278060743256
