In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

### Load the subtype label and predicted result of each patch.
#### Note that only subtype labels are provided for the subtyping task.

In [39]:
# Load the patch list: one space-separated row per patch holding the patch
# file path and its subtype label.
testset_path = "/home5/hby/subtype_newdata/0.3/res_testset.txt"
test_set = pd.read_csv(testset_path, sep=' ', header=None)
test_set.columns = ['filename', 'label']

In [40]:
# Load the predicted results for the same patches (the file has no header row,
# so the five output columns are named here).
predictions_path = "/home5/hby/subtype_newdata/0.3/result/tcga.csv"
predict_all = pd.read_csv(predictions_path, header=None)
predict_all.columns = ['cancer', 'normal', 'ccrcc', 'prcc', 'chrcc']
predict_all = predict_all.reset_index(drop=True)

In [41]:
# Check that the patch list and the prediction table have the same length.
# The two tables are later concatenated column-wise, so a length mismatch would
# silently pad the shorter one with NaN; fail loudly here instead.
print(predict_all.shape)
print(test_set.shape)
assert len(predict_all) == len(test_set), (
    "patch list and prediction table have different lengths: %d vs %d"
    % (len(test_set), len(predict_all))
)

(1688822, 5)
(1688822, 2)


In [7]:
# Attach the predictions to the patch list column-wise (rows are matched by index).
# Only the two original columns of test_set are kept, so re-running this cell
# does not stack a second copy of the prediction columns onto an already
# merged frame.
test_set = pd.concat([test_set[['filename', 'label']], predict_all], axis=1)
# The slide name is the directory that directly contains the patch file.
test_set['slidename'] = test_set['filename'].apply(lambda x: x.split('/')[-2])
print(test_set.head(5))

                                            filename  label  cancer  normal  \
0  /home1/gzy/Subtype/Patches/2000/TCGA-B0-5692-0...      0  1.9121 -1.9150   
1  /home1/gzy/Subtype/Patches/2000/TCGA-B0-5692-0...      0 -1.5974  1.6026   
2  /home1/gzy/Subtype/Patches/2000/TCGA-B0-5692-0...      0  1.2498 -1.2465   
3  /home1/gzy/Subtype/Patches/2000/TCGA-B0-5692-0...      0  1.8097 -1.8122   
4  /home1/gzy/Subtype/Patches/2000/TCGA-B0-5692-0...      0  1.6865 -1.6885   

    ccrcc    prcc   chrcc                                          slidename  
0  2.7109 -2.1903 -2.0744  TCGA-B0-5692-01Z-00-DX1_34477dae-21a5-45fc-b94...  
1  0.4500 -0.1379 -0.7369  TCGA-B0-5692-01Z-00-DX1_34477dae-21a5-45fc-b94...  
2  2.2468 -2.1179 -1.3085  TCGA-B0-5692-01Z-00-DX1_34477dae-21a5-45fc-b94...  
3  2.6621 -2.0862 -1.9377  TCGA-B0-5692-01Z-00-DX1_34477dae-21a5-45fc-b94...  
4  2.9301 -2.0802 -2.2329  TCGA-B0-5692-01Z-00-DX1_34477dae-21a5-45fc-b94...  


In [8]:
# Column groups of the prediction table; a softmax is applied within each
# group separately.
subtypes = ['ccrcc', 'prcc', 'chrcc']
crds = ['cancer', 'normal']

In [9]:
# Softmax over the cancer/normal scores of every patch, computed for all rows
# at once instead of one Python-level iteration per patch.
predict_crd = np.asarray(predict_all[crds], dtype=float)
# Subtracting the row maximum does not change the softmax result but keeps
# np.exp from overflowing (inf / inf = NaN) on large scores.
crd_exp = np.exp(predict_crd - predict_crd.max(axis=1, keepdims=True))
predict_crd_norm = crd_exp / crd_exp.sum(axis=1, keepdims=True)
test_set[crds] = predict_crd_norm

In [11]:
# Softmax over the three subtype scores of every patch, computed for all rows
# at once instead of one Python-level iteration per patch.
predicted_results = np.asarray(predict_all[subtypes], dtype=float)
# Subtracting the row maximum does not change the softmax result but keeps
# np.exp from overflowing (inf / inf = NaN) on large scores.
subtype_exp = np.exp(predicted_results - predicted_results.max(axis=1, keepdims=True))
predicted_results_norm = subtype_exp / subtype_exp.sum(axis=1, keepdims=True)
test_set[subtypes] = predicted_results_norm

In [12]:
# Keep only the patches whose softmax 'normal' score is below 0.5, i.e. the
# patches predicted as cancer; subtype predictions are aggregated over these only.
is_cancer_patch = test_set['normal'] < 0.5
test_set_cancer = test_set.loc[is_cancer_patch].reset_index(drop=True)

### Report the evaluation metrics for classification

Two ways to do aggregation
1. Way one: average the predicted probabilities of each slide's patches.

In [13]:
# Average the per-patch scores of each slide, then take the arg-max subtype as
# the slide-level prediction.
# Only the numeric columns are aggregated: the frame also holds the string
# column 'filename', which DataFrameGroupBy.mean() refuses to average in
# pandas >= 2.0 (older versions silently dropped it).
# NOTE(review): averaging 'label' assumes every patch of a slide carries the
# same label - confirm against the patch list.
score_columns = ['label'] + crds + subtypes
test_set_group_mean = test_set_cancer.groupby(by='slidename', as_index=False)[score_columns].mean()
test_set_group_mean['predict'] = np.argmax(np.array(test_set_group_mean[subtypes]), axis=1)
print(classification_report(test_set_group_mean['label'], test_set_group_mean['predict'], target_names=subtypes, digits=4))
print(confusion_matrix(test_set_group_mean['label'], test_set_group_mean['predict'], labels=[0,1,2]))

              precision    recall  f1-score   support

       ccrcc     0.9306    0.9617    0.9459       209
        prcc     0.9691    0.8920    0.9290       176
       chrcc     0.8571    0.9429    0.8980        70

    accuracy                         0.9319       455
   macro avg     0.9189    0.9322    0.9243       455
weighted avg     0.9342    0.9319    0.9320       455

[[201   4   4]
 [ 12 157   7]
 [  3   1  66]]


2. Way two: take a majority vote over the predicted labels of each slide's patches.

In [14]:
def _most_common(values):
    """Return the most frequent entry of a Series (first index of value_counts)."""
    return values.value_counts().index[0]

# Per-patch predicted subtype = arg-max over the three subtype columns.
patch_scores = np.array(test_set_cancer[subtypes])
test_set_cancer['predict'] = patch_scores.argmax(axis=1)
# Slide-level label and prediction = most frequent value among the slide's patches.
patch_votes = test_set_cancer[['slidename', 'label', 'predict']]
test_set_group_mod = patch_votes.groupby('slidename', as_index=False).agg(_most_common)
print(classification_report(test_set_group_mod['label'], test_set_group_mod['predict'], target_names=subtypes, digits=4))
print(confusion_matrix(test_set_group_mod['label'], test_set_group_mod['predict'], labels=[0,1,2]))

              precision    recall  f1-score   support

       ccrcc     0.9393    0.9617    0.9504       209
        prcc     0.9691    0.8920    0.9290       176
       chrcc     0.8481    0.9571    0.8993        70

    accuracy                         0.9341       455
   macro avg     0.9188    0.9370    0.9262       455
weighted avg     0.9368    0.9341    0.9342       455

[[201   4   4]
 [ 11 157   8]
 [  2   1  67]]


### Calculate the AUROC values

In [15]:
# Slide-level score for each subtype = fraction of the slide's cancer patches
# whose predicted label is that subtype.
# The one-hot votes are built in a separate frame so that the softmax
# probability columns of test_set_cancer are not overwritten; overwriting them
# would make a later re-run of the mean-probability aggregation average votes
# instead of probabilities.
patch_vote_table = pd.DataFrame({'slidename': test_set_cancer['slidename'], 'all': 1})
for class_idx, subtype_name in enumerate(subtypes):
    # Vectorised comparison instead of a Python-level apply over every patch.
    patch_vote_table[subtype_name] = (test_set_cancer['predict'] == class_idx).astype('int64')

# 'all' sums to the number of cancer patches per slide; each subtype column
# sums to the number of patches voting for that subtype.
test_set_group_predict = patch_vote_table.groupby(by='slidename', as_index=False).sum()
test_set_group_predict[subtypes] = test_set_group_predict[subtypes].div(test_set_group_predict['all'], axis=0)

In [28]:
from sklearn.preprocessing import label_binarize
# The true labels and the predicted scores come from two different slide-level
# tables and are paired purely by row position, so make sure both list the
# slides in the same order before scoring.
assert np.array_equal(
    np.array(test_set_group_mod['slidename']),
    np.array(test_set_group_predict['slidename']),
), "slide order differs between the label table and the score table"
# One indicator column per subtype, in the same order as `subtypes`.
y_one_hot = label_binarize(test_set_group_mod['label'], classes=list(range(len(subtypes))))
y_pred = np.array(test_set_group_predict[subtypes])

In [29]:
from sklearn.metrics import roc_auc_score
# Multi-label AUROC over the one-hot labels, with micro and macro averaging.
micro_auc = roc_auc_score(y_one_hot, y_pred, average='micro')
macro_auc = roc_auc_score(y_one_hot, y_pred, average='macro')
print("Micro AUC: %f" % micro_auc)
print("Macro AUC: %f" % macro_auc)

Micro AUC: 0.989080
Macro AUC: 0.989765


In [30]:
# One-vs-rest AUROC per class; classes 1, 2, 3 are columns 0, 1, 2 of
# y_one_hot / y_pred.
auc_class_1 = roc_auc_score(y_one_hot[:, 0], y_pred[:, 0])
auc_class_2 = roc_auc_score(y_one_hot[:, 1], y_pred[:, 1])
auc_class_3 = roc_auc_score(y_one_hot[:, 2], y_pred[:, 2])
print("class 1 AUC: %f" % auc_class_1)
print("class 2 AUC: %f" % auc_class_2)
print("class 3 AUC: %f" % auc_class_3)

class 1 AUC: 0.984070
class 2 AUC: 0.990938
class 3 AUC: 0.994286
