# CAD PROJECT- Classical Approach
*By Jaime Simarro & Ahmed Gouda*

This notebook covers:
* Model creation using the complete labelled data (training + validation)
* Prediction on the test set and saving the results

In [247]:
# Mount Google Drive inside the Colab runtime; every data/model path used by
# this notebook is located under /content/drive/.
# NOTE(review): force_remount=True presumably re-mounts even when the drive is
# already mounted — confirm against the google.colab documentation.
from google.colab import drive
drive.mount("/content/drive/", force_remount=True)

Mounted at /content/drive/


In [248]:
import numpy as np
import cv2
import os
import glob
import matplotlib.pyplot as plt
import math
!pip install ciecam02
from ciecam02 import rgb2jch, jch2rgb
from skimage.feature import greycomatrix, greycoprops
from pandas import DataFrame
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2,mutual_info_classif,f_classif
! pip install pymrmr
import pymrmr
from sklearn import svm, datasets, metrics
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestClassifier
! pip install xlutils
from xlutils.copy import copy # http://pypi.python.org/pypi/xlutils
from xlwt import Workbook 
from xlrd import open_workbook # http://pypi.python.org/pypi/xlrd



In [0]:
#### JUST CHANGE THIS #####
# True  -> dermoscopy challenge configuration.
# False -> histopathology challenge configuration.
dermo_challenge=True

# Feature selection lists.
#   *_which_feat_calculate  : keys of `switcher_feat` (feature family).
#   *_which_color_calculate : keys of `switcher_color_space` (color space).
# The two lists of a pair are parallel: entry i of both together names one
# '<color space>_<feature>.npy' file. The "_Windowing" variants are the
# selections used for the window folders ('Two' / 'Three'); the others are used
# for the 'One' folder.
# The "O_" lists are used when the normalization name is "Original", the "R_"
# lists for every other normalization name (see combine_window / load_test).
if (dermo_challenge):
  # Name of the window folder to load in addition to 'One' ('' disables it).
  window_tpe='Two'
  R_which_feat_calculate=[ 0,0, 1,  2,  4,4]
  R_which_color_calculate=[1,2, 3,  2,  2,3]

  R_which_feat_calculate_Windowing=[ 0,0, 1,  2]
  R_which_color_calculate_Windowing=[1,2, 3,  2]
else:
  window_tpe=''

  O_which_feat_calculate=[ 0  ,1,1,1,   2,2,  4,4,4]
  O_which_color_calculate=[1  ,0,1,3,   1,2,  1,2,3]

  O_which_feat_calculate_Windowing=[ 1,1,1,  4,4,4]
  O_which_color_calculate_Windowing=[0,1,3,  1,2,3]

  R_which_feat_calculate=[ 1, 2,  3,  4,4,4]
  R_which_color_calculate=[0, 2,  1,  0,1,3]

  R_which_feat_calculate_Windowing=[ 1,  4,4,4]
  R_which_color_calculate_Windowing=[0,  0,1,3]

###########################
# Feature-family code -> name. The names are used verbatim to build the .npy
# file names, so their spelling must not be "corrected" here.
switcher_feat = {
    0: "GLCM",
    1: "Histogram",
    2: "Fintesity",
    3: "Fstat",
    4: "Gabour",
    5: "Combination of features",
}

# Color-space code -> name (also part of the .npy file names). The available
# color spaces differ between the two challenges.
if (dermo_challenge):
  switcher_color_space = {
      0: "Frgb",
      1: "Fhsv",
      2: "Flab",
      3: "Fjch",  
      4: "Fhybrid",        
      5: "Fsix",
      6: "Combination of color spaces", 
  }
  # The dermoscopy configuration uses an empty normalization name
  # (the dermo loaders below call combine_window("") / load_test("")).
  normalization_type=''
else:
  switcher_color_space = {
      0: "Frgb",
      1: "Fhsv",
      2: "Flab",
      3: "Fjch",
      4: "Ftissue",    
      5: "Combination of color spaces", 

  }
  # Normalization code -> name; only defined for the histopathology challenge.
  switcher_normalization = {
    0: "Original",
    1: "Reinhard",
    2: "Macenko",
  }


In [0]:
# Root folder of the pre-computed feature files (and saved models / selections)
# for the challenge chosen by `dermo_challenge`.
path_general_feat=(
  '/content/drive/My Drive/Colab Notebooks/CAD Project/Dermo Features/'
  if dermo_challenge else
  '/content/drive/My Drive/Colab Notebooks/CAD Project/Histopathology Features/'
)


## Load the desired features

In [0]:
def create_names(name_X,color_space_type,feat_type):
  """Build one name per column of a feature matrix.

  Args:
    name_X: object exposing ``shape``; ``shape[1]`` is the number of columns.
    color_space_type: color-space name; its first character is dropped
      (e.g. "Fhsv" -> "hsv").
    feat_type: feature-family name.

  Returns:
    List of strings ``"<color>_<feat_type>_<column index>"``.
  """
  prefix=color_space_type[1:]+'_'+feat_type+'_'
  return [prefix+str(column) for column in range(name_X.shape[1])]

def load_list_feat(path4load_folder_class,list_number_Feat,list_number_Color):
  """Load the requested feature files and concatenate them column-wise.

  Each (feature code, color code) pair is translated through ``switcher_feat``
  and ``switcher_color_space`` and the file
  ``<path4load_folder_class><color space>_<feature>.npy`` is loaded.

  Args:
    path4load_folder_class: folder prefix the file name is appended to.
    list_number_Feat: a key of ``switcher_feat`` or a list of such keys.
    list_number_Color: a key of ``switcher_color_space`` or a list of such
      keys, parallel to ``list_number_Feat``.

  Returns:
    ``[X, name_feat]``: the column-wise concatenation of all loaded arrays and
    the list of per-column names produced by ``create_names``.

  Raises:
    ValueError: if the two selections are empty or differ in length.
  """
  if isinstance(list_number_Feat,int)  : # For int
    list_number_Feat=[list_number_Feat]
  if isinstance(list_number_Color,int)  : # For int
    list_number_Color=[list_number_Color]
  # The selections are parallel lists; a length mismatch would otherwise be
  # silently truncated or surface as an IndexError in the middle of loading.
  if len(list_number_Feat)!=len(list_number_Color):
    raise ValueError(
        "Feature and color selections must have the same length, got "
        +str(len(list_number_Feat))+" and "+str(len(list_number_Color)))
  if len(list_number_Color)==0:
    raise ValueError("At least one feature/color pair is required")
  loaded=[]
  name_feat=[]
  for feat_code,color_code in zip(list_number_Feat,list_number_Color):
    feat_type=switcher_feat.get(feat_code, "Invalid feature type")
    color_space_type=switcher_color_space.get(color_code, "Invalid color space type")
    print("Adding feature ",feat_type,' ',color_space_type )
    path4load=path4load_folder_class+color_space_type+'_'+feat_type+'.npy'
    # NOTE(security): allow_pickle=True un-pickles arbitrary objects; only load
    # feature files produced by this project.
    new_X=np.load(path4load, allow_pickle=True)
    loaded.append(new_X)
    name_feat=name_feat+create_names(new_X,color_space_type,feat_type)
  # Concatenate once instead of re-copying the growing matrix per file.
  X=loaded[0] if len(loaded)==1 else np.concatenate(loaded,axis=1)
  return [X,name_feat]

def combine_window(normalization_type,bool_training):
  """Load positive/negative features of the train or validation split.

  Global features are read from the 'One' folder; when ``window_tpe`` is
  'Two' or 'Three' the features of that window folder are appended column-wise.

  Args:
    normalization_type: normalization folder name. "Original" selects the
      ``O_*`` feature lists, any other value the ``R_*`` lists.
    bool_training: truthy loads the 'train' split, otherwise the 'val' split.

  Returns:
    ``(X, y, names)``: positive samples stacked above negative samples, labels
    (1 for positive, 0 for negative) and column names. Window names carry a
    "W_" prefix and every name is prefixed with ``normalization_type + "_"``.
  """
  if normalization_type=="Original":
    global_selection=(O_which_feat_calculate,O_which_color_calculate)
    window_selection=(O_which_feat_calculate_Windowing,O_which_color_calculate_Windowing)
  else:
    global_selection=(R_which_feat_calculate,R_which_color_calculate)
    window_selection=(R_which_feat_calculate_Windowing,R_which_color_calculate_Windowing)

  # Global Features
  print("---Global features---")
  global_suffix='/One/train/' if bool_training else '/One/val/'
  global_folder=path_general_feat+normalization_type+global_suffix
  X_pos,names=load_list_feat(global_folder+'pos/',*global_selection)
  X_neg,_=load_list_feat(global_folder+'neg/',*global_selection)

  # Window Features
  if window_tpe in ('Two','Three'):
    print("---Window features---")
    split_suffix='/train/' if bool_training else '/val/'
    window_folder=path_general_feat+normalization_type+'/'+window_tpe+split_suffix
    window_pos,window_names=load_list_feat(window_folder+'pos/',*window_selection)
    window_neg,_=load_list_feat(window_folder+'neg/',*window_selection)
    X_pos=np.concatenate((X_pos,window_pos),axis=1)
    X_neg=np.concatenate((X_neg,window_neg),axis=1)
    names=names+["W_"+window_name for window_name in window_names]

  # Positives first, negatives second; labels follow the same order.
  X=np.concatenate((X_pos,X_neg))
  y=np.concatenate((np.ones(X_pos.shape[0]),np.zeros(X_neg.shape[0])))
  return X,y,[normalization_type+"_"+column_name for column_name in names]


In [0]:
def load_test(normalization_type):
  """Load the (unlabelled) test-set feature matrix.

  Global features are read from ``<normalization_type>/One/test/``; when
  ``window_tpe`` is 'Two' or 'Three' the features of
  ``<normalization_type>/<window_tpe>/test/`` are appended column-wise.

  Args:
    normalization_type: normalization folder name. "Original" selects the
      ``O_*`` feature lists, any other value the ``R_*`` lists.

  Returns:
    The test feature matrix, with columns in the same order as produced by
    ``combine_window`` for the same configuration.
  """
  if(normalization_type=="Original"):
    which_feat_calculate=O_which_feat_calculate
    which_color_calculate=O_which_color_calculate
    which_feat_calculate_Windowing=O_which_feat_calculate_Windowing
    which_color_calculate_Windowing=O_which_color_calculate_Windowing
  else:
    which_feat_calculate=R_which_feat_calculate
    which_color_calculate=R_which_color_calculate
    which_feat_calculate_Windowing=R_which_feat_calculate_Windowing
    which_color_calculate_Windowing=R_which_color_calculate_Windowing
  path4load_global=path_general_feat+normalization_type+'/One/test/'
  [X_global,_]=load_list_feat(path4load_global,which_feat_calculate,which_color_calculate)
  # Window Features
  if(window_tpe=='Two' or window_tpe=='Three'):
    print("---Window features---")
    # The '/' between the normalization folder and the window folder matches
    # the layout used by combine_window; without it a non-empty normalization
    # name would be glued to the window name (e.g. "OriginalTwo").
    path4load_window=path_general_feat+normalization_type+'/'+window_tpe+'/test/'
    [X_window,_]=load_list_feat(path4load_window,which_feat_calculate_Windowing,which_color_calculate_Windowing)
    X=np.concatenate((X_global,X_window),axis=1)
  else:
    X=X_global
  return X


def load_all_histo(bool_training):
  """Load histopathology features for normalizations 0 and 1 and shuffle them.

  The feature matrices of ``switcher_normalization[0]`` and
  ``switcher_normalization[1]`` are concatenated column-wise; labels are taken
  from the first one. Rows are shuffled with a fixed seed (357).

  Args:
    bool_training: forwarded to ``combine_window`` (train vs. validation).

  Returns:
    ``(X, y, names)`` after shuffling.
  """
  original_name=switcher_normalization.get(0, "Invalid normalization type")
  features_original,labels,names_original=combine_window(original_name,bool_training)

  reinhard_name=switcher_normalization.get(1, "Invalid normalization type")
  features_rein,_,names_rein=combine_window(reinhard_name,bool_training)

  # NOTE(review): assumes both normalizations list the samples in the same
  # order, since only the first set of labels is kept — confirm.
  stacked=np.concatenate((features_original,features_rein),axis=1)
  order=np.random.RandomState(357).permutation(np.arange(labels.shape[0]))
  return stacked[order],labels[order],names_original+names_rein

def load_all_dermo(bool_training):
  """Load the dermoscopy features (empty normalization name) and shuffle them.

  Args:
    bool_training: forwarded to ``combine_window`` (train vs. validation).

  Returns:
    ``(X, y, names)`` with rows shuffled by a fixed-seed (357) permutation.
  """
  features,labels,names=combine_window("",bool_training)
  order=np.random.RandomState(357).permutation(np.arange(labels.shape[0]))
  return features[order],labels[order],names


In [371]:
# Load the training, validation and test features of the selected challenge.
# `names` (column names of the training matrix) is reused later to select
# feature columns by name.
if (dermo_challenge):
  print("****Starting training data****")
  X_train,y_train,names=load_all_dermo(bool_training=True)
  print("****Starting validation data****")
  X_val,y_val,_=load_all_dermo(bool_training=False)
  print("****Starting testing data****")
  X_test=load_test("")
else:
  print("****Starting training data****")
  X_train,y_train,names=load_all_histo(bool_training=True)
  print("****Starting validation data****")
  X_val,y_val,_=load_all_histo(bool_training=False)  
  print("****Starting testing data****")
  # Test features are built like in load_all_histo: normalization 0 and 1
  # side by side, in that column order.
  normalization_type=switcher_normalization.get(0, "Invalid normalization type")
  X_test_original=load_test(normalization_type)
  normalization_type=switcher_normalization.get(1, "Invalid normalization type")
  X_test_rein=load_test(normalization_type)
  X_test=np.concatenate((X_test_original,X_test_rein),axis=1)

# The final model uses the complete labelled data: validation samples are
# appended to the training samples (X_train / y_train are rebound here).
X_train=np.concatenate((X_train,X_val),axis=0)
y_train=np.concatenate((y_train,y_val),axis=0)

****Starting training data****
---Global features---
Adding feature  GLCM   Fhsv
Adding feature  GLCM   Flab
Adding feature  Histogram   Fjch
Adding feature  Fintesity   Flab
Adding feature  Gabour   Flab
Adding feature  Gabour   Fjch
Adding feature  GLCM   Fhsv
Adding feature  GLCM   Flab
Adding feature  Histogram   Fjch
Adding feature  Fintesity   Flab
Adding feature  Gabour   Flab
Adding feature  Gabour   Fjch
---Window features---
Adding feature  GLCM   Fhsv
Adding feature  GLCM   Flab
Adding feature  Histogram   Fjch
Adding feature  Fintesity   Flab
Adding feature  GLCM   Fhsv
Adding feature  GLCM   Flab
Adding feature  Histogram   Fjch
Adding feature  Fintesity   Flab
****Starting validation data****
---Global features---
Adding feature  GLCM   Fhsv
Adding feature  GLCM   Flab
Adding feature  Histogram   Fjch
Adding feature  Fintesity   Flab
Adding feature  Gabour   Flab
Adding feature  Gabour   Fjch
Adding feature  GLCM   Fhsv
Adding feature  GLCM   Flab
Adding feature  Histogra

In [0]:
# Drop every training sample (row) that contains at least one NaN feature,
# together with its label. X_test is not touched by this cell.
index=np.flatnonzero(np.isnan(X_train).any(axis=1))
X_train=np.delete(X_train,index,0)
y_train=np.delete(y_train,index,0)

## Normalization

In [373]:
# Z-score standardisation. Mean and standard deviation are computed per column
# on the training data only and then applied to both training and test data.
mean_X  = np.mean(X_train, 0)
std_X   = np.std(X_train,0)
# A constant column has zero standard deviation; dividing by it would produce
# NaN/inf. Dividing by 1 instead leaves such a column at 0 for training data.
std_X[std_X==0] = 1.0
X_train = (X_train-mean_X)/std_X
X_test = (X_test-mean_X)/std_X

X_train.shape

(5999, 342)


## Training the model

In [0]:
def match_feat(X,selected,names):
  """Keep only the columns of ``X`` whose name appears in ``selected``.

  Args:
    X: 2-D feature matrix with one column per entry of ``names``.
    selected: iterable of selected feature names.
    names: column names of ``X``, in column order.

  Returns:
    ``X`` restricted to the selected columns (original column order kept).
  """
  # One boolean per column: True when the column name equals any selected name.
  keep_column=[any(column_name==wanted for wanted in selected) for column_name in names]
  return X[:,keep_column]

# Model creation

In [0]:
# Set to True to (re)fit a classifier on the selected features; with False the
# previously saved models are used by the prediction cell instead.
bool_model_creation=False
if(bool_model_creation):
  values=[10,50,100,200,250,300,340]
  k=200
  # The feature-selection file name depends on the window configuration.
  if(window_tpe=='Two'):
    selection_prefix='W2_f_classif_'
  elif(window_tpe=='Three'):
    selection_prefix='W3_f_classif_'
  else:
    selection_prefix='f_classif_'
  selected=np.load(path_general_feat+ selection_prefix+str(k)+'.npy')
  # Restrict train and test matrices to the selected feature names.
  s_X_train=match_feat(X_train,selected,names)
  s_X_test=match_feat(X_test,selected,names)
  print("----------------------")
  print(s_X_train.shape,s_X_test.shape)
  # Manual switch: True fits the SVM, False fits the random forest.
  if(True):
    print("SVM ",window_tpe)
    clf = svm.SVC(gamma='scale', random_state=0,probability=True)
    clf.fit(s_X_train, y_train)
  else:
    print("RF ",window_tpe)
    clf = RandomForestClassifier(n_estimators=1000,random_state=0)
    clf.fit(s_X_train, y_train)
  # Probability of the positive class (column 1) for every test sample.
  y_score=clf.predict_proba(s_X_test)[:, 1]

In [0]:
# joblib's dump/load are used to persist the fitted classifier and to reload
# the saved models in the prediction cell.
from joblib import dump, load
if(bool_model_creation):
  # NOTE(review): the file name is fixed to 'Prediction_SVM_200.joblib'
  # whatever classifier and k were used in the model-creation cell — rename
  # before saving a different model, otherwise this file is overwritten.
  dump(clf, path_general_feat+'Prediction_SVM_200.joblib') 

## Test prediction and Saving the results

In [377]:
# Load models
# Per-challenge settings: decision threshold, saved model files and the file
# holding the selected feature names.
if(dermo_challenge):
  prediction_threshold=0.5
  svm_file,rf_file='Prediction_SVM_200.joblib','Prediction_RF_200.joblib'
  selection_file='W2_f_classif_200.npy'
else:
  prediction_threshold=0.4
  svm_file,rf_file='Prediction_SVM_700.joblib','Prediction_RF_700.joblib'
  selection_file='f_classif_700.npy'
clf_svm = load(path_general_feat+svm_file)
clf_rf = load(path_general_feat+rf_file)
selected_rf=np.load(path_general_feat+ selection_file)
selected_svm=np.load(path_general_feat+ selection_file)

# RF: positive-class probability on the RF feature subset
s_X_test_rf=match_feat(X_test,selected_rf,names)
y_score_rf=clf_rf.predict_proba(s_X_test_rf)[:, 1]
# SVM: positive-class probability on the SVM feature subset
s_X_test_svm=match_feat(X_test,selected_svm,names)
y_score_svm=clf_svm.predict_proba(s_X_test_svm)[:, 1]

# Ensemble: plain average for dermo; for histopathology the SVM score enters
# the average twice (SVM weight 2, RF weight 1).
if(dermo_challenge):
  ensemble_members=(y_score_svm,y_score_rf)
else:
  ensemble_members=(y_score_svm,y_score_svm,y_score_rf)
y_score_ems=np.mean(ensemble_members,axis=0)

print("Mean of scores is: ",np.mean( y_score_ems))
print("Mean of predictions with 0.5 is (default) is: ",np.mean( y_score_ems>0.5))
print("Mean of predictions with ",prediction_threshold," is: ",np.mean( y_score_ems>prediction_threshold))


Mean of scores is:  0.41985383708515994
Mean of predictions with 0.5 is (default) is:  0.43349753694581283
Mean of predictions with  0.5  is:  0.43349753694581283


In [381]:
# Write one predicted label per test image to a spreadsheet, ordered by the
# numeric id contained in the image file name.
# NOTE(review): xlwt produces the legacy .xls format, so the '.xlsx' extension
# is probably misleading — kept unchanged because the file name may be expected
# downstream; confirm.
excel_path=path_general_feat+'PREDICTION_solved.xlsx'
if dermo_challenge:
  path="/content/drive/My Drive/Colab Notebooks/CAD Project/Dermo Challenge/test/"
  img_dir=glob.glob(path+'*.jpg')
else:
  path="/content/drive/My Drive/Colab Notebooks/CAD Project/Histopathology Challenge/test/"
  img_dir=glob.glob(path+'*.png')

n_img=len(img_dir)
print("Total number of images: ", n_img)
# Scores are mapped to images by position; a count mismatch would silently
# assign predictions to the wrong images.
if n_img!=len(y_score_ems):
  raise ValueError(
      "Number of test images ("+str(n_img)+") differs from number of scores ("
      +str(len(y_score_ems))+")")

# Extract the numeric id of every image by stripping the folder, the "xx" /
# "testImage" markers and the extension from its path.
unsort = []
for img_path in img_dir:
  name=img_path.replace(path,"")
  for token in ("xx",".jpg","testImage",".png"):
    name=name.replace(token,"")
  unsort.append(int(name))
# index[p] is the position (in img_dir) of the image with the p-th smallest id.
# NOTE(review): assumes y_score_ems follows the same order as img_dir — confirm
# against the feature-extraction notebook.
index=np.argsort(unsort)

# Create a new workbook with a single sheet; column 0 holds the labels.
wb = Workbook()
w_sheet = wb.add_sheet('Sheet 1')

for p in range(len(y_score_ems)):
  above_threshold=y_score_ems[index[p]]>prediction_threshold
  # The written label is inverted between the challenges: dermo writes 0 for a
  # score above the threshold, histopathology writes 1.
  if(dermo_challenge):
    label=0 if above_threshold else 1
  else:
    label=1 if above_threshold else 0
  w_sheet.write(p,0,label)

wb.save(excel_path)

Total number of images:  1015
